# The programs in Chapter 1 assume that the following are executed.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import style
style.use("seaborn-ticks")
n = 3
B = np.random.normal(size=n**2).reshape(3, 3)
A = np.dot(B.T, B)
values, vectors = np.linalg.eig(A)
print("values:\n", values, "\n\nvectors:\n", vectors, "\n")
S = []
for i in range(10):
z = np.random.normal(size=n)
y = np.squeeze(z.T.dot(A.dot(z)))
S.append(y)
if (i+1) % 5 == 0:
print("S[%d:%d]:" % ((i-4), i), S[i-4:i])
n = 250
x = 2 * np.random.normal(size=n)
y = np.sin(2 * np.pi * x) + np.random.normal(size=n) / 4 # Data Generation
def D(t): # definition of function D
return np.maximum(0.75 * (1 - t**2), 0)
def k(x, y, lam): # definition of function K
return D(np.abs((x - y) / lam))
def f(z, lam): # definition of function f
S = 0
T = 0
for i in range(n):
S = S + k(x[i], z, lam) * y[i]
T = T + k(x[i], z, lam)
return S / T
plt.figure(num=1, figsize=(15, 8), dpi=80)
plt.xlim(-3, 3)
plt.ylim(-2, 3)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.scatter(x, y, facecolors="none", edgecolors="k", marker="o")
xx = np.arange(-3, 3, 0.1)
yy = [[] for _ in range(3)]
lam = [0.05, 0.35, 0.50]
color = ["g", "b", "r"]
for i in range(3):
for zz in xx:
yy[i].append(f(zz, lam[i]))
plt.plot(xx, yy[i], c=color[i], label=lam[i])
plt.legend(loc="upper left", frameon=True, prop={"size": 14})
plt.title("Nadaraya-Watson Estimator", fontsize=20)
def K(x, y, sigma2):
return np.exp(-np.linalg.norm(x - y)**2 / 2 / sigma2)
def F(z, sigma2): # Function definition f
S = 0
T = 0
for i in range(n):
S = S + K(x[i], z, sigma2) * y[i]
T = T + K(x[i], z, sigma2)
return S / T
n = 100
x = 2 * np.random.normal(size=n)
y = np.sin(2 * np.pi * x) + np.random.normal(size=n) / 4 # Data Generation
# Illustration of the curve with sigma2 = 0.01, 0.001
plt.figure(num=1, figsize=(15, 8), dpi=80)
plt.scatter(x, y, facecolors="none", edgecolors="k", marker="o")
plt.xlim(-3, 3)
plt.ylim(-2, 3)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
xx = np.arange(-3, 3, 0.1)
yy = [[] for _ in range(2)]
sigma2 = [0.001, 0.01]
color = ["g", "b"]
for i in range(2):
for zz in xx:
yy[i].append(F(zz, sigma2[i]))
plt.plot(xx, yy[i], c=color[i], label=sigma2[i])
plt.legend(loc="upper left", frameon=True, prop={"size": 20})
plt.title("Nadaraya-Watson Estimator", fontsize=20)
# Calculate the optimal lambda value
m = int(n / 10)
sigma2_seq = np.arange(0.001, 0.01, 0.001)
SS_min = np.inf
for sigma2 in sigma2_seq:
SS = 0
for k in range(10):
test = range(k*m, (k+1)*m)
train = [x for x in range(n) if x not in test]
for j in test:
u, v = 0, 0
for i in train:
kk = K(x[i], x[j], sigma2)
u = u + kk * y[i]
v = v + kk
if v != 0:
z = u / v
SS = SS + (y[j] - z)**2
if SS < SS_min:
SS_min = SS
sigma2_best = sigma2
print("Best sigma2 =", sigma2_best)
plt.figure(num=1, figsize=(15, 8), dpi=80)
plt.scatter(x, y, facecolors="none", edgecolors="k", marker="o")
plt.xlim(-3, 3)
plt.ylim(-2, 3)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
xx = np.arange(-3, 3, 0.1)
yy = [[] for _ in range(3)]
sigma2 = [0.001, 0.01, sigma2_best]
labels = [0.001, 0.01, "sigma2_best"]
color = ["g", "b", "r"]
for i in range(3):
for zz in xx:
yy[i].append(F(zz, sigma2[i]))
plt.plot(xx, yy[i], c=color[i], label=labels[i])
plt.legend(loc="upper left", frameon=True, prop={"size": 20})
plt.title("Nadaraya-Watson Estimator", fontsize=20)
def string_kernel(x, y):
m, n = len(x), len(y)
S = 0
for i in range(m):
for j in range(i, m):
for k in range(n):
if x[(i-1):j] == y[(k-1):(k+j-i)]:
S = S + 1
return S
C = ["a", "b", "c"]
m = 10
w = np.random.choice(C, m, replace=True)
x = ""
for i in range(m):
x = x + w[i]
n = 12
w = np.random.choice(C, n, replace=True)
y = ""
for i in range(n):
y = y + w[i]
x
y
string_kernel(x, y)
def C(i, j):
S, T = s[i], t[j]
# returns 0 if the labels of vertex i in tree s or vertex j in tree t do not match.
if S[0] != T[0]:
return 0
# Return 0 if vertex i of tree s or vertex j of tree t has no descendants.
if S[1] is None:
return 0
if T[1] is None:
return 0
if len(S[1]) != len(T[1]):
return 0
U = []
for x in S[1]:
U.append(s[x][0])
U1 = sorted(U)
V = []
for y in T[1]:
V.append(t[y][0])
V1 = sorted(V)
m = len(U)
# Return 0 if the labels of the descendants do not match.
for h in range(m):
if U1[h] != V1[h]:
return 0
U2 = np.array(S[1])[np.argsort(U)]
V2 = np.array(T[1])[np.argsort(V)]
W = 1
for h in range(m):
W = W * (1 + C(U2[h], V2[h]))
return W
def k(s, t):
m, n = len(s), len(t)
kernel = 0
for i in range(m):
for j in range(n):
if C(i, j) > 0:
kernel = kernel + C(i, j)
return kernel
s = [[] for _ in range(6)]
s[0] = ["G", [1, 3]]
s[1] = ["T", [2]]
s[2] = ["C", None]
s[3] = ["A", [4, 5]]
s[4] = ["C", None]
s[5] = ["T", None]
t = [[] for _ in range(9)]
t[0] = ["G", [1, 4]]
t[1] = ["A", [2, 3]]
t[2] = ["C", None]
t[3] = ["T", None]
t[4] = ["T", [5, 6]]
t[5] = ["C", None]
t[6] = ["A", [7, 8]]
t[7] = ["C", None]
t[8] = ["T", None]
for i in range(6):
for j in range(9):
if C(i, j) > 0:
print(i, j, C(i, j))
k(s, t)
def k(s, p):
return prob(s, p) / len(node)
def prob(s, p):
if len(node[s[0]]) == 0:
return 0
if len(s) == 1:
return p
m = len(s)
S = (1 - p) / len(node[s[0]]) * prob(s[1:m], p)
return S
node = [[] for _ in range(5)]
node[0] = [2, 4]
node[1] = [4]
node[2] = [1, 5]
node[3] = [1, 5]
node[4] = [3]
k([0, 3, 2, 4, 2], 1 / 3)