# 第1章のプログラムは、事前に下記が実行されていることを仮定する。
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import style
style.use("seaborn-ticks")
n = 3
B = np.random.normal(size=n**2).reshape(3, 3)
A = np.dot(B.T, B)
values, vectors = np.linalg.eig(A)
print("values:\n", values, "\n\nvectors:\n", vectors, "\n")
S = []
for i in range(10):
z = np.random.normal(size = n)
y = np.squeeze(z.T.dot(A.dot(z)))
S.append(y)
if (i+1) % 5 == 0:
print("S[%d:%d]:"%((i-4),i), S[i-4:i])
n = 250
x = 2 * np.random.normal(size = n)
y = np.sin(2 * np.pi * x) + np.random.normal(size = n) / 4
def D(t):
return np.maximum(0.75 * (1 - t**2), 0)
def k(x, y, lam):
return D(np.abs((x - y) / lam))
def f(z, lam):
S = 0; T = 0
for i in range(n):
S = S + k(x[i], z, lam) * y[i]
T = T + k(x[i], z, lam)
return S / T
plt.figure(num=1, figsize=(15, 8),dpi=80)
plt.xlim(-3, 3); plt.ylim(-2, 3)
plt.xticks(fontsize = 14); plt.yticks(fontsize = 14)
plt.scatter(x, y, facecolors='none', edgecolors = "k", marker = "o")
xx = np.arange(-3, 3, 0.1)
yy = [[] for _ in range(3)]
lam = [0.05, 0.35, 0.50]
color = ["g", "b", "r"]
for i in range(3):
for zz in xx:
yy[i].append(f(zz, lam[i]))
plt.plot(xx, yy[i], c = color[i], label = lam[i])
plt.legend(loc = "upper left", frameon = True, prop={'size':14})
plt.title("Nadaraya-Watson Estimator", fontsize = 20)
def K(x, y, sigma2):
return np.exp(-np.linalg.norm(x - y)**2/2/sigma2)
def F(z, sigma2): # 関数定義 f
S=0; T=0
for i in range(n):
S = S + K(x[i], z, sigma2) * y[i]
T = T + K(x[i], z, sigma2)
return S / T
n = 100
x = 2 * np.random.normal(size = n)
y = np.sin(2 * np.pi * x) + np.random.normal(size = n) / 4 # データ生成
# sigma2 = 0.01, 0.001 の曲線の図示
plt.figure(num=1, figsize=(15, 8),dpi=80)
plt.scatter(x, y, facecolors='none', edgecolors = "k", marker = "o")
plt.xlim(-3, 3)
plt.ylim(-2, 3)
plt.xticks(fontsize = 14); plt.yticks(fontsize = 14)
xx = np.arange(-3, 3, 0.1)
yy = [[] for _ in range(2)]
sigma2 = [0.001, 0.01]
color = ["g", "b"]
for i in range(2):
for zz in xx:
yy[i].append(F(zz, sigma2[i]))
plt.plot(xx, yy[i], c = color[i], label = sigma2[i])
plt.legend(loc = "upper left", frameon = True, prop={'size':20})
plt.title("Nadaraya-Watson Estimator", fontsize = 20)
m = int(n / 10)
sigma2_seq = np.arange(0.001, 0.01, 0.001)
SS_min = np.inf
for sigma2 in sigma2_seq:
SS = 0
for k in range(10):
test = range(k*m, (k+1)*m)
train = [x for x in range(n) if x not in test]
for j in test:
u, v = 0, 0
for i in train:
kk = K(x[i], x[j], sigma2)
u = u + kk * y[i]
v = v + kk
if v != 0:
z = u/v
SS = SS + (y[j] - z)**2
if SS < SS_min:
SS_min = SS
sigma2_best = sigma2
print("Best sigma2 = ", sigma2_best)
SS
plt.figure(num = 1, figsize=(15, 8),dpi = 80)
plt.scatter(x, y, facecolors = 'none', edgecolors = "k", marker = "o")
plt.xlim(-3, 3)
plt.ylim(-2, 3)
plt.xticks(fontsize = 14); plt.yticks(fontsize = 14)
xx = np.arange(-3, 3, 0.1)
yy = [[] for _ in range(3)]
sigma2 = [0.001, 0.01, sigma2_best]
labels = [0.001, 0.01, "sigma2_best"]
color = ["g", "b", "r"]
for i in range(3):
for zz in xx:
yy[i].append(F(zz, sigma2[i]))
plt.plot(xx, yy[i], c = color[i], label = labels[i])
plt.legend(loc = "upper left", frameon = True, prop={'size': 20})
plt.title("Nadaraya-Watson Estimator", fontsize = 20)
def string_kernel(x, y):
m, n = len(x), len(y)
S = 0
for i in range(m):
for j in range(i, m):
for k in range(n):
if x[(i-1):j] == y[(k-1):(k+j-i)]:
S = S + 1
return S
C = ["a", "b", "c"]
m = 10
w = np.random.choice(C, m, replace = True)
x = ""
for i in range(m):
x = x + w[i]
n = 12
w = np.random.choice(C, n, replace = True)
y = ""
for i in range(n):
y = y + w[i]
x
y
string_kernel(x,y)
def C(i, j):
S, T = s[i], t[j]
if S[0] != T[0]:
return 0
if S[1] is None:
return 0
if T[1] is None:
return 0
if len(S[1]) != len(T[1]):
return 0
U = []
for x in S[1]:
U.append(s[x][0])
U1 = sorted(U)
V = []
for y in T[1]:
V.append(t[y][0])
V1 = sorted(V)
m = len(U)
for h in range(m):
if U1[h] != V1[h]:
return 0
U2 = np.array(S[1])[np.argsort(U)]
V2 = np.array(T[1])[np.argsort(V)]
W = 1
for h in range(m):
W = W * (1 + C(U2[h], V2[h]))
return W
def k(s, t):
m, n = len(s), len(t)
kernel = 0
for i in range(m):
for j in range(n):
if C(i, j) > 0:
kernel = kernel + C(i, j)
return kernel
s = [[] for _ in range(6)]
s[0] = ["G", [1, 3]]; s[1] = ["T", [2]]; s[2] = ["C", None]
s[3] = ["A", [4, 5]]; s[4] = ["C", None]; s[5] = ["T", None]
t = [[] for _ in range(9)]
t[0] = ["G", [1, 4]]; t[1] = ["A", [2, 3]]; t[2] = ["C", None]
t[3] = ["T", None]; t[4] = ["T", [5, 6]]; t[5] = ["C", None]
t[6] = ["A", [7, 8]]; t[7] = ["C", None]; t[8] = ["T", None]
for i in range(6):
for j in range(9):
if C(i, j) > 0:
print(i, j, C(i, j))
k(s, t)
def k(s, p):
return prob(s, p)/ len(node)
def prob(s, p):
if len(node[s[0]]) == 0:
return 0
if len(s) == 1:
return p
m = len(s)
S = (1 - p) / len(node[s[0]]) * prob(s[1:m], p)
return S
node = [[] for _ in range(5)]
node[0] = [2, 4]; node[1] = [4]; node[2] = [1, 5]
node[3] = [1, 5]; node[4] = [3]
k([0, 3, 2, 4, 2], 1 / 3)
def k(x, y, lam):
return D(np.abs((x - y) / lam))
n = 250
x = 2 * np.random.normal(size = n)
y = np.sin(2 * np.pi * x) + np.random.normal(size = n) / 4
plt.figure(num=1, figsize=(15, 8),dpi=80)
plt.xlim(-3, 3); plt.ylim(-2, 3)
plt.xticks(fontsize = 14); plt.yticks(fontsize = 14)
plt.scatter(x, y, facecolors='none', edgecolors = "k", marker = "o")
xx = np.arange(-3, 3, 0.1)
yy = [[] for _ in range(3)]
lam = [0.05, 0.35, 0.50]
color = ["g", "b", "r"]
for i in range(3):
for zz in xx:
yy[i].append(f(zz, lam[i]))
plt.plot(xx, yy[i], c = color[i], label = lam[i])
plt.legend(loc = "upper left", frameon = True, prop={'size':14})
plt.title("Nadaraya-Watson Estimator", fontsize = 20)
def K(x, y, sigma2):
return np.exp(-np.linalg.norm(x - y)**2/2/sigma2)
n = 100
x = 2 * np.random.normal(size = n)
y = np.sin(2 * np.pi * x) + np.random.normal(size = n) / 4
m = int(n / 10)
sigma2_seq = np.arange(0.001, 0.01, 0.001)
SS_min = np.inf
for sigma2 in sigma2_seq:
SS = 0
for k in range(10):
test = range(k*m,(k+1)*m)
train = [x for x in range(n) if x not in test]
for j in test:
u, v = 0, 0
for i in train:
kk = K(x[i], x[j], sigma2)
u = u + kk * y[i]
v = v + kk
if not(v==0):
z=u/v
SS = SS + (y[j] - z)**2
if SS < SS_min:
SS_min = SS
sigma2_best = sigma2
print("Best sigma2 = ", sigma2_best)
def string_kernel(x, y):
m, n = len(x), len(y)
S = 0
for i in range(m):
for j in range(i, m):
for k in range(n):
if x[(i-1):j] == y[(k-1):(k+j-i)]:
S = S + 1
return S
def k(s, p):
return prob(s, p)/ len(node)
def prob(s, p):
if len(node[s[0]]) == 0:
return 0
if len(s) == 1:
return p
m = len(s)
S = (1 - p) / len(node[s[0]]) * prob(s[1:m], p)
return S