4.1 カーネルRidge回帰
alpha = function(k, x, y) {
n = length(x); K = matrix(0, n, n)
for (i in 1:n) for (j in 1:n) K[i, j] = k(x[i], x[j])
return(solve(K + 10^(-5)*diag(n)) %*% y) # K が正則でないかも 10^(-5)I で可逆に
}
例63
k.p = function(x, y) (sum(x*y)+1)^3 # カーネル定義
k.g = function(x, y) exp(-(x-y)^2/2) # カーネル定義
lambda = 0.1
n = 50; x = rnorm(n); y = 1 + x + x^2 + rnorm(n) # データ生成
alpha.p = alpha(k.p, x, y); alpha.g = alpha(k.g, x, y)
z = sort(x); u = array(n); v = array(n)
for (j in 1:n) {
S = 0; for (i in 1:n) S = S + alpha.p[i] * k.p(x[i], z[j]); u[j] = S
S = 0; for (i in 1:n) S = S + alpha.g[i] * k.g(x[i], z[j]); v[j] = S
}
plot(z, u, type = "l", xlim = c(-1, 1), xlab = "x", ylab = "y", ylim = c(-1, 5),
col = "red", main = "カーネル回帰")
lines(z, v, col = "blue"); points(x, y)
legend("topleft", legend = c("多項式カーネル", "Gauss カーネル"),
col = c("red", "blue"), lty = 1)
alpha = function(k, x, y) {
n = length(x); K = matrix(0, n, n)
for (i in 1:n) for (j in 1:n) K[i, j] = k(x[i], x[j])
return(solve(K + lambda * diag(n)) %*% y)
}
例64
k.p = function(x, y) (sum(x*y)+1)^3 # カーネル定義
k.g = function(x, y) exp(-(x-y)^2/2) # カーネル定義
lambda = 0.1
n = 50; x = rnorm(n); y = 1 + x + x^2 + rnorm(n) # データ生成
alpha.p = alpha(k.p, x, y); alpha.g = alpha(k.g, x, y)
z = sort(x); u = array(n); v = array(n)
for (j in 1:n) {
S = 0; for (i in 1:n) S = S + alpha.p[i] * k.p(x[i], z[j]); u[j] = S
S = 0; for (i in 1:n) S = S + alpha.g[i] * k.g(x[i], z[j]); v[j] = S
}
plot(z, u, type = "l", xlim = c(-1, 1), xlab = "x", ylab = "y",
ylim = c(-1, 5), col = "red", main = "Kernel Ridge")
lines(z, v, col = "blue"); points(x, y)
4.2 カーネル主成分分析
kernel.pca.train = function(x, k) {
n = nrow(x); K = matrix(0, n, n); S = rep(0, n); T = rep(0, n)
for (i in 1:n) for(j in 1:n) K[i, j] = k(x[i, ], x[j, ])
for (i in 1:n) S[i] = sum(K[i, ])
for (j in 1:n) T[j] = sum(K[, j])
U = sum(K)
for (i in 1:n) for (j in 1:n) K[i, j] = K[i, j] - S[i]/n - T[j]/n + U/n^2
res = eigen(K)
alpha = matrix(0, n, n)
for (i in 1:n) alpha[, i] = res$vector[, i] / res$value[i] ^ 0.5
return(alpha)
}
kernel.pca.test = function(x, k, alpha, m, z) {
n = nrow(x)
pca = array(0, dim = m)
for (i in 1:n) pca = pca + alpha[i, 1:m] * k(x[i, ], z)
return(pca)
}
例65
# k = function(x, y) sum(x*y)
sigma.2 = 0.01; k = function(x, y) exp(-norm(x-y, "2")^2 / 2 / sigma.2)
x = as.matrix(USArrests); n = nrow(x); p = ncol(x)
alpha = kernel.pca.train(x, k)
z = array(dim = c(n, 2)); for (i in 1:n) z[i, ] = kernel.pca.test(x, k, alpha, 2, x[i, ])
min.1 = min(z[, 1]); min.2 = min(z[, 2]); max.1 = max(z[, 1]); max.2 = max(z[, 2])
plot(0, xlim = c(min.1, max.1), ylim = c(min.2, max.2), xlab = "First", ylab = "Second",
cex.lab = 0.75, cex.axis = 0.75, main = "Kernel PCA (Gauss 0.01)")
for (i in 1:n) if (i != 5) text(z[i, 1], z[i, 2], labels = i, cex = 0.5)
text(z[5, 1], z[5, 2], 5, col = "red")
# 通常の PCA の場合、スコアは下記 1 行で求めることができる。
z = prcomp(x)$x[, 1:2]
4.3 カーネルSVM
例66
library(quadprog)
## Warning: package 'quadprog' was built under R version 4.0.3
K.linear = function(x, y) t(x) %*% y
K.poly = function(x, y) (1 + t(x) %*% y)^2
svm.2 = function(X, y, C, K) { # 関数名を svm.2 とした
eps = 0.0001; n = nrow(X)
Dmat = matrix(nrow = n, ncol = n); Kmat = matrix(nrow = n, ncol = n)
for (i in 1:n) for (j in 1:n) {
Dmat[i, j] = K(X[i, ], X[j, ]) * y[i] * y[j]; Kmat = K(X[i, ], X[j, ])
}
Dmat = Dmat + eps * diag(n); dvec = rep(1, n)
Amat = matrix(nrow = (2*n + 1), ncol = n)
Amat[1, ] = y; Amat[2:(n + 1), 1:n] = -diag(n)
Amat[(n + 2):(2*n + 1), 1:n] = diag(n); Amat = t(Amat)
bvec = c(0, -C*rep(1, n), rep(0, n)); meq = 1
alpha = solve.QP(Dmat, dvec, Amat, bvec = bvec, meq = 1)$solution
index = (1:n)[0 < alpha & alpha < C]
beta = drop(Kmat %*% (alpha * y)); beta.0 = mean(y[index] - beta[index])
return(list(alpha = alpha, beta.0 = beta.0))
}
# 関数の定義
plot.kernel = function(K, lty) { # 引数で、線の種類を指定する lty
qq = svm.2(X, y, 0.1, K); alpha = qq$alpha; beta.0 = qq$beta.0
f = function(u, v) {
x = c(u, v); S = beta.0; for (i in 1:n) S = S + alpha[i] * y[i] * K(X[i, ], x)
return(S)
}
# z は、f(x, y) での方向の高さを求める。これから、輪郭を求めることができる。
u = seq(-2, 2, .1); v = seq(-2, 2, .1); w = array(dim = c(41, 41))
for (i in 1:41) for (j in 1:41) w[i, j] = f(u[i], v[j])
contour(u, v, w, level = 0, add = TRUE, lty = lty)
}
# 実行
a = rnorm(1); b = rnorm(1)
n = 100; X = matrix(rnorm(n * 2), ncol = 2, nrow = n)
y = sign(a*X[, 1] + b*X[, 2] + 0.3*rnorm(n))
plot(-3:3, -3:3, xlab = "X[, 1]", ylab = "X[, 2]", type = "n")
for (i in 1:n) {
if (y[i] == 1) {
points(X[i, 1], X[i, 2], col = "red")
} else {
points(X[i, 1], X[i, 2], col = "blue")
}
}
4.4 スプライン
# d, h で基底を求める関数を構成している
d = function(j, x, knots) {
K = length(knots)
(max((x - knots[j])^3, 0) - max((x - knots[K])^3, 0)) / (knots[K] - knots[j])
}
h = function(j, x, knots) {
K = length(knots)
if (j == 1) return(1) else if (j == 2) return(x)
else return(d(j-2, x, knots) - d(K-1, x, knots))
}
# G は、2 回微分した関数を積分した値になっている。
G = function(x) { # x の各値が昇順になっていることを仮定している
n = length(x); g = matrix(0, nrow = n, ncol = n)
for (i in 3:(n-1)) for (j in i:n) {
g[i, j] = 12 * (x[n] - x[n-1]) * (x[n-1] - x[j-2]) * (x[n-1] - x[i-2]) / (x[n] - x[i-2]) /
(x[n] - x[j-2]) + (12 * x[n-1] + 6 * x[j-2] - 18 * x[i-2]) * (x[n-1] - x[j-2])^2 /
(x[n] - x[i-2]) / (x[n] - x[j-2])
g[j, i] = g[i, j]
}
return(g)
}
# メインの処理
n = 100; x = runif(n, -5, 5); y = x + sin(x)*2 + rnorm(n) # データ生成
index = order(x); x = x[index]; y = y[index]
X = matrix(nrow = n, ncol = n); X[, 1] = 1
for (j in 2:n) for (i in 1:n) X[i, j] = h(j, x[i], x) # 行列 X の生成
GG = G(x); # 行列 G の生成
lambda.set = c(1, 30, 80); col.set = c("red", "blue", "green")
for (i in 1:3) {
lambda = lambda.set[i]
gamma = solve(t(X) %*% X + lambda * GG) %*% t(X) %*% y
g = function(u) {S = gamma[1]; for (j in 2:n) S = S + gamma[j] * h(j, u, x); S}
u.seq = seq(-8, 8, 0.02); v.seq = NULL; for (u in u.seq) v.seq = c(v.seq, g(u))
plot(u.seq, v.seq, type = "l", yaxt = "n", xlab = "x", ylab = "g(x)",
ylim = c(-8, 8), col = col.set[i])
par(new = TRUE)
}
points(x, y); legend("topleft", paste0("lambda = ", lambda.set), col = col.set, lty = 1)
title("平滑化スプライン (n = 100)")
4.5 Random Fourier Features
例68
sigma = 10; sigma2 = sigma^2
k = function(x, y) exp(-(x-y)^2 / 2 / sigma2)
z = function(x) sqrt(2/m) * cos(w*x + b)
zz = function(x, y) sum(z(x) * z(y))
u = matrix(0, 1000, 3)
m_seq = c(20, 100, 400)
for (i in 1:1000) {
x = rnorm(1); y = rnorm(1)
for (j in 1:3) {
m = m_seq[j]; w = rnorm(m) / sigma; b = runif(m) * 2 * pi
u[i, j] = zz(x, y) - k(x, y)
}
}
boxplot(u[, 1], u[, 2], u[, 3], ylab = "k(x, y)との差異", names = paste0("m = ", m_seq),
col = c("red", "blue", "green"), main = "RFF によるカーネルの近似")
例69
sigma = 10; sigma2 = sigma^2
# 関数 z
m = 20; w = rnorm(m) / sigma; b = runif(m) * 2 * pi
z = function(u, m) sqrt(2/m) * cos(w*u + b)
# ガウスカーネル
k.g = function(x, y) exp(-(x-y)^2 / 2 / sigma2)
# データ生成
n = 200; x = rnorm(n) / 2; y = 1 + 5*sin(x/10) + 5*x^2 + rnorm(n)
x.min = min(x); x.max = max(x); y.min = min(y); y.max = max(y)
lambda = 0.001 # lambda = 0.9 も
# 低ランク近似の関数
alpha.rff = function(x, y, m) {
n = length(x)
Z = array(dim = c(n, m))
for (i in 1:n) Z[i, ] = z(x[i], m)
beta = solve(t(Z) %*% Z + lambda * diag(m)) %*% t(Z) %*% y
return(as.vector(beta))
}
# 通常の関数
alpha = function(k, x, y) {
n = length(x); K = matrix(0, n, n)
for (i in 1:n) for (j in 1:n) K[i, j] = k(x[i], x[j])
alpha = solve(K + lambda * diag(n)) %*% y
return(as.vector(alpha))
}
# 数値で比較する
alpha.hat = alpha(k.g, x, y)
beta.hat = alpha.rff(x, y, m)
r = sort(x); u = array(n); v = array(n)
for (j in 1:n) {
S = 0; for (i in 1:n) S = S + alpha.hat[i] * k.g(x[i], r[j]); u[j] = S
v[j] = sum(beta.hat * z(r[j], m))
}
plot(r, u, type = "l", xlim = c(x.min, x.max), ylim = c(y.min, y.max),
xlab = "x", ylab = "y", col = "red", main = "lambda = 10^{-4}, m = 20, n = 200")
lines(r, v, col = "blue"); points(x, y)
legend("topleft", lwd = 1, c("近似なし", "近似あり"), col = c("red", "blue"))
4.7 不完全Cholesky分解
im.ch = function(A, m = ncol(A)) {
n = ncol(A); R = matrix(0, n, n); P = diag(n)
for (i in 1:n) R[i, i] = sqrt(A[i, i])
max.R = 0; for (i in 1:n) if (R[i, i] > max.R) {k = i; max.R = R[i, i]}
R[1, 1] = max.R
if (k != 1) {
w = A[, k]; A[, k] = A[, 1]; A[, 1] = w
w = A[k, ]; A[k, ] = A[1, ]; A[1, ] = w
P[1, 1] = 0; P[k, k] = 0; P[1, k] = 1; P[k, 1] = 1
}
for (i in 2:n) R[i, 1] = A[i, 1] / R[1, 1]
if (m > 1) for (i in 2:m) {
for (j in i:n) R[j, j] = sqrt(A[j, j] - sum(R[j, 1:(i-1)]^2))
max.R = 0; for (j in i:n) if (R[j, j] > max.R) {k = j; max.R = R[j, j]}
R[i, i] = max.R
if (k != i) {
w = R[i, 1:(i-1)]; R[i, 1:(i-1)] = R[k, 1:(i-1)]; R[k, 1:(i-1)] = w
w = A[, k]; A[, k] = A[, i]; A[, i] = w
w = A[k, ]; A[k, ] = A[i, ]; A[i, ] = w
Q = diag(n); Q[i, i] = 0; Q[k, k] = 0; Q[i, k] = 1; Q[k, i] = 1; P = P %*% Q
}
if (i < n) for (j in (i+1):n)
R[j, i] = (A[j, i] - sum(R[i, 1:(i-1)] * R[j, 1:(i-1)])) / R[i, i]
}
if (m < n) for (i in (m+1):n) R[, i] = 0
return(list(P = P, R = R))
}
# データ生成
n = 4; range = -5:5
D = matrix(sample(range, n*n, replace = TRUE), n, n)
A = t(D) %*% D
# 実行例
im.ch(A)
## $P
## [,1] [,2] [,3] [,4]
## [1,] 0 0 0 1
## [2,] 1 0 0 0
## [3,] 0 1 0 0
## [4,] 0 0 1 0
##
## $R
## [,1] [,2] [,3] [,4]
## [1,] 6.0000000 0.000000 0.000000 0.000000
## [2,] -1.5000000 5.722762 0.000000 0.000000
## [3,] 0.8333333 1.092130 5.011268 0.000000
## [4,] 5.1666667 1.878464 0.527395 1.580754
L = im.ch(A)$R; L %*% t(L)
## [,1] [,2] [,3] [,4]
## [1,] 36 -9 5 31
## [2,] -9 35 5 3
## [3,] 5 5 27 9
## [4,] 31 3 9 33
P = im.ch(A)$P; t(P) %*% A %*% P
## [,1] [,2] [,3] [,4]
## [1,] 36 -9 5 31
## [2,] -9 35 5 3
## [3,] 5 5 27 9
## [4,] 31 3 9 33
P %*% (L %*% t(L)) %*% t(P) # A に一致する
## [,1] [,2] [,3] [,4]
## [1,] 33 31 3 9
## [2,] 31 36 -9 5
## [3,] 3 -9 35 5
## [4,] 9 5 5 27
im.ch(A, 2)
## $P
## [,1] [,2] [,3] [,4]
## [1,] 0 0 1 0
## [2,] 1 0 0 0
## [3,] 0 1 0 0
## [4,] 0 0 0 1
##
## $R
## [,1] [,2] [,3] [,4]
## [1,] 6.0000000 0.000000 0 0
## [2,] -1.5000000 5.722762 0 0
## [3,] 5.1666667 1.878464 0 0
## [4,] 0.8333333 1.092130 0 0
L = im.ch(A, 2)$R; L %*% t(L)
## [,1] [,2] [,3] [,4]
## [1,] 36 -9 31.000000 5.000000
## [2,] -9 35 3.000000 5.000000
## [3,] 31 3 30.223070 6.357082
## [4,] 5 5 6.357082 1.887193
P = im.ch(A, 2)$P; t(P) %*% A %*% P
## [,1] [,2] [,3] [,4]
## [1,] 36 -9 31 5
## [2,] -9 35 3 5
## [3,] 31 3 33 9
## [4,] 5 5 9 27
P %*% (L %*% t(L)) %*% t(P) # A の低ランク近似
## [,1] [,2] [,3] [,4]
## [1,] 30.223070 31 3 6.357082
## [2,] 31.000000 36 -9 5.000000
## [3,] 3.000000 -9 35 5.000000
## [4,] 6.357082 5 5 1.887193