Ch.4 カーネル計算の実際

In [1]:
# 下記によって,skfda を事前にインストールする。
!pip install cvxopt
Requirement already satisfied: cvxopt in c:\users\prof-\anaconda3\lib\site-packages (1.2.0)
In [2]:
# 第 4 章のプログラムは,事前に下記が実行されていることを仮定する。
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
import cvxopt
from cvxopt import solvers
from cvxopt import matrix
import matplotlib.pyplot as plt
from matplotlib import style
style.use("seaborn-ticks")
from numpy.random import randn  # 正規乱数
from scipy.stats import norm

4.1 カーネル Ridge 回帰

In [3]:
def alpha(k, x, y):
    n = len(x)
    K = np.zeros((n, n))
    for i in range(n):
        for j in range(n):
            K[i, j] = k(x[i], x[j])
    return np.linalg.inv(K + 10e-5 * np.identity(n)).dot(y)
    # K に 10^(-5) I を加えて正則にする

例63

In [4]:
def k_p(x, y):        # カーネルの定義
    return (np.dot(x.T, y) + 1)**3


def k_g(x, y):        # カーネルの定義
    return np.exp(-(x - y)**2 / 2)
In [5]:
lam = 0.1

# データ生成
n = 50
x = np.random.randn(n)
y = 1 + x + x**2 + np.random.randn(n)

alpha_p = alpha(k_p, x, y)
alpha_g = alpha(k_g, x, y)

z = np.sort(x)
u = []
v = []
for j in range(n):
    S = 0
    for i in range(n):
        S = S + alpha_p[i] * k_p(x[i], z[j])
    u.append(S)
    S = 0
    for i in range(n):
        S = S + alpha_g[i] * k_g(x[i], z[j])
    v.append(S)

plt.scatter(x, y, facecolors="none", edgecolors="k", marker="o")
plt.plot(z, u, c="r", label="Polynomial Kernel")
plt.plot(z, v, c="b", label="Gauss Kernel")
plt.xlim(-1, 1)
plt.ylim(-1, 5)
plt.xlabel("x")
plt.ylabel("y")
plt.title("Kernel Regression")
plt.legend(loc="upper left", frameon=True, prop={"size": 14})
Out[5]:
<matplotlib.legend.Legend at 0x190729fc9d0>
In [6]:
def alpha(k, x, y):
    n = len(x)
    K = np.zeros((n, n))
    for i in range(n):
        for j in range(n):
            K[i, j] = k(x[i], x[j])
    return np.linalg.inv(K + lam * np.identity(n)).dot(y)

例64

In [7]:
def k_p(x, y):   # カーネル定義
    return (np.dot(x.T, y) + 1)**3


def k_g(x, y):   # カーネル定義
    return np.exp(-(x - y)**2 / 2)
In [8]:
lam = 0.1

# データ生成
n = 50
x = np.random.randn(n)
y = 1 + x + x**2 + np.random.randn(n)

alpha_p = alpha(k_p, x, y)
alpha_g = alpha(k_g, x, y)

z = np.sort(x)
u = []
v = []
for j in range(n):
    S = 0
    for i in range(n):
        S = S + alpha_p[i] * k_p(x[i], z[j])
    u.append(S)
    S = 0
    for i in range(n):
        S = S + alpha_g[i] * k_g(x[i], z[j])
    v.append(S)
plt.scatter(x, y, facecolors="none", edgecolors="k", marker="o")
plt.plot(z, u, c="r", label="Polynomial Kernel")
plt.plot(z, v, c="b", label="Gauss Kernel")
plt.xlim(-1, 1)
plt.ylim(-1, 5)
plt.xlabel("x")
plt.ylabel("y")
plt.title("Kernel Ridge")
plt.legend(loc="upper left", frameon=True, prop={"size": 14})
Out[8]:
<matplotlib.legend.Legend at 0x19072af8b20>

4.2 カーネル主成分分析

In [9]:
def kernel_pca_train(x, k):
    n = x.shape[0]
    K = np.zeros((n, n))
    S = [0] * n
    T = [0] * n
    for i in range(n):
        for j in range(n):
            K[i, j] = k(x[i, :], x[j, :])
    for i in range(n):
        S[i] = np.sum(K[i, :])
    for j in range(n):
        T[j] = np.sum(K[:, j])
    U = np.sum(K)
    for i in range(n):
        for j in range(n):
            K[i, j] = K[i, j] - S[i] / n - T[j] / n + U / n**2
    val, vec = np.linalg.eig(K)
    idx = val.argsort()[::-1]  # decreasing order as R
    val = val[idx]
    vec = vec[:, idx]
    alpha = np.zeros((n, n))
    for i in range(n):
        alpha[:, i] = vec[:, i] / val[i]**0.5
    return alpha
In [10]:
def kernel_pca_test(x, k, alpha, m, z):
    n = x.shape[0]
    pca = np.zeros(m)
    for i in range(n):
        pca = pca + alpha[i, 0:m] * k(x[i, :], z)
    return pca

例65

In [11]:
# def k(x, y):
#     return np.dot(x.T, y)
sigma2 = 0.01


def k(x, y):
    return np.exp(-np.linalg.norm(x - y)**2 / 2 / sigma2)


X = pd.read_csv(
    "https://raw.githubusercontent.com/selva86/datasets/master/USArrests.csv")
x = X.values[:, :-1]
n = x.shape[0]
p = x.shape[1]
alpha = kernel_pca_train(x, k)
z = np.zeros((n, 2))

for i in range(n):
    z[i, :] = kernel_pca_test(x, k, alpha, 2, x[i, :])

min1 = np.min(z[:, 0])
min2 = np.min(z[:, 1])
max1 = np.max(z[:, 0])
max2 = np.max(z[:, 1])

plt.xlim(min1, max1)
plt.ylim(min2, max2)
plt.xlabel("First")
plt.ylabel("Second")
plt.title("Kernel PCA (Gauss 0.01)")
for i in range(n):
    if i != 4:
        plt.text(x=z[i, 0], y=z[i, 1], s=i)
plt.text(z[4, 0], z[4, 1], 5, c="r")
Out[11]:
Text(-0.08750211763598995, -0.07575001678327324, '5')

4.3 カーネル SVM

例66

In [12]:
def K_linear(x, y):
    return x.T @ y


def K_poly(x, y):
    return (1 + x.T @ y)**2


def svm_2(X, y, C, K):
    eps = 0.0001
    n = X.shape[0]
    P = np.zeros((n, n))
    for i in range(n):
        for j in range(n):
            P[i, j] = K(X[i, :], X[j, :]) * y[i] * y[j]
    # パッケージにある matrix 関数を使って指定する必要がある
    P = matrix(P + np.eye(n) * eps)
    A = matrix(-y.T.astype(np.float))
    b = matrix(np.array([0]).astype(np.float))
    h = matrix(np.array([C] * n + [0] * n).reshape(-1, 1).astype(np.float))
    G = matrix(np.concatenate([np.diag(np.ones(n)), np.diag(-np.ones(n))]))
    q = matrix(np.array([-1] * n).astype(np.float))
    res = cvxopt.solvers.qp(P, q, A=A, b=b, G=G, h=h)
    alpha = np.array(res["x"])  # x が本文中の alpha に対応
    beta = ((alpha * y).T @ X).reshape(2, 1)
    index = (eps < alpha[:, 0]) & (alpha[:, 0] < C - eps)
    beta_0 = np.mean(y[index] - X[index, :] @ beta)
    return {"alpha": alpha, "beta": beta, "beta_0": beta_0}


def plot_kernel(K, line):  # 引数 line で線の種類を指定する
    res = svm_2(X, y, 1, K)
    alpha = res["alpha"][:, 0]
    beta_0 = res["beta_0"]

    def f(u, v):
        S = beta_0
        for i in range(X.shape[0]):
            S = S + alpha[i] * y[i] * K(X[i, :], [u, v])
        return S[0]
    # ww は f(x,y) での高さ。これから輪郭を求めることができる
    uu = np.arange(-2, 2, 0.1)
    vv = np.arange(-2, 2, 0.1)
    ww = []
    for v in vv:
        w = []
        for u in uu:
            w.append(f(u, v))
        ww.append(w)
    plt.contour(uu, vv, ww, levels=0, linestyles=line)
In [13]:
a = 3
b = -1
n = 200
X = randn(n, 2)
y = np.sign(a * X[:, 0] + b * X[:, 1]**2 + 0.3 * randn(n))
y = y.reshape(-1, 1)
for i in range(n):
    if y[i] == 1:
        plt.scatter(X[i, 0], X[i, 1], c="red")
    else:
        plt.scatter(X[i, 0], X[i, 1], c="blue")
plot_kernel(K_poly, line="dashed")
plot_kernel(K_linear, line="solid")
     pcost       dcost       gap    pres   dres
 0: -7.2292e+01 -5.2099e+02  3e+03  3e+00  9e-15
 1: -4.6032e+01 -3.4620e+02  6e+02  5e-01  6e-15
 2: -2.8614e+01 -1.2527e+02  2e+02  1e-01  8e-15
 3: -2.5048e+01 -4.3961e+01  3e+01  2e-02  6e-15
 4: -2.6800e+01 -3.2013e+01  7e+00  4e-03  3e-15
 5: -2.7516e+01 -2.8996e+01  2e+00  7e-04  3e-15
 6: -2.7876e+01 -2.8240e+01  4e-01  1e-04  3e-15
 7: -2.7977e+01 -2.8058e+01  9e-02  3e-05  3e-15
 8: -2.8002e+01 -2.8015e+01  2e-02  3e-06  3e-15
 9: -2.8007e+01 -2.8007e+01  4e-04  7e-08  3e-15
10: -2.8007e+01 -2.8007e+01  7e-06  1e-09  3e-15
Optimal solution found.
     pcost       dcost       gap    pres   dres
 0: -9.1436e+01 -4.5064e+02  2e+03  3e+00  2e-15
 1: -6.2437e+01 -2.8732e+02  4e+02  3e-01  2e-15
 2: -5.1296e+01 -9.9006e+01  6e+01  4e-02  1e-14
 3: -5.6734e+01 -6.7307e+01  1e+01  7e-03  2e-15
 4: -5.9337e+01 -6.2708e+01  4e+00  2e-03  1e-15
 5: -5.9955e+01 -6.1570e+01  2e+00  7e-04  2e-15
 6: -6.0333e+01 -6.0988e+01  7e-01  2e-04  1e-15
 7: -6.0533e+01 -6.0716e+01  2e-01  4e-05  1e-15
 8: -6.0608e+01 -6.0623e+01  2e-02  2e-06  2e-15
 9: -6.0614e+01 -6.0616e+01  2e-03  2e-07  2e-15
10: -6.0615e+01 -6.0615e+01  6e-05  2e-09  2e-15
11: -6.0615e+01 -6.0615e+01  9e-07  2e-11  2e-15
Optimal solution found.

4.4 スプライン

例67

In [14]:
# d, h で基底を求める関数を構成している
def d(j, x, knots):
    K = len(knots)
    return (np.maximum((x - knots[j])**3, 0)
            - np.maximum((x - knots[K-1])**3, 0)) / (knots[K-1] - knots[j])


def h(j, x, knots):
    K = len(knots)
    if j == 0:
        return 1
    elif j == 1:
        return x
    else:
        return d(j-1, x, knots) - d(K-2, x, knots)


# G は,2 回微分した関数を積分した値になっている
def G(x):    # x の各値が昇順になっていることを仮定している
    n = len(x)
    g = np.zeros((n, n))
    for i in range(2, n-1):
        for j in range(i, n):
            g[i, j] = 12 * (x[n-1] - x[n-2]) * (x[n-2] - x[j-2])\
                * (x[n-2] - x[i-2]) / (x[n-1] - x[i-2])\
                / (x[n-1] - x[j-2]) + (12*x[n-2] + 6*x[j-2] - 18*x[i-2])\
                * (x[n-2] - x[j-2])**2 / (x[n-1] - x[i-2]) / (x[n-1]-x[j-2])
            g[j, i] = g[i, j]
    return g
In [15]:
# メインの処理
n = 100
x = np.random.uniform(-5, 5, n)
y = x + np.sin(x)*2 + np.random.randn(n)   # データ生成
index = np.argsort(x)
x = x[index]
y = y[index]
X = np.zeros((n, n))
X[:, 0] = 1
for j in range(1, n):
    for i in range(n):
        X[i, j] = h(j, x[i], x)            # 行列 X の生成
GG = G(x)                                  # 行列 G の生成
lam_set = [1, 30, 80]
col_set = ["red", "blue", "green"]

plt.figure()
plt.ylim(-8, 8)
plt.xlabel("x")
plt.ylabel("g(x)")

for i in range(3):
    lam = lam_set[i]
    gamma = np.dot(np.dot(np.linalg.inv(np.dot(X.T, X) + lam * GG), X.T), y)

    def g(u):
        S = gamma[0]
        for j in range(1, n):
            S = S + gamma[j] * h(j, u, x)
        return S
    u_seq = np.arange(-8, 8, 0.02)
    v_seq = []
    for u in u_seq:
        v_seq.append(g(u))
    plt.plot(u_seq, v_seq, c=col_set[i], label=r"$\lambda = %d$" % lam_set[i])
plt.legend()
plt.scatter(x, y, facecolors="none", edgecolors="k", marker="o")
plt.title("smooth spline (n = 100)")
Out[15]:
Text(0.5, 1.0, 'smooth spline (n = 100)')

4.5 Random Fourier Features

例68

In [16]:
sigma = 10
sigma2 = sigma**2


def k(x, y):
    return np.exp(-(x-y)**2 / (2 * sigma2))


def z(x):
    return np.sqrt(2/m) * np.cos(w*x+b)


def zz(x, y):
    return np.sum(z(x) * z(y))


u = np.zeros((1000, 3))
m_seq = [20, 100, 400]
for i in range(1000):
    x = randn(1)
    y = randn(1)
    for j in range(3):
        m = m_seq[j]
        w = randn(m) / sigma
        b = np.random.rand(m) * 2 * np.pi
        u[i, j] = zz(x, y) - k(x, y)
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.boxplot([u[:, 0], u[:, 1], u[:, 2]], labels=["20", "100", "400"])
ax.set_xlabel("m")
ax.set_ylim(-0.5, 0.6)
plt.show()

例69

In [17]:
sigma = 10
sigma2 = sigma**2

# 関数 z
m = 20
w = randn(m) / sigma
b = np.random.rand(m) * 2 * np.pi


def z(u, m):
    return np.sqrt(2/m) * np.cos(w * u + b)


# Gauss カーネル
def k(x, y):
    return np.exp(-(x-y)**2 / (2 * sigma2))


# データ生成
n = 200
x = randn(n) / 2
y = 1 + 5 * np.sin(x/10) + 5 * x**2 + randn(n)
x_min = np.min(x)
x_max = np.max(x)
y_min = np.min(y)
y_max = np.max(y)
lam = 0.001
# lam = 0.9


# 低ランク近似の関数
def alpha_rff(x, y, m):
    n = len(x)
    Z = np.zeros((n, m))
    for i in range(n):
        Z[i, :] = z(x[i], m)
    beta = np.dot(np.linalg.inv(np.dot(Z.T, Z) + lam * np.eye(m)),
                  np.dot(Z.T, y))
    return(beta)


# 通常の関数
def alpha(k, x, y):
    n = len(x)
    K = np.zeros((n, n))
    for i in range(n):
        for j in range(n):
            K[i, j] = k(x[i], x[j])
    alpha = np.dot(np.linalg.inv(K + lam * np.eye(n)), y)
    return alpha


# 数値で比較する
alpha_hat = alpha(k, x, y)
beta_hat = alpha_rff(x, y, m)
r = np.sort(x)
u = np.zeros(n)
v = np.zeros(n)
for j in range(n):
    S = 0
    for i in range(n):
        S = S + alpha_hat[i] * k(x[i], r[j])
    u[j] = S
    v[j] = np.sum(beta_hat * z(r[j], m))

plt.scatter(x, y, facecolors="none", edgecolors="k", marker="o")
plt.plot(r, u, c="r", label="w/o Approx")
plt.plot(r, v, c="b", label="with Approx")
plt.xlim(-1.5, 2)
plt.ylim(-2, 8)
plt.xlabel("x")
plt.ylabel("y")
plt.title("Kernel Regression")
plt.legend(loc="upper left", frameon=True, prop={"size": 14})
Out[17]:
<matplotlib.legend.Legend at 0x19072eeedf0>

4.6 Nyström 近似

例70

In [18]:
sigma2 = 1


def k(x, y):
    return np.exp(-(x-y)**2 / (2 * sigma2))


n = 300
x = randn(n) / 2
y = 3 - 2 * x**2 + 3 * x**3 + 2 * randn(n)
lam = 10**(-5)
m = 10

K = np.zeros((n, n))
for i in range(n):
    for j in range(n):
        K[i, j] = k(x[i], x[j])


# 低ランク近似の関数
def alpha_m(K, x, y, m):
    n = len(x)
    U, D, V = np.linalg.svd(K[:m, :m])
    u = np.zeros((n, m))
    for i in range(m):
        for j in range(n):
            u[j, i] = np.sqrt(m/n) * np.sum(K[j, :m] * U[:m, i] / D[i])
    mu = D * n / m
    R = np.zeros((n, m))
    for i in range(m):
        R[:, i] = np.sqrt(mu[i]) * u[:, i]
    Z = np.linalg.inv(np.dot(R.T, R) + lam * np.eye(m))
    alpha = np.dot((np.eye(n) - np.dot(np.dot(R, Z), R.T)), y) / lam
    return(alpha)


# 通常の関数
def alpha(K, x, y):
    alpha = np.dot(np.linalg.inv(K + lam * np.eye(n)), y)
    return alpha


# 数値で比較する
alpha_1 = alpha(K, x, y)
alpha_2 = alpha_m(K, x, y, m)
r = np.sort(x)
w = np.zeros(n)
v = np.zeros(n)
for j in range(n):
    S_1 = 0
    S_2 = 0
    for i in range(n):
        S_1 = S_1 + alpha_1[i] * k(x[i], r[j])
        S_2 = S_2 + alpha_2[i] * k(x[i], r[j])
    w[j] = S_1
    v[j] = S_2
plt.scatter(x, y, facecolors="none", edgecolors="k", marker="o")
plt.plot(r, w, c="r", label="w/o Approx")
plt.plot(r, v, c="b", label="with Approx")
plt.xlim(-1.5, 2)
plt.ylim(-2, 8)
plt.xlabel("x")
plt.ylabel("y")
plt.title("Kernel Regression")
plt.legend(loc="upper left", frameon=True, prop={"size": 14})
Out[18]:
<matplotlib.legend.Legend at 0x19072e83130>

4.7 不完全 Cholesky 分解

In [19]:
def im_ch(A, m):
    n = A.shape[1]
    R = np.zeros((n, n))
    P = np.eye(n)
    for i in range(m):
        max_R = -np.inf
        for j in range(i, n):
            RR = A[j, j]
            for h in range(i):
                RR = RR - R[j, h]**2
            if RR > max_R:
                k = j
                max_R = RR
        R[i, i] = np.sqrt(max_R)
        if k != i:
            for j in range(i):
                w = R[i, j]
                R[i, j] = R[k, j]
                R[k, j] = w
            for j in range(n):
                w = A[j, k]
                A[j, k] = A[j, i]
                A[j, i] = w
            for j in range(n):
                w = A[k, j]
                A[k, j] = A[i, j]
                A[i, j] = w
            Q = np.eye(n)
            Q[i, i] = 0
            Q[k, k] = 0
            Q[i, k] = 1
            Q[k, i] = 1
            P = np.dot(P, Q)
        if i < n:
            for j in range(i+1, n):
                S = A[j, i]
                for h in range(i):
                    S = S - R[i, h] * R[j, h]
                R[j, i] = S / R[i, i]
    return np.dot(P, R)
In [20]:
# データ生成 A が非負定値となるようにする
n = 5
D = np.matrix([[np.random.randint(-n, n) for i in range(n)] for j in range(n)])
A = np.dot(D, D.T)
A
Out[20]:
matrix([[ 47,   2, -25, -11, -18],
        [  2,  40,  -6, -21, -12],
        [-25,  -6,  27,   8,  12],
        [-11, -21,   8,  50, -21],
        [-18, -12,  12, -21,  47]])
In [21]:
L = im_ch(A, 5)
np.dot(L, L.T)
Out[21]:
array([[ 47.,   2., -25., -11., -18.],
       [  2.,  40.,  -6., -21., -12.],
       [-25.,  -6.,  27.,   8.,  12.],
       [-11., -21.,   8.,  50., -21.],
       [-18., -12.,  12., -21.,  47.]])
In [22]:
# 階数 3 の不完全 Cholesky 分解
L = im_ch(A, 3)
np.linalg.eig(A)
Out[22]:
(array([82.77340824, 72.04487579,  3.81288314, 10.25781357, 42.11101927]),
 matrix([[-0.27317994,  0.7660152 ,  0.55200866,  0.18070534,  0.03500652],
         [ 0.63955121,  0.07590874,  0.38663629, -0.44084072, -0.49130841],
         [ 0.34911282, -0.27814841,  0.49196832,  0.05967017,  0.74509083],
         [-0.4562766 , -0.02198021,  0.07084621, -0.85707575,  0.22744343],
         [-0.43160037, -0.57411577,  0.54657736,  0.18672351, -0.38794295]]))
In [23]:
# 復元しようとしても A には戻らない
B = np.dot(L, L.T)
B
Out[23]:
array([[ 50.        , -11.        , -21.        ,   8.        ,
        -21.        ],
       [-11.        ,  47.        ,   2.        , -25.        ,
        -18.        ],
       [-21.        ,   2.        ,  40.        ,  -6.        ,
        -12.        ],
       [  8.        , -25.        ,  -6.        ,  13.91244559,
         11.29178536],
       [-21.        , -18.        , -12.        ,  11.29178536,
         36.10983704]])
In [24]:
# B の最初の 3 個の固有値は,A のそれに近い
np.linalg.eig(B)
Out[24]:
(array([7.89421470e+01+0.00000000e+00j, 6.79857638e+01+0.00000000e+00j,
        4.00943718e+01+0.00000000e+00j, 2.75186612e-15+3.04151101e-15j,
        2.75186612e-15-3.04151101e-15j]),
 array([[ 0.46477802+0.j        , -0.68886109+0.j        ,
         -0.13078191+0.j        , -0.03763862+0.40962613j,
         -0.03763862-0.40962613j],
        [-0.62929665+0.j        , -0.3115869 +0.j        ,
          0.47738305+0.j        ,  0.27195469+0.26767313j,
          0.27195469-0.26767313j],
        [-0.42046595+0.j        ,  0.2041816 +0.j        ,
         -0.76083386+0.j        ,  0.03628343+0.34784402j,
          0.03628343-0.34784402j],
        [ 0.38220146+0.j        ,  0.14572672+0.j        ,
         -0.15286987+0.j        ,  0.5681235 +0.j        ,
          0.5681235 -0.j        ],
        [ 0.2551422 +0.j        ,  0.6045309 +0.j        ,
          0.39085055+0.j        , -0.05192363+0.48724654j,
         -0.05192363-0.48724654j]]))
In [25]:
# B の階数は 3 になっている
np.linalg.matrix_rank(B)
Out[25]:
3