Ch.4　カーネル計算の実際¶

# 下記によって，skfda を事前にインストールする。
!pip install cvxopt

Requirement already satisfied: cvxopt in c:\users\prof-\anaconda3\lib\site-packages (1.2.0)

# 第 4 章のプログラムは，事前に下記が実行されていることを仮定する。
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
import cvxopt
from cvxopt import solvers
from cvxopt import matrix
import matplotlib.pyplot as plt
from matplotlib import style
style.use("seaborn-ticks")
from numpy.random import randn  # 正規乱数
from scipy.stats import norm

4.1　カーネル Ridge 回帰¶

def alpha(k, x, y):
    n = len(x)
    K = np.zeros((n, n))
    for i in range(n):
        for j in range(n):
            K[i, j] = k(x[i], x[j])
    return np.linalg.inv(K + 10e-5 * np.identity(n)).dot(y)
    # K に 10^(-5) I を加えて正則にする

例63¶

def k_p(x, y):        # カーネルの定義
    return (np.dot(x.T, y) + 1)**3


def k_g(x, y):        # カーネルの定義
    return np.exp(-(x - y)**2 / 2)

lam = 0.1

# データ生成
n = 50
x = np.random.randn(n)
y = 1 + x + x**2 + np.random.randn(n)

alpha_p = alpha(k_p, x, y)
alpha_g = alpha(k_g, x, y)

z = np.sort(x)
u = []
v = []
for j in range(n):
    S = 0
    for i in range(n):
        S = S + alpha_p[i] * k_p(x[i], z[j])
    u.append(S)
    S = 0
    for i in range(n):
        S = S + alpha_g[i] * k_g(x[i], z[j])
    v.append(S)

plt.scatter(x, y, facecolors="none", edgecolors="k", marker="o")
plt.plot(z, u, c="r", label="Polynomial Kernel")
plt.plot(z, v, c="b", label="Gauss Kernel")
plt.xlim(-1, 1)
plt.ylim(-1, 5)
plt.xlabel("x")
plt.ylabel("y")
plt.title("Kernel Regression")
plt.legend(loc="upper left", frameon=True, prop={"size": 14})

<matplotlib.legend.Legend at 0x190729fc9d0>

def alpha(k, x, y):
    n = len(x)
    K = np.zeros((n, n))
    for i in range(n):
        for j in range(n):
            K[i, j] = k(x[i], x[j])
    return np.linalg.inv(K + lam * np.identity(n)).dot(y)

例64¶

def k_p(x, y):   # カーネル定義
    return (np.dot(x.T, y) + 1)**3


def k_g(x, y):   # カーネル定義
    return np.exp(-(x - y)**2 / 2)

lam = 0.1

# データ生成
n = 50
x = np.random.randn(n)
y = 1 + x + x**2 + np.random.randn(n)

alpha_p = alpha(k_p, x, y)
alpha_g = alpha(k_g, x, y)

z = np.sort(x)
u = []
v = []
for j in range(n):
    S = 0
    for i in range(n):
        S = S + alpha_p[i] * k_p(x[i], z[j])
    u.append(S)
    S = 0
    for i in range(n):
        S = S + alpha_g[i] * k_g(x[i], z[j])
    v.append(S)
plt.scatter(x, y, facecolors="none", edgecolors="k", marker="o")
plt.plot(z, u, c="r", label="Polynomial Kernel")
plt.plot(z, v, c="b", label="Gauss Kernel")
plt.xlim(-1, 1)
plt.ylim(-1, 5)
plt.xlabel("x")
plt.ylabel("y")
plt.title("Kernel Ridge")
plt.legend(loc="upper left", frameon=True, prop={"size": 14})

<matplotlib.legend.Legend at 0x19072af8b20>

4.2　カーネル主成分分析¶

def kernel_pca_train(x, k):
    n = x.shape[0]
    K = np.zeros((n, n))
    S = [0] * n
    T = [0] * n
    for i in range(n):
        for j in range(n):
            K[i, j] = k(x[i, :], x[j, :])
    for i in range(n):
        S[i] = np.sum(K[i, :])
    for j in range(n):
        T[j] = np.sum(K[:, j])
    U = np.sum(K)
    for i in range(n):
        for j in range(n):
            K[i, j] = K[i, j] - S[i] / n - T[j] / n + U / n**2
    val, vec = np.linalg.eig(K)
    idx = val.argsort()[::-1]  # decreasing order as R
    val = val[idx]
    vec = vec[:, idx]
    alpha = np.zeros((n, n))
    for i in range(n):
        alpha[:, i] = vec[:, i] / val[i]**0.5
    return alpha

def kernel_pca_test(x, k, alpha, m, z):
    n = x.shape[0]
    pca = np.zeros(m)
    for i in range(n):
        pca = pca + alpha[i, 0:m] * k(x[i, :], z)
    return pca

例65¶

# def k(x, y):
#     return np.dot(x.T, y)
sigma2 = 0.01


def k(x, y):
    return np.exp(-np.linalg.norm(x - y)**2 / 2 / sigma2)


X = pd.read_csv(
    "https://raw.githubusercontent.com/selva86/datasets/master/USArrests.csv")
x = X.values[:, :-1]
n = x.shape[0]
p = x.shape[1]
alpha = kernel_pca_train(x, k)
z = np.zeros((n, 2))

for i in range(n):
    z[i, :] = kernel_pca_test(x, k, alpha, 2, x[i, :])

min1 = np.min(z[:, 0])
min2 = np.min(z[:, 1])
max1 = np.max(z[:, 0])
max2 = np.max(z[:, 1])

plt.xlim(min1, max1)
plt.ylim(min2, max2)
plt.xlabel("First")
plt.ylabel("Second")
plt.title("Kernel PCA (Gauss 0.01)")
for i in range(n):
    if i != 4:
        plt.text(x=z[i, 0], y=z[i, 1], s=i)
plt.text(z[4, 0], z[4, 1], 5, c="r")

Text(-0.08750211763598995, -0.07575001678327324, '5')

4.3　カーネル SVM¶

例66¶

def K_linear(x, y):
    return x.T @ y


def K_poly(x, y):
    return (1 + x.T @ y)**2


def svm_2(X, y, C, K):
    eps = 0.0001
    n = X.shape[0]
    P = np.zeros((n, n))
    for i in range(n):
        for j in range(n):
            P[i, j] = K(X[i, :], X[j, :]) * y[i] * y[j]
    # パッケージにある matrix 関数を使って指定する必要がある
    P = matrix(P + np.eye(n) * eps)
    A = matrix(-y.T.astype(np.float))
    b = matrix(np.array([0]).astype(np.float))
    h = matrix(np.array([C] * n + [0] * n).reshape(-1, 1).astype(np.float))
    G = matrix(np.concatenate([np.diag(np.ones(n)), np.diag(-np.ones(n))]))
    q = matrix(np.array([-1] * n).astype(np.float))
    res = cvxopt.solvers.qp(P, q, A=A, b=b, G=G, h=h)
    alpha = np.array(res["x"])  # x が本文中の alpha に対応
    beta = ((alpha * y).T @ X).reshape(2, 1)
    index = (eps < alpha[:, 0]) & (alpha[:, 0] < C - eps)
    beta_0 = np.mean(y[index] - X[index, :] @ beta)
    return {"alpha": alpha, "beta": beta, "beta_0": beta_0}


def plot_kernel(K, line):  # 引数 line で線の種類を指定する
    res = svm_2(X, y, 1, K)
    alpha = res["alpha"][:, 0]
    beta_0 = res["beta_0"]

    def f(u, v):
        S = beta_0
        for i in range(X.shape[0]):
            S = S + alpha[i] * y[i] * K(X[i, :], [u, v])
        return S[0]
    # ww は f(x,y) での高さ。これから輪郭を求めることができる
    uu = np.arange(-2, 2, 0.1)
    vv = np.arange(-2, 2, 0.1)
    ww = []
    for v in vv:
        w = []
        for u in uu:
            w.append(f(u, v))
        ww.append(w)
    plt.contour(uu, vv, ww, levels=0, linestyles=line)

a = 3
b = -1
n = 200
X = randn(n, 2)
y = np.sign(a * X[:, 0] + b * X[:, 1]**2 + 0.3 * randn(n))
y = y.reshape(-1, 1)
for i in range(n):
    if y[i] == 1:
        plt.scatter(X[i, 0], X[i, 1], c="red")
    else:
        plt.scatter(X[i, 0], X[i, 1], c="blue")
plot_kernel(K_poly, line="dashed")
plot_kernel(K_linear, line="solid")

     pcost       dcost       gap    pres   dres
 0: -7.2292e+01 -5.2099e+02  3e+03  3e+00  9e-15
 1: -4.6032e+01 -3.4620e+02  6e+02  5e-01  6e-15
 2: -2.8614e+01 -1.2527e+02  2e+02  1e-01  8e-15
 3: -2.5048e+01 -4.3961e+01  3e+01  2e-02  6e-15
 4: -2.6800e+01 -3.2013e+01  7e+00  4e-03  3e-15
 5: -2.7516e+01 -2.8996e+01  2e+00  7e-04  3e-15
 6: -2.7876e+01 -2.8240e+01  4e-01  1e-04  3e-15
 7: -2.7977e+01 -2.8058e+01  9e-02  3e-05  3e-15
 8: -2.8002e+01 -2.8015e+01  2e-02  3e-06  3e-15
 9: -2.8007e+01 -2.8007e+01  4e-04  7e-08  3e-15
10: -2.8007e+01 -2.8007e+01  7e-06  1e-09  3e-15
Optimal solution found.
     pcost       dcost       gap    pres   dres
 0: -9.1436e+01 -4.5064e+02  2e+03  3e+00  2e-15
 1: -6.2437e+01 -2.8732e+02  4e+02  3e-01  2e-15
 2: -5.1296e+01 -9.9006e+01  6e+01  4e-02  1e-14
 3: -5.6734e+01 -6.7307e+01  1e+01  7e-03  2e-15
 4: -5.9337e+01 -6.2708e+01  4e+00  2e-03  1e-15
 5: -5.9955e+01 -6.1570e+01  2e+00  7e-04  2e-15
 6: -6.0333e+01 -6.0988e+01  7e-01  2e-04  1e-15
 7: -6.0533e+01 -6.0716e+01  2e-01  4e-05  1e-15
 8: -6.0608e+01 -6.0623e+01  2e-02  2e-06  2e-15
 9: -6.0614e+01 -6.0616e+01  2e-03  2e-07  2e-15
10: -6.0615e+01 -6.0615e+01  6e-05  2e-09  2e-15
11: -6.0615e+01 -6.0615e+01  9e-07  2e-11  2e-15
Optimal solution found.

4.4　スプライン¶

例67¶

# d, h で基底を求める関数を構成している
def d(j, x, knots):
    K = len(knots)
    return (np.maximum((x - knots[j])**3, 0)
            - np.maximum((x - knots[K-1])**3, 0)) / (knots[K-1] - knots[j])


def h(j, x, knots):
    K = len(knots)
    if j == 0:
        return 1
    elif j == 1:
        return x
    else:
        return d(j-1, x, knots) - d(K-2, x, knots)


# G は，2 回微分した関数を積分した値になっている
def G(x):    # x の各値が昇順になっていることを仮定している
    n = len(x)
    g = np.zeros((n, n))
    for i in range(2, n-1):
        for j in range(i, n):
            g[i, j] = 12 * (x[n-1] - x[n-2]) * (x[n-2] - x[j-2])\
                * (x[n-2] - x[i-2]) / (x[n-1] - x[i-2])\
                / (x[n-1] - x[j-2]) + (12*x[n-2] + 6*x[j-2] - 18*x[i-2])\
                * (x[n-2] - x[j-2])**2 / (x[n-1] - x[i-2]) / (x[n-1]-x[j-2])
            g[j, i] = g[i, j]
    return g

# メインの処理
n = 100
x = np.random.uniform(-5, 5, n)
y = x + np.sin(x)*2 + np.random.randn(n)   # データ生成
index = np.argsort(x)
x = x[index]
y = y[index]
X = np.zeros((n, n))
X[:, 0] = 1
for j in range(1, n):
    for i in range(n):
        X[i, j] = h(j, x[i], x)            # 行列 X の生成
GG = G(x)                                  # 行列 G の生成
lam_set = [1, 30, 80]
col_set = ["red", "blue", "green"]

plt.figure()
plt.ylim(-8, 8)
plt.xlabel("x")
plt.ylabel("g(x)")

for i in range(3):
    lam = lam_set[i]
    gamma = np.dot(np.dot(np.linalg.inv(np.dot(X.T, X) + lam * GG), X.T), y)

    def g(u):
        S = gamma[0]
        for j in range(1, n):
            S = S + gamma[j] * h(j, u, x)
        return S
    u_seq = np.arange(-8, 8, 0.02)
    v_seq = []
    for u in u_seq:
        v_seq.append(g(u))
    plt.plot(u_seq, v_seq, c=col_set[i], label=r"$\lambda = %d$" % lam_set[i])
plt.legend()
plt.scatter(x, y, facecolors="none", edgecolors="k", marker="o")
plt.title("smooth spline (n = 100)")

Text(0.5, 1.0, 'smooth spline (n = 100)')

4.5　Random Fourier Features¶

例68¶

sigma = 10
sigma2 = sigma**2


def k(x, y):
    return np.exp(-(x-y)**2 / (2 * sigma2))


def z(x):
    return np.sqrt(2/m) * np.cos(w*x+b)


def zz(x, y):
    return np.sum(z(x) * z(y))


u = np.zeros((1000, 3))
m_seq = [20, 100, 400]
for i in range(1000):
    x = randn(1)
    y = randn(1)
    for j in range(3):
        m = m_seq[j]
        w = randn(m) / sigma
        b = np.random.rand(m) * 2 * np.pi
        u[i, j] = zz(x, y) - k(x, y)
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.boxplot([u[:, 0], u[:, 1], u[:, 2]], labels=["20", "100", "400"])
ax.set_xlabel("m")
ax.set_ylim(-0.5, 0.6)
plt.show()

例69¶

sigma = 10
sigma2 = sigma**2

# 関数 z
m = 20
w = randn(m) / sigma
b = np.random.rand(m) * 2 * np.pi


def z(u, m):
    return np.sqrt(2/m) * np.cos(w * u + b)


# Gauss カーネル
def k(x, y):
    return np.exp(-(x-y)**2 / (2 * sigma2))


# データ生成
n = 200
x = randn(n) / 2
y = 1 + 5 * np.sin(x/10) + 5 * x**2 + randn(n)
x_min = np.min(x)
x_max = np.max(x)
y_min = np.min(y)
y_max = np.max(y)
lam = 0.001
# lam = 0.9


# 低ランク近似の関数
def alpha_rff(x, y, m):
    n = len(x)
    Z = np.zeros((n, m))
    for i in range(n):
        Z[i, :] = z(x[i], m)
    beta = np.dot(np.linalg.inv(np.dot(Z.T, Z) + lam * np.eye(m)),
                  np.dot(Z.T, y))
    return(beta)


# 通常の関数
def alpha(k, x, y):
    n = len(x)
    K = np.zeros((n, n))
    for i in range(n):
        for j in range(n):
            K[i, j] = k(x[i], x[j])
    alpha = np.dot(np.linalg.inv(K + lam * np.eye(n)), y)
    return alpha


# 数値で比較する
alpha_hat = alpha(k, x, y)
beta_hat = alpha_rff(x, y, m)
r = np.sort(x)
u = np.zeros(n)
v = np.zeros(n)
for j in range(n):
    S = 0
    for i in range(n):
        S = S + alpha_hat[i] * k(x[i], r[j])
    u[j] = S
    v[j] = np.sum(beta_hat * z(r[j], m))

plt.scatter(x, y, facecolors="none", edgecolors="k", marker="o")
plt.plot(r, u, c="r", label="w/o Approx")
plt.plot(r, v, c="b", label="with Approx")
plt.xlim(-1.5, 2)
plt.ylim(-2, 8)
plt.xlabel("x")
plt.ylabel("y")
plt.title("Kernel Regression")
plt.legend(loc="upper left", frameon=True, prop={"size": 14})

<matplotlib.legend.Legend at 0x19072eeedf0>

4.6　Nyström 近似

例70¶

sigma2 = 1


def k(x, y):
    return np.exp(-(x-y)**2 / (2 * sigma2))


n = 300
x = randn(n) / 2
y = 3 - 2 * x**2 + 3 * x**3 + 2 * randn(n)
lam = 10**(-5)
m = 10

K = np.zeros((n, n))
for i in range(n):
    for j in range(n):
        K[i, j] = k(x[i], x[j])


# 低ランク近似の関数
def alpha_m(K, x, y, m):
    n = len(x)
    U, D, V = np.linalg.svd(K[:m, :m])
    u = np.zeros((n, m))
    for i in range(m):
        for j in range(n):
            u[j, i] = np.sqrt(m/n) * np.sum(K[j, :m] * U[:m, i] / D[i])
    mu = D * n / m
    R = np.zeros((n, m))
    for i in range(m):
        R[:, i] = np.sqrt(mu[i]) * u[:, i]
    Z = np.linalg.inv(np.dot(R.T, R) + lam * np.eye(m))
    alpha = np.dot((np.eye(n) - np.dot(np.dot(R, Z), R.T)), y) / lam
    return(alpha)


# 通常の関数
def alpha(K, x, y):
    alpha = np.dot(np.linalg.inv(K + lam * np.eye(n)), y)
    return alpha


# 数値で比較する
alpha_1 = alpha(K, x, y)
alpha_2 = alpha_m(K, x, y, m)
r = np.sort(x)
w = np.zeros(n)
v = np.zeros(n)
for j in range(n):
    S_1 = 0
    S_2 = 0
    for i in range(n):
        S_1 = S_1 + alpha_1[i] * k(x[i], r[j])
        S_2 = S_2 + alpha_2[i] * k(x[i], r[j])
    w[j] = S_1
    v[j] = S_2
plt.scatter(x, y, facecolors="none", edgecolors="k", marker="o")
plt.plot(r, w, c="r", label="w/o Approx")
plt.plot(r, v, c="b", label="with Approx")
plt.xlim(-1.5, 2)
plt.ylim(-2, 8)
plt.xlabel("x")
plt.ylabel("y")
plt.title("Kernel Regression")
plt.legend(loc="upper left", frameon=True, prop={"size": 14})

<matplotlib.legend.Legend at 0x19072e83130>

4.7　不完全 Cholesky 分解¶

def im_ch(A, m):
    n = A.shape[1]
    R = np.zeros((n, n))
    P = np.eye(n)
    for i in range(m):
        max_R = -np.inf
        for j in range(i, n):
            RR = A[j, j]
            for h in range(i):
                RR = RR - R[j, h]**2
            if RR > max_R:
                k = j
                max_R = RR
        R[i, i] = np.sqrt(max_R)
        if k != i:
            for j in range(i):
                w = R[i, j]
                R[i, j] = R[k, j]
                R[k, j] = w
            for j in range(n):
                w = A[j, k]
                A[j, k] = A[j, i]
                A[j, i] = w
            for j in range(n):
                w = A[k, j]
                A[k, j] = A[i, j]
                A[i, j] = w
            Q = np.eye(n)
            Q[i, i] = 0
            Q[k, k] = 0
            Q[i, k] = 1
            Q[k, i] = 1
            P = np.dot(P, Q)
        if i < n:
            for j in range(i+1, n):
                S = A[j, i]
                for h in range(i):
                    S = S - R[i, h] * R[j, h]
                R[j, i] = S / R[i, i]
    return np.dot(P, R)

# データ生成 A が非負定値となるようにする
n = 5
D = np.matrix([[np.random.randint(-n, n) for i in range(n)] for j in range(n)])
A = np.dot(D, D.T)
A

matrix([[ 47,   2, -25, -11, -18],
        [  2,  40,  -6, -21, -12],
        [-25,  -6,  27,   8,  12],
        [-11, -21,   8,  50, -21],
        [-18, -12,  12, -21,  47]])

L = im_ch(A, 5)
np.dot(L, L.T)

array([[ 47.,   2., -25., -11., -18.],
       [  2.,  40.,  -6., -21., -12.],
       [-25.,  -6.,  27.,   8.,  12.],
       [-11., -21.,   8.,  50., -21.],
       [-18., -12.,  12., -21.,  47.]])

# 階数 3 の不完全 Cholesky 分解
L = im_ch(A, 3)
np.linalg.eig(A)

(array([82.77340824, 72.04487579,  3.81288314, 10.25781357, 42.11101927]),
 matrix([[-0.27317994,  0.7660152 ,  0.55200866,  0.18070534,  0.03500652],
         [ 0.63955121,  0.07590874,  0.38663629, -0.44084072, -0.49130841],
         [ 0.34911282, -0.27814841,  0.49196832,  0.05967017,  0.74509083],
         [-0.4562766 , -0.02198021,  0.07084621, -0.85707575,  0.22744343],
         [-0.43160037, -0.57411577,  0.54657736,  0.18672351, -0.38794295]]))

# 復元しようとしても A には戻らない
B = np.dot(L, L.T)
B

array([[ 50.        , -11.        , -21.        ,   8.        ,
        -21.        ],
       [-11.        ,  47.        ,   2.        , -25.        ,
        -18.        ],
       [-21.        ,   2.        ,  40.        ,  -6.        ,
        -12.        ],
       [  8.        , -25.        ,  -6.        ,  13.91244559,
         11.29178536],
       [-21.        , -18.        , -12.        ,  11.29178536,
         36.10983704]])

# B の最初の 3 個の固有値は，A のそれに近い
np.linalg.eig(B)

(array([7.89421470e+01+0.00000000e+00j, 6.79857638e+01+0.00000000e+00j,
        4.00943718e+01+0.00000000e+00j, 2.75186612e-15+3.04151101e-15j,
        2.75186612e-15-3.04151101e-15j]),
 array([[ 0.46477802+0.j        , -0.68886109+0.j        ,
         -0.13078191+0.j        , -0.03763862+0.40962613j,
         -0.03763862-0.40962613j],
        [-0.62929665+0.j        , -0.3115869 +0.j        ,
          0.47738305+0.j        ,  0.27195469+0.26767313j,
          0.27195469-0.26767313j],
        [-0.42046595+0.j        ,  0.2041816 +0.j        ,
         -0.76083386+0.j        ,  0.03628343+0.34784402j,
          0.03628343-0.34784402j],
        [ 0.38220146+0.j        ,  0.14572672+0.j        ,
         -0.15286987+0.j        ,  0.5681235 +0.j        ,
          0.5681235 -0.j        ],
        [ 0.2551422 +0.j        ,  0.6045309 +0.j        ,
          0.39085055+0.j        , -0.05192363+0.48724654j,
         -0.05192363-0.48724654j]]))

# B の階数は 3 になっている
np.linalg.matrix_rank(B)

3

Ch.4 カーネル計算の実際¶

4.1 カーネル Ridge 回帰¶

例63¶

例64¶

4.2 カーネル主成分分析¶

例65¶

4.3 カーネル SVM¶

例66¶

4.4 スプライン¶

例67¶

4.5 Random Fourier Features¶

例68¶

例69¶

4.6 Nyström 近似

例70¶

4.7 不完全 Cholesky 分解¶

Ch.4　カーネル計算の実際¶

4.1　カーネル Ridge 回帰¶

4.2　カーネル主成分分析¶

4.3　カーネル SVM¶

4.4　スプライン¶

4.5　Random Fourier Features¶

4.6　Nyström 近似

4.7　不完全 Cholesky 分解¶