In [5]:
## Install cvxopt
pip install cvxopt
Requirement already satisfied: cvxopt in c:\users\prof-\anaconda3\lib\site-packages (1.2.0)
Note: you may need to restart the kernel to use updated packages.
In [1]:
# The programs in Chapter 4 assume that the following has been executed in advance.
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
import cvxopt
from cvxopt import solvers
from cvxopt import matrix
import matplotlib.pyplot as plt
from matplotlib import style
style.use("seaborn-ticks")
from numpy.random import randn  # standard normal random numbers
from scipy.stats import norm

4.1 Kernel Ridge Regression
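
By the representer theorem, the estimator computed in this section has the form \hat{f}(x) = \sum_{i=1}^n \alpha_i k(x_i, x), with coefficients

    \hat{\alpha} = (K + \lambda I)^{-1} y, \qquad K_{ij} = k(x_i, x_j).

The first alpha() below (In [2]) uses only a tiny numerical jitter in place of \lambda, so it is essentially unregularized kernel regression; the second version (In [5]) uses the ridge parameter lam.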

In [2]:
def alpha(k, x, y):
    n = len(x)
    K = np.zeros((n, n))
    for i in range(n):
        for j in range(n):
            K[i, j] = k(x[i], x[j])
    return np.linalg.inv(K + 10e-5 * np.identity(n)).dot(y)  # small jitter (10e-5 = 1e-4) for numerical stability, not the ridge penalty lam
In [3]:
def k_p(x, y):
    return (np.dot(x.T, y) + 1)**3

def k_g(x, y):
    return np.exp(-(x - y)**2 / 2)
In [4]:
lam = 0.1
n = 50; x = np.random.randn(n); y = 1 + x + x**2 + np.random.randn(n)
alpha_p = alpha(k_p, x, y)
alpha_g = alpha(k_g, x, y)

z = np.sort(x); u = []; v = []

for j in range(n):
    S = 0
    for i in range(n):
        S = S + alpha_p[i] * k_p(x[i], z[j])
    u.append(S)
    S = 0
    for i in range(n):
        S = S + alpha_g[i] * k_g(x[i], z[j])
    v.append(S)

plt.scatter(x, y, facecolors='none', edgecolors = "k", marker = "o")
plt.plot(z, u, c = "r", label = "Polynomial Kernel")
plt.plot(z, v, c = "b", label = "Gaussian Kernel")
plt.xlim(-1, 1)
plt.ylim(-1, 5)
plt.xlabel("x")
plt.ylabel("y")
plt.title("Kernel Regression")
plt.legend(loc = "upper left", frameon = True, prop={'size':14})
Out[4]:
<matplotlib.legend.Legend at 0x27bffba8910>
In [5]:
def alpha(k, x, y):
    n = len(x)
    K = np.zeros((n, n))
    for i in range(n):
        for j in range(n):
            K[i, j] = k(x[i], x[j])
    return np.linalg.inv(K + lam * np.identity(n)).dot(y)  # kernel ridge: regularization parameter lam
In [6]:
def k_p(x, y):
    return (np.dot(x.T, y) + 1)**3

def k_g(x, y):
    return np.exp(-(x - y)**2 / 2)
In [7]:
lam = 0.1
n = 50; x = np.random.randn(n); y = 1 + x + x**2 + np.random.randn(n)
alpha_p = alpha(k_p, x, y)
alpha_g = alpha(k_g, x, y)

z = np.sort(x); u = []; v = []

for j in range(n):
    S = 0
    for i in range(n):
        S = S + alpha_p[i] * k_p(x[i], z[j])
    u.append(S)
    S = 0
    for i in range(n):
        S = S + alpha_g[i] * k_g(x[i], z[j])
    v.append(S)
plt.scatter(x, y, facecolors='none', edgecolors = "k", marker = "o")
plt.plot(z, u, c = "r", label = "Polynomial Kernel")
plt.plot(z, v, c = "b", label = "Gaussian Kernel")
plt.xlim(-1, 1)
plt.ylim(-1, 5)
plt.xlabel("x")
plt.ylabel("y")
plt.title("Kernel Ridge")
plt.legend(loc = "upper left", frameon = True, prop={'size':14})
Out[7]:
<matplotlib.legend.Legend at 0x27bffcab9a0>
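
As an optional cross-check (not part of the original text), scikit-learn's KernelRidge solves the same problem; kernel="rbf" with gamma=0.5 reproduces k_g, since exp(-0.5*(x-y)**2) = exp(-(x-y)**2/2). A minimal sketch, assuming scikit-learn is installed and x, y, z, lam are as above:

from sklearn.kernel_ridge import KernelRidge
model = KernelRidge(alpha=lam, kernel="rbf", gamma=0.5)  # gamma=0.5 matches k_g
model.fit(x.reshape(-1, 1), y)
v_sk = model.predict(z.reshape(-1, 1))  # should nearly coincide with v above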

4.2 Kernel Principal Component Analysis
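
kernel_pca_train below first centers the Gram matrix in feature space,

    \tilde{K}_{ij} = K_{ij} - \frac{1}{n}\sum_{l} K_{il} - \frac{1}{n}\sum_{l} K_{lj} + \frac{1}{n^2}\sum_{l,l'} K_{ll'},

then eigendecomposes \tilde{K} and rescales each eigenvector v_i by 1/\sqrt{\lambda_i}, so that each principal direction has unit norm in the RKHS.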

In [8]:
def kernel_pca_train(x, k):
    n = x.shape[0]
    K = np.zeros((n, n))
    S = [0] * n; T = [0] * n
    for i in range(n):
        for j in range(n):
            K[i, j] = k(x[i, :], x[j, :])
    for i in range(n):
        S[i] = np.sum(K[i, :])
    for j in range(n):
        T[j] = np.sum(K[:, j])
    U = np.sum(K)
    for i in range(n):
        for j in range(n):
            K[i, j] = K[i, j] - S[i] / n - T[j] / n + U / n**2
    val, vec = np.linalg.eig(K)
    idx = val.argsort()[::-1]  # sort in decreasing order, as in R
    val = val[idx]
    vec = vec[:,idx]
    alpha = np.zeros((n, n))
    for i in range(n):
        alpha[:, i] = vec[:, i] / val[i]**0.5
    return alpha
In [9]:
def kernel_pca_test(x, k, alpha, m, z):
    n = x.shape[0]
    pca = np.zeros(m)
    for i in range(n):
        pca = pca + alpha[i, 0:m] * k(x[i, :], z)
    return pca
In [10]:
# def k(x, y):
#     return np.dot(x.T, y)
sigma2 = 0.01

def k(x, y):
    return np.exp(-np.linalg.norm(x - y)**2 / 2 / sigma2)

X = pd.read_csv('https://raw.githubusercontent.com/selva86/datasets/master/USArrests.csv')
x = X.values[:,:-1]
n = x.shape[0]; p = x.shape[1]
alpha = kernel_pca_train(x, k)
z = np.zeros((n, 2))

for i in range(n):
    z[i, :] = kernel_pca_test(x, k, alpha, 2, x[i, :])

min1 = np.min(z[:, 0]); min2 = np.min(z[:, 1])
max1 = np.max(z[:, 0]); max2 = np.max(z[:, 1])

plt.xlim(min1, max1)
plt.ylim(min2, max2)
plt.xlabel("First")
plt.ylabel("Second")
plt.title("Kernel PCA (Gauss 0.01)")
for i in range(n):
    if i != 4:
        plt.text(x = z[i, 0], y = z[i, 1], s = i)
plt.text(z[4, 0], z[4, 1], 4, c = "r")

# For comparison: ordinary (linear) PCA
pca = PCA(n_components=X.shape[1]-1)
X1=pca.fit_transform(x)
z=X1[:,:2]

4.3 Kernel SVM
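
svm_2 below hands the SVM dual problem

    \min_{\alpha} \frac{1}{2} \alpha^\top P \alpha - \mathbf{1}^\top \alpha \quad \text{s.t.} \quad 0 \le \alpha_i \le C, \; y^\top \alpha = 0, \qquad P_{ij} = y_i y_j K(x_i, x_j),

to cvxopt.solvers.qp(P, q, G, h, A, b), which minimizes (1/2) x^T P x + q^T x subject to Gx <= h and Ax = b. Hence q = -1, G stacks I and -I, h = (C, ..., C, 0, ..., 0)^T, A = -y^T, and b = 0.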

In [11]:
def K_linear(x,y):
    return x.T@y
def K_poly(x,y):
    return (1+x.T@y)**2
def svm_2(X,y,C,K):
    eps=0.0001
    n=X.shape[0]
    P=np.zeros((n,n))
    for i in range(n):
        for j in range(n):
            P[i,j]=K(X[i,:],X[j,:])*y[i]*y[j]
    # the arguments must be passed as cvxopt matrix objects
    P=matrix(P+np.eye(n)*eps)
    A=matrix(-y.T.astype(float))       # equality constraint y^T alpha = 0
    b=matrix(np.array([0]).astype(float))
    h=matrix(np.array([C]*n+[0]*n).reshape(-1,1).astype(float))
    G=matrix(np.concatenate([np.diag(np.ones(n)),np.diag(-np.ones(n))]))  # 0 <= alpha <= C
    q=matrix(np.array([-1]*n).astype(float))
    res=cvxopt.solvers.qp(P,q, A=A, b=b, G=G, h=h)
    alpha=np.array(res['x'])  # 'x' corresponds to alpha in the text
    beta=((alpha*y).T@X).reshape(2,1)
    index = (eps < alpha[:, 0]) & (alpha[:, 0] < C - eps)
    beta_0=np.mean(y[index]-X[index,:]@beta)
    return {'alpha':alpha, 'beta':beta, 'beta_0':beta_0}

def plot_kernel(K,line):  # the argument 'line' specifies the line style
    res=svm_2(X,y,1,K)
    alpha=res['alpha'][:,0]
    beta_0=res['beta_0']
    def f(u,v):
        S=beta_0
        for i in range(X.shape[0]):
            S=S+alpha[i]*y[i]*K(X[i,:],[u,v])
        return S[0]
    uu=np.arange(-2,2,0.1); vv=np.arange(-2,2,0.1); ww=[]
    for v in vv:
        w=[]
        for u in uu:
            w.append(f(u,v))
        ww.append(w)
    plt.contour(uu,vv,ww,levels=[0],linestyles=line)  # decision boundary f(u,v)=0
In [12]:
a=3;b=-1
n=200
X=randn(n,2)
y=np.sign(a*X[:,0]+b*X[:,1]**2+0.3*randn(n))
y=y.reshape(-1,1)
for i in range(n):
    if y[i]==1:
        plt.scatter(X[i,0],X[i,1],c="red")
    else:
        plt.scatter(X[i,0],X[i,1],c="blue")
plot_kernel(K_poly,line="dashed")
plot_kernel(K_linear,line="solid")
     pcost       dcost       gap    pres   dres
 0: -6.6927e+01 -4.6679e+02  2e+03  3e+00  1e-14
 1: -4.2949e+01 -2.9229e+02  5e+02  4e-01  8e-15
 2: -2.8717e+01 -1.0653e+02  1e+02  1e-01  6e-15
 3: -2.5767e+01 -4.7367e+01  3e+01  2e-02  4e-15
 4: -2.6165e+01 -3.1836e+01  8e+00  5e-03  4e-15
 5: -2.6940e+01 -2.8267e+01  2e+00  7e-04  3e-15
 6: -2.7243e+01 -2.7483e+01  3e-01  1e-04  3e-15
 7: -2.7325e+01 -2.7330e+01  6e-03  1e-06  3e-15
 8: -2.7327e+01 -2.7328e+01  9e-05  2e-08  3e-15
 9: -2.7328e+01 -2.7328e+01  9e-07  2e-10  3e-15
Optimal solution found.
     pcost       dcost       gap    pres   dres
 0: -8.1804e+01 -4.7816e+02  2e+03  3e+00  3e-15
 1: -5.3586e+01 -3.0647e+02  4e+02  4e-01  3e-15
 2: -4.1406e+01 -8.6880e+01  6e+01  3e-02  5e-15
 3: -4.7360e+01 -5.9604e+01  1e+01  6e-03  2e-15
 4: -4.9819e+01 -5.5157e+01  6e+00  2e-03  2e-15
 5: -5.0999e+01 -5.3276e+01  2e+00  7e-04  2e-15
 6: -5.1869e+01 -5.2122e+01  3e-01  9e-06  3e-15
 7: -5.1966e+01 -5.2010e+01  4e-02  1e-06  2e-15
 8: -5.1986e+01 -5.1988e+01  3e-03  5e-15  3e-15
 9: -5.1987e+01 -5.1987e+01  8e-05  1e-15  2e-15
10: -5.1987e+01 -5.1987e+01  2e-06  3e-15  3e-15
Optimal solution found.
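
For reference (not part of the original text), the same polynomial-kernel classifier can be fit with scikit-learn: (1 + x^T y)^2 corresponds to kernel="poly" with degree=2, gamma=1, coef0=1. A minimal sketch:

from sklearn.svm import SVC
clf = SVC(C=1.0, kernel="poly", degree=2, gamma=1.0, coef0=1.0)  # matches K_poly
clf.fit(X, y.ravel())  # decision boundary should resemble the dashed contour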

4.4 Splines
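
The smoothing spline minimizes \sum_i (y_i - g(x_i))^2 + \lambda \int g''(t)^2 dt over natural cubic splines with knots at the data points. Writing g = \sum_j \gamma_j h_j in the basis defined below, the criterion becomes \|y - X\gamma\|^2 + \lambda \gamma^\top G \gamma with X_{ij} = h_j(x_i) and G_{ij} = \int h_i''(t) h_j''(t) dt, whose minimizer is

    \hat{\gamma} = (X^\top X + \lambda G)^{-1} X^\top y,

the quantity computed as gamma in In [14].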

In [13]:
# Define the basis functions d and h for the natural cubic spline
def d(j, x, knots):
    K = len(knots)
    return (np.maximum((x-knots[j])**3, 0)
            - np.maximum((x-knots[K-1])**3, 0)) / (knots[K-1]-knots[j])

def h(j, x, knots):
    K = len(knots)
    if j == 0:
        return 1
    elif j == 1:
        return x
    else:
        return d(j-1, x, knots)-d(K-2, x, knots)

def G(x):
    n = len(x)
    g = np.zeros((n, n))
    for i in range(2, n-1):
        for j in range(i, n):
            g[i, j] = (12*(x[n-1]-x[n-2])*(x[n-2]-x[j-2])*(x[n-2]-x[i-2])
                       / (x[n-1]-x[i-2]) / (x[n-1]-x[j-2])
                       + (12*x[n-2]+6*x[j-2]-18*x[i-2])*(x[n-2]-x[j-2])**2
                       / (x[n-1]-x[i-2]) / (x[n-1]-x[j-2]))
            g[j, i] = g[i, j]
    return g
In [14]:
n = 100
x = np.random.uniform(-5, 5, n)
y = x + np.sin(x)*2 + np.random.randn(n)
index = np.argsort(x)
x = x[index]; y = y[index]
X = np.zeros((n, n))
X[:, 0] = 1
for j in range(1, n):
    for i in range(n):
        X[i, j] = h(j, x[i], x)
GG = G(x)
lam_set = [1, 30, 80]
col_set = ["red", "blue", "green"]

plt.figure()
plt.ylim(-8, 8)
plt.xlabel("x")
plt.ylabel("g(x)")

for i in range(3):
    lam = lam_set[i]
    gamma = np.dot(np.dot(np.linalg.inv(np.dot(X.T, X)+lam*GG),X.T),y)
    def g(u):
        S = gamma[0]
        for j in range(1, n):
            S = S + gamma[j]*h(j, u, x)
        return S
    u_seq = np.arange(-8, 8, 0.02)
    v_seq = []
    for u in u_seq:
        v_seq.append(g(u))
    plt.plot(u_seq, v_seq, c = col_set[i], label = r"$\lambda = %d$" % lam_set[i])
plt.legend()
plt.scatter(x, y, facecolors='none', edgecolors = "k", marker = "o")
plt.title("smooth spline (n=100)")
Out[14]:
Text(0.5, 1.0, 'smooth spline (n=100)')

4.5 Random Fourier Features
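
Random Fourier features replace a shift-invariant kernel by an explicit m-dimensional feature map. For the Gaussian kernel k(x, y) = \exp(-(x-y)^2/(2\sigma^2)), drawing w_j \sim N(0, 1/\sigma^2) and b_j \sim \mathrm{Unif}[0, 2\pi] and setting

    z(x) = \sqrt{2/m} \, (\cos(w_1 x + b_1), \ldots, \cos(w_m x + b_m))

gives E[z(x)^\top z(y)] = k(x, y); the boxplot below shows the approximation error zz(x,y) - k(x,y) shrinking as m grows.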

In [15]:
sigma=10
sigma2=sigma**2
def k(x,y):
    return np.exp(-(x-y)**2/(2*sigma2))
def z(x):
    return np.sqrt(2/m)*np.cos(w*x+b)
def zz(x,y):
    return np.sum(z(x)*z(y))
u=np.zeros((1000,3))
m_seq=[20,100,400]
for i in range(1000):
    x=randn(1)
    y=randn(1)
    for j in range(3):
        m=m_seq[j]
        w=randn(m)/sigma
        b=np.random.rand(m)*2*np.pi
        u[i,j]=zz(x,y)-k(x,y)
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.boxplot([u[:,0], u[:,1], u[:,2]], labels=['20', '100', '400'])
ax.set_xlabel('m')
ax.set_ylim(-0.5, 0.6)
plt.show()
In [54]:
sigma=10
sigma2=sigma**2
m=20
w=randn(m)/sigma
b=np.random.rand(m)*2*np.pi
def k(x,y):
    return np.exp(-(x-y)**2/(2*sigma2))
def z(u,m):
    return np.sqrt(2/m)*np.cos(w*u+b)

n=200
x=randn(n)/2
y=1+5*np.sin(x/10)+5*x**2+randn(n)
x_min=np.min(x);x_max=np.max(x);y_min=np.min(y);y_max=np.max(y)
lam=0.001
#lam=0.9

def alpha_rff(x,y,m):
    n=len(x)
    Z=np.zeros((n,m))
    for i in range(n):
        Z[i,:]=z(x[i],m)
    beta=np.dot(np.linalg.inv(np.dot(Z.T, Z)+lam*np.eye(m)),np.dot(Z.T,y))
    return beta

def alpha(k,x,y):
    n=len(x)
    K=np.zeros((n,n))
    for i in range(n):
        for j in range(n):
            K[i,j]=k(x[i],x[j])
    alpha=np.dot(np.linalg.inv(K+lam*np.eye(n)),y)
    return alpha

alpha_hat=alpha(k,x,y)
beta_hat=alpha_rff(x,y,m)
r=np.sort(x)
u=np.zeros(n)
v=np.zeros(n)
for j in range(n):
    S=0
    for i in range(n):
        S=S+alpha_hat[i]*k(x[i],r[j])
    u[j]=S
    v[j]=np.sum(beta_hat*z(r[j],m))

plt.scatter(x, y, facecolors='none', edgecolors = "k", marker = "o")
plt.plot(r, u, c = "r", label = "w/o Approx")
plt.plot(r, v, c = "b", label = "with Approx")
plt.xlim(-1.5, 2)
plt.ylim(-2, 8)
plt.xlabel("x")
plt.ylabel("y")
plt.title("Kernel Regression")
plt.legend(loc = "upper left", frameon = True, prop={'size':14})
Out[54]:
<matplotlib.legend.Legend at 0x27ba1915e20>
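
For reference (not part of the original text), scikit-learn provides the same construction as RBFSampler, which approximates \exp(-\gamma (x-y)^2); \gamma = 1/(2\sigma^2) matches k above. A minimal sketch, assuming x, m, sigma2 as above:

from sklearn.kernel_approximation import RBFSampler
rff = RBFSampler(gamma=1/(2*sigma2), n_components=m, random_state=0)
Z = rff.fit_transform(x.reshape(-1, 1))  # n x m random feature matrix, Z @ Z.T approximates K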

4.6 Nyström Approximation
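
The Nyström method approximates K \approx R R^\top using only its first m columns, then avoids the O(n^3) inverse via the Woodbury identity

    (R R^\top + \lambda I_n)^{-1} = \frac{1}{\lambda} \left( I_n - R (R^\top R + \lambda I_m)^{-1} R^\top \right),

so alpha_m below only inverts an m x m matrix.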

In [68]:
sigma2=1
def k(x,y):
    return np.exp(-(x-y)**2/(2*sigma2))
n=300
x=randn(n)/2
y=3 - 2*x**2 + 3*x**3 + 2*randn(n)
lam=10**(-5)
m=10

K=np.zeros((n,n))
for i in range(n):
    for j in range(n):
        K[i,j]=k(x[i],x[j])

def alpha_m(K,x,y,m):
    n=len(x)
    U,D,V=np.linalg.svd(K[:m,:m])  # spectral decomposition of the m x m block
    u=np.zeros((n,m))
    for i in range(m):
        for j in range(n):
            u[j,i]= np.sqrt(m/n)*np.sum(K[j,:m]*U[:m,i]/D[i])  # approximate eigenvectors of K
    mu=D*n/m                          # approximate eigenvalues of K
    R=np.zeros((n,m))
    for i in range(m):
        R[:,i]=np.sqrt(mu[i])*u[:,i]  # K is approximated by R R^T
    Z=np.linalg.inv(np.dot(R.T,R)+lam*np.eye(m))
    # Woodbury: (R R^T + lam I)^{-1} = (I - R (R^T R + lam I)^{-1} R^T) / lam
    alpha=np.dot((np.eye(n)-np.dot(np.dot(R,Z),R.T)),y)/lam
    return alpha

def alpha(K,x,y):
    alpha=np.dot(np.linalg.inv(K+lam*np.eye(n)),y)
    return alpha

alpha_1=alpha(K,x,y)
alpha_2=alpha_m(K,x,y,m)
r=np.sort(x)
w=np.zeros(n)
v=np.zeros(n)
for j in range(n):
    S_1=0
    S_2=0
    for i in range(n):
        S_1=S_1+alpha_1[i]*k(x[i],r[j])
        S_2=S_2+alpha_2[i]*k(x[i],r[j])
    w[j]=S_1
    v[j]=S_2
plt.scatter(x, y, facecolors='none', edgecolors = "k", marker = "o")
plt.plot(r, w, c = "r", label = "w/o Approx")
plt.plot(r, v, c = "b", label = "with Approx")
plt.xlim(-1.5, 2)
plt.ylim(-2, 8)
plt.xlabel("x")
plt.ylabel("y")
plt.title("Kernel Regression")
plt.legend(loc = "upper left", frameon = True, prop={'size':14})
Out[68]:
<matplotlib.legend.Legend at 0x27ba50d1b50>
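
As a side note (not part of the original text), scikit-learn's Nystroem transformer implements the same approximation. A minimal sketch, assuming x, m, sigma2 as above:

from sklearn.kernel_approximation import Nystroem
ny = Nystroem(kernel="rbf", gamma=1/(2*sigma2), n_components=m, random_state=0)
R_sk = ny.fit_transform(x.reshape(-1, 1))  # n x m feature matrix with R_sk @ R_sk.T approximating K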

4.7 Incomplete Cholesky Decomposition
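
Incomplete (pivoted) Cholesky stops the Cholesky recursion after m steps, at each step pivoting on the largest remaining diagonal element. The result L = PR has only m nonzero columns and satisfies

    A \approx L L^\top, \qquad \mathrm{rank}(L L^\top) \le m,

with exact equality when m = n. Note that im_ch below modifies its argument A in place while pivoting.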

In [108]:
def im_ch(A,m):
    # incomplete Cholesky decomposition of rank m (modifies A in place)
    n=A.shape[1]
    R=np.zeros((n,n))
    P=np.eye(n)  # accumulates the pivoting permutation
    for i in range(m):
        # pivot: index of the largest remaining diagonal element
        max_R=-np.inf
        for j in range(i,n):
            RR=A[j,j]
            for h in range(i):
                RR=RR-R[j,h]**2
            if RR>max_R:
                k=j
                max_R=RR
        R[i,i]=np.sqrt(max_R)
        if k!=i:
            # swap rows i and k of the computed part of R,
            # and rows/columns i and k of A
            for j in range(i):
                w=R[i,j]; R[i,j]=R[k,j]; R[k,j]=w
            for j in range(n):
                w=A[j,k]; A[j,k]=A[j,i]; A[j,i]=w
            for j in range(n):
                w=A[k,j]; A[k,j]=A[i,j]; A[i,j]=w
            Q=np.eye(n); Q[i,i]=0; Q[k,k]=0; Q[i,k]=1; Q[k,i]=1
            P=np.dot(P,Q)
        # fill in column i of R below the diagonal
        for j in range(i+1,n):
            S=A[j,i]
            for h in range(i):
                S=S-R[i,h]*R[j,h]
            R[j,i]=S/R[i,i]
    return np.dot(P,R)
In [137]:
n=5
D=np.matrix([[np.random.randint(-n, n) for i in range(n)] for j in range(n)])
A=np.dot(D,D.T)
A
Out[137]:
matrix([[ 68,  11,   6, -14,  13],
        [ 11,  23,   0, -17,  11],
        [  6,   0,  24,  36, -16],
        [-14, -17,  36,  75, -24],
        [ 13,  11, -16, -24,  45]])
In [125]:
L=im_ch(A,5)
In [126]:
np.dot(L,L.T)
Out[126]:
array([[ 46.,  22.,  36.,   2.,  -7.],
       [ 22.,  31.,  14., -19.,  -8.],
       [ 36.,  14.,  41.,  -6., -20.],
       [  2., -19.,  -6.,  46.,  21.],
       [ -7.,  -8., -20.,  21.,  22.]])
In [136]:
## Incomplete Cholesky decomposition of rank 3
L=im_ch(A,3)
np.linalg.eig(A)
Out[136]:
(array([1.01074272e+02, 5.66112516e+01, 2.41344415e+01, 9.62653508e-02,
        4.08376941e+00]),
 matrix([[-0.56411909, -0.45552615, -0.22499121, -0.46541646,  0.45500775],
         [ 0.30282894, -0.79305647,  0.04313883, -0.11440623, -0.51420456],
         [-0.40884563,  0.15490099, -0.68851469,  0.1243158 , -0.56510533],
         [ 0.31554644, -0.28590181, -0.51655213,  0.58928787,  0.45233208],
         [-0.56862992, -0.24046456,  0.45457608,  0.63842315, -0.06792108]]))
In [131]:
## The reconstruction B = L L^T does not recover A
B=np.dot(L,L.T)
B
Out[131]:
array([[ 46.        ,   2.        ,  22.        ,  -7.        ,
         36.        ],
       [  2.        ,  46.        , -19.        ,  21.        ,
         -6.        ],
       [ 22.        , -19.        ,  31.        ,  -8.        ,
         14.        ],
       [ -7.        ,  21.        ,  -8.        ,  12.74957882,
        -11.52827918],
       [ 36.        ,  -6.        ,  14.        , -11.52827918,
         33.00601685]])
In [135]:
# The first three eigenvalues of B are close to those of A
np.linalg.eig(B)
Out[135]:
(array([ 9.53379063e+01,  5.65627665e+01,  1.68549229e+01,  1.42156846e-14,
        -3.05830677e-15]),
 array([[-0.60391559, -0.44498737, -0.0412799 ,  0.6556516 ,  0.01706317],
        [ 0.31630878, -0.79905862, -0.14333816, -0.30429563, -0.42390544],
        [-0.44500967,  0.16578492, -0.79173017, -0.36203582, -0.1781387 ],
        [ 0.24801368, -0.27858603, -0.38477667,  0.11142099,  0.84426293],
        [-0.52506222, -0.24165419,  0.45040025, -0.57796243,  0.27477214]]))
In [130]:
# The rank of B is 3
np.linalg.matrix_rank(B)
Out[130]:
3

Problems

In [138]:
def kernel_pca_train(x, k):
    n = x.shape[0]
    K = np.zeros((n, n))
    S = [0] * n; T = [0] * n
    for i in range(n):
        for j in range(n):
            K[i, j] = k(x[i, :], x[j, :])
    for i in range(n):
        S[i] = np.sum(K[i, :])
    for j in range(n):
        T[j] = np.sum(K[:, j])
    U = np.sum(K)
    for i in range(n):
        for j in range(n):
            K[i, j] = K[i, j] - S[i] / n - T[j] / n + U / n**2
    val, vec = np.linalg.eig(K)
    idx = val.argsort()[::-1]  # sort in decreasing order, as in R
    val = val[idx]
    vec = vec[:,idx]
    alpha = np.zeros((n, n))
    for i in range(n):
        alpha[:, i] = vec[:, i] / val[i]**0.5
    return alpha
In [139]:
def kernel_pca_test(x, k, alpha, m, z):
    n = x.shape[0]
    pca = np.zeros(m)
    for i in range(n):
        pca = pca + alpha[i, 0:m] * k(x[i, :], z)
    return pca
In [ ]:
sigma2 = 0.01
def k(x, y):
    return np.exp(-np.linalg.norm(x - y)**2 / 2 / sigma2)
X = pd.read_csv('https://raw.githubusercontent.com/selva86/datasets/master/USArrests.csv')
x = X.values[:,:-1]
n = x.shape[0]; p = x.shape[1]
alpha = kernel_pca_train(x, k)
z = np.zeros((n, 2))

for i in range(n):
    z[i, :] = kernel_pca_test(x, k, alpha, 2, x[i, :])

min1 = np.min(z[:, 0]); min2 = np.min(z[:, 1])
max1 = np.max(z[:, 0]); max2 = np.max(z[:, 1])

plt.xlim(min1, max1)
plt.ylim(min2, max2)
plt.xlabel("First")
plt.ylabel("Second")
plt.title("Kernel PCA (Gauss 0.01)")
for i in range(n):
    if i != 4:
        plt.text(x = z[i, 0], y = z[i, 1], s = i)
plt.text(z[4, 0], z[4, 1], 4, c = "r")
In [ ]:
sigma=10
sigma2=sigma**2
def z(x):
    return np.sqrt(2/m)*np.cos(w*x+b)
def zz(x,y):
    return np.sum(z(x)*z(y))
In [ ]:
def alpha_m(K,x,y,m):
    n=len(x)
    U,D,V=np.linalg.svd(K[:m,:m])
    u=np.zeros((n,m))
    for i in range(m):
        for j in range(n):
            u[j,i]= np.sqrt(m/n)*np.sum(K[j,:m]*U[:m,i]/D[i])
    mu=D*n/m
    R=np.zeros((n,m))
    for i in range(m):
        R[:,i]=np.sqrt(mu[i])*u[:,i]
    Z=np.linalg.inv(np.dot(R.T,R)+lam*np.eye(m))
    alpha=np.dot((np.eye(n)-np.dot(np.dot(R,Z),R.T)),y)/lam
    return alpha