#install scikit-fda for the Canadian Weather Dataset
!pip install scikit-fda

Collecting scikit-fda
  Using cached scikit_fda-0.6-py2.py3-none-any.whl (339 kB)
Requirement already satisfied: numpy>=1.16 in c:\users\prof-\anaconda3\lib\site-packages (from scikit-fda) (1.19.5)
Requirement already satisfied: findiff in c:\users\prof-\anaconda3\lib\site-packages (from scikit-fda) (0.8.9)
Collecting scikit-datasets[cran]>=0.1.24
  Using cached scikit_datasets-0.1.38-py3-none-any.whl (30 kB)
Collecting dcor
  Using cached dcor-0.5.3-py2.py3-none-any.whl (35 kB)
Requirement already satisfied: multimethod>=1.5 in c:\users\prof-\anaconda3\lib\site-packages (from scikit-fda) (1.6)
Collecting fdasrsf>=2.2.0
  Using cached fdasrsf-2.3.3-cp38-cp38-win_amd64.whl (422 kB)
Requirement already satisfied: cython in c:\users\prof-\anaconda3\lib\site-packages (from scikit-fda) (0.29.21)
Requirement already satisfied: typing-extensions in c:\users\prof-\anaconda3\lib\site-packages (from scikit-fda) (3.7.4.2)
Requirement already satisfied: matplotlib in c:\users\prof-\anaconda3\lib\site-packages (from scikit-fda) (3.3.4)
Requirement already satisfied: scipy>=1.3.0 in c:\users\prof-\anaconda3\lib\site-packages (from scikit-fda) (1.5.0)
Requirement already satisfied: rdata in c:\users\prof-\anaconda3\lib\site-packages (from scikit-fda) (0.5)
Requirement already satisfied: pandas in c:\users\prof-\anaconda3\lib\site-packages (from scikit-fda) (1.0.5)
Requirement already satisfied: scikit-learn>=0.20 in c:\users\prof-\anaconda3\lib\site-packages (from scikit-fda) (0.23.1)
Requirement already satisfied: sympy in c:\users\prof-\anaconda3\lib\site-packages (from findiff->scikit-fda) (1.6.1)
Collecting numba>=0.51
  Using cached numba-0.54.1-cp38-cp38-win_amd64.whl (2.3 MB)
Requirement already satisfied: tqdm in c:\users\prof-\anaconda3\lib\site-packages (from fdasrsf>=2.2.0->scikit-fda) (4.47.0)
Collecting GPy
  Using cached GPy-1.10.0-cp38-cp38-win_amd64.whl (1.4 MB)
Requirement already satisfied: six in c:\users\prof-\anaconda3\lib\site-packages (from fdasrsf>=2.2.0->scikit-fda) (1.15.0)
Requirement already satisfied: pyparsing in c:\users\prof-\anaconda3\lib\site-packages (from fdasrsf>=2.2.0->scikit-fda) (2.4.7)
Requirement already satisfied: patsy in c:\users\prof-\anaconda3\lib\site-packages (from fdasrsf>=2.2.0->scikit-fda) (0.5.1)
Requirement already satisfied: cffi>=1.0.0 in c:\users\prof-\anaconda3\lib\site-packages (from fdasrsf>=2.2.0->scikit-fda) (1.14.0)
Requirement already satisfied: joblib in c:\users\prof-\anaconda3\lib\site-packages (from fdasrsf>=2.2.0->scikit-fda) (0.16.0)
Requirement already satisfied: python-dateutil>=2.1 in c:\users\prof-\anaconda3\lib\site-packages (from matplotlib->scikit-fda) (2.8.1)
Requirement already satisfied: pillow>=6.2.0 in c:\users\prof-\anaconda3\lib\site-packages (from matplotlib->scikit-fda) (7.2.0)
Requirement already satisfied: kiwisolver>=1.0.1 in c:\users\prof-\anaconda3\lib\site-packages (from matplotlib->scikit-fda) (1.2.0)
Requirement already satisfied: cycler>=0.10 in c:\users\prof-\anaconda3\lib\site-packages (from matplotlib->scikit-fda) (0.10.0)
Requirement already satisfied: xarray in c:\users\prof-\anaconda3\lib\site-packages (from rdata->scikit-fda) (0.19.0)
Requirement already satisfied: pytz>=2017.2 in c:\users\prof-\anaconda3\lib\site-packages (from pandas->scikit-fda) (2020.1)
Requirement already satisfied: threadpoolctl>=2.0.0 in c:\users\prof-\anaconda3\lib\site-packages (from scikit-learn>=0.20->scikit-fda) (2.1.0)
Requirement already satisfied: mpmath>=0.19 in c:\users\prof-\anaconda3\lib\site-packages (from sympy->findiff->scikit-fda) (1.1.0)
Requirement already satisfied: setuptools in c:\users\prof-\anaconda3\lib\site-packages (from numba>=0.51->dcor->scikit-fda) (49.2.0.post20200714)
Collecting llvmlite<0.38,>=0.37.0rc1
  Using cached llvmlite-0.37.0-cp38-cp38-win_amd64.whl (17.0 MB)
Processing c:\users\prof-\appdata\local\pip\cache\wheels\66\78\6c\d98cb437834de5e29381786b4ba8a77ea68cca74653ab62713\paramz-0.9.5-py3-none-any.whl
Requirement already satisfied: pycparser in c:\users\prof-\anaconda3\lib\site-packages (from cffi>=1.0.0->fdasrsf>=2.2.0->scikit-fda) (2.20)
Requirement already satisfied: decorator>=4.0.10 in c:\users\prof-\anaconda3\lib\site-packages (from paramz>=0.9.0->GPy->fdasrsf>=2.2.0->scikit-fda) (4.4.2)
Installing collected packages: scikit-datasets, llvmlite, numba, dcor, paramz, GPy, fdasrsf, scikit-fda
  Attempting uninstall: llvmlite
    Found existing installation: llvmlite 0.33.0+1.g022ab0f

ERROR: Cannot uninstall 'llvmlite'. It is a distutils installed project and thus we cannot accurately determine which files belong to it which would lead to only a partial uninstall.

# 第6章のプログラムは、事前に下記が実行されていることを仮定する。
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import style
from sklearn.decomposition import PCA
import skfda

6.1 回帰¶

def m(x):
    return 0

def k(x, y):
    return np.exp(-(x-y)**2/2)

def gp_sample(x, m, k):
    n = len(x)
    m_x = m(x)
    k_xx = np.zeros((n, n))
    for i in range(n):
        for j in range(n):
            k_xx[i, j] = k(x[i], x[j])
    R = np.linalg.cholesky(k_xx) #lower triangular matrix
    u = np.random.randn(n)
    return R.dot(u) + m_x

x = np.arange(-2, 3, 1)
n = len(x)
r = 100
z = np.zeros((r, n))
for i in range(r):
    z[i, :] = gp_sample(x, m, k)
k_xx = np.zeros((n, n))
for i in range(n):
    for j in range(n):
        k_xx[i, j] = k(x[i], x[j])

print("cov(z):\n",np.cov(z),"\n")
print("k_xx:\n",k_xx)

cov(z):
 [[ 2.37424382e-01  1.09924256e-03  4.32681142e-01 ... -1.92328002e-01
   1.46703390e-03 -7.31927129e-01]
 [ 1.09924256e-03  6.17989727e-02 -8.41300879e-02 ...  1.37955079e-01
   1.22155905e-01  3.53607212e-02]
 [ 4.32681142e-01 -8.41300879e-02  1.51374514e+00 ... -6.82936090e-01
  -8.44371416e-02 -1.82595208e+00]
 ...
 [-1.92328002e-01  1.37955079e-01 -6.82936090e-01 ...  1.16493686e+00
   4.07945653e-01  7.10299634e-01]
 [ 1.46703390e-03  1.22155905e-01 -8.44371416e-02 ...  4.07945653e-01
   2.88425891e-01 -4.34847631e-03]
 [-7.31927129e-01  3.53607212e-02 -1.82595208e+00 ...  7.10299634e-01
  -4.34847631e-03  2.60520510e+00]] 

k_xx:
 [[1.00000000e+00 6.06530660e-01 1.35335283e-01 1.11089965e-02
  3.35462628e-04]
 [6.06530660e-01 1.00000000e+00 6.06530660e-01 1.35335283e-01
  1.11089965e-02]
 [1.35335283e-01 6.06530660e-01 1.00000000e+00 6.06530660e-01
  1.35335283e-01]
 [1.11089965e-02 1.35335283e-01 6.06530660e-01 1.00000000e+00
  6.06530660e-01]
 [3.35462628e-04 1.11089965e-02 1.35335283e-01 6.06530660e-01
  1.00000000e+00]]

def m(x):
    return 0

def k(x, y):
    return np.exp(-np.sum((x-y)**2)/2)

def gp_sample(x, m, k):
    n = x.shape[0]
    m_x = m(x)
    k_xx = np.zeros((n, n))
    for i in range(n):
        for j in range(n):
            k_xx[i, j] = k(x[i], x[j])
    R = np.linalg.cholesky(k_xx) #lower triangular matrix
    u = np.random.randn(n)
    return R.dot(u) + m_x

n = 5
r = 100
z = np.zeros((r, n))
for i in range(r):
    z[i, :] = gp_sample(x, m, k)
k_xx = np.zeros((n, n))
for i in range(n):
    for j in range(n):
        k_xx[i, j] = k(x[i], x[j])

print("cov(z):\n",np.cov(z),"\n")
print("k_xx:\n",k_xx)

cov(z):
 [[ 1.52140938  0.2585143   0.67840535 ... -0.57075902 -0.53404263
   0.20940014]
 [ 0.2585143   0.47450778  0.36954164 ... -0.30224707 -0.47093046
  -0.03341687]
 [ 0.67840535  0.36954164  0.77124364 ... -0.34846439 -0.40226404
   0.37837525]
 ...
 [-0.57075902 -0.30224707 -0.34846439 ...  0.42199909  0.46715201
   0.04255153]
 [-0.53404263 -0.47093046 -0.40226404 ...  0.46715201  0.59461419
   0.09420804]
 [ 0.20940014 -0.03341687  0.37837525 ...  0.04255153  0.09420804
   0.40676413]] 

k_xx:
 [[1.00000000e+00 6.06530660e-01 1.35335283e-01 1.11089965e-02
  3.35462628e-04]
 [6.06530660e-01 1.00000000e+00 6.06530660e-01 1.35335283e-01
  1.11089965e-02]
 [1.35335283e-01 6.06530660e-01 1.00000000e+00 6.06530660e-01
  1.35335283e-01]
 [1.11089965e-02 1.35335283e-01 6.06530660e-01 1.00000000e+00
  6.06530660e-01]
 [3.35462628e-04 1.11089965e-02 1.35335283e-01 6.06530660e-01
  1.00000000e+00]]

def gp_1(x_pred):
    h = np.zeros(n)
    for i in range(n):
        h[i] = k(x_pred, x[i])
    R = np.linalg.inv(K + sigma_2*np.identity(n))
    mm = mu(x_pred) + np.dot(np.dot(h.T, R), (y-mu(x)))
    ss = k(x_pred, x_pred) - np.dot(np.dot(h.T, R), h)
    return {"mm":mm, "ss":ss}

def gp_2(x_pred):
    h = np.zeros(n)
    for i in range(n):
        h[i] = k(x_pred, x[i])
    L = np.linalg.cholesky(K + sigma_2*np.identity(n))
    alpha = np.linalg.solve(L, np.linalg.solve(L.T, (y - mu(x))))
    mm = mu(x_pred) + np.sum(np.dot(h.T, alpha))
    gamma = np.linalg.solve(L.T, h)
    ss = k(x_pred, x_pred) - np.sum(gamma**2)
    return {"mm":mm, "ss":ss}

sigma_2 = 0.2

def k(x, y):
    return np.exp(-(x - y)**2 / 2 / sigma_2)

def mu(x):
    return x

n = 100
x = np.random.uniform(size = n) * 6 - 3
y = np.sin(x / 2) + np.random.randn(n)
K = np.zeros((n, n))

for i in range(n):
    for j in range(n):
        K[i, j] = k(x[i], x[j])
# count
import time

start1 = time.time()
gp_1(0)
end1 = time.time()
print("time1 =", end1-start1)

start2 = time.time()
gp_2(0)
end2 = time.time()
print("time2 = ", end2-start2)

u_seq = np.arange(-3, 3.1, 0.1)
v_seq = []; w_seq = []
for u in u_seq:
    res = gp_1(u)
    v_seq.append(res["mm"])
    w_seq.append(res["ss"])

plt.figure()
plt.xlim(-3, 3)
plt.ylim(-3, 3)
plt.scatter(x, y, facecolors='none', edgecolors = "k", marker = "o")
plt.plot(u_seq, v_seq)
plt.plot(u_seq, np.sum([v_seq, [i * 3 for i in w_seq]], axis = 0), c = "b")
plt.plot(u_seq, np.sum([v_seq, [i * (-3) for i in w_seq]], axis = 0), c = "b")
plt.show()

n = 100
plt.figure()
plt.xlim(-3, 3)
plt.ylim(-3, 3)

color = ["r", "g", "b", "k", "m"]

for h in range(5):
    x = np.random.uniform(size = n) * 6 - 3
    y = np.sin(np.pi * x / 2) + np.random.randn(n)
    sigma_2 = 0.2
    K = np.zeros((n, n))
    for i in range(n):
        for j in range(n):
            K[i, j] = k(x[i], x[j])
    u_seq = np.arange(-3, 3.1, 0.1)
    v_seq = []
    for u in u_seq:
        res = gp_1(u)
        v_seq.append(res["mm"])
    plt.plot(u_seq, v_seq, c = color[h])

time1 = 0.009966373443603516
time2 =  0.057814598083496094

6.2 分類¶

from sklearn.datasets import load_iris
df = load_iris()
x = df.data[0:100, 0:4]
y = np.array([1]*50 + [-1]*50)
n = len(y)
def k(x, y):
    return np.exp(np.sum(-(x - y)**2) / 2)

K = np.zeros((n, n))
for i in range(n):
    for j in range(n):
        K[i, j] = k(x[i, :], x[j, :])
eps = 0.00001
f = [0] * n
g = [0.1] * n

while np.sum((np.array(f)-np.array(g))**2) > eps:
    i = i + 1
    g = f
    f = np.array(f)
    y = np.array(y)
    v = np.exp(-y*f)
    u = y * v / (1 + v)
    w = (v / (1 + v)**2)
    W = np.diag(w)
    W_p = np.diag(w**0.5)
    W_m = np.diag(w**(-0.5))
    L = np.linalg.cholesky(np.identity(n)+np.dot(np.dot(W_p, K), W_p))
    gamma = W.dot(f) + u
    beta = np.linalg.solve(L, np.dot(np.dot(W_p, K), gamma))
    alpha = np.linalg.solve(np.dot(L.T, W_m), beta)
    f = np.dot(K, (gamma-alpha))

print(list(f))

[2.9017597728506903, 2.6661877410648125, 2.735999714975541, 2.5962146616446793, 2.8888259653434902, 2.4229904289075734, 2.7128370653298717, 2.8965829899125057, 2.263839959450692, 2.722794155018708, 2.6757868220665557, 2.80427691289934, 2.629916582197861, 2.129058875598969, 1.9947371858903622, 1.7255773341842824, 2.502403800007298, 2.894767948521167, 2.211715451090947, 2.7578887454424845, 2.5807025000167654, 2.7884335002993703, 2.4501472162360978, 2.598252566107158, 2.49363291477457, 2.5892721299617927, 2.7995603132602014, 2.854337885531593, 2.8580336326051525, 2.682198311711416, 2.6631803480277263, 2.6529515170091735, 2.409809417765029, 2.1570288906747956, 2.738196179682446, 2.777507355522734, 2.6054932709605585, 2.8486244905053546, 2.342636360704147, 2.8826825981318938, 2.887406385036485, 1.561916989035174, 2.4541693614670925, 2.64939855108404, 2.4071165717812315, 2.633906076076528, 2.7271240196093944, 2.6732162909902857, 2.749570997237667, 2.884288422112919, -1.870441763888594, -2.5373817133813237, -1.9327372577182746, -2.5318858849974895, -2.579366785986732, -2.7850146869568233, -2.381782737110541, -1.4675208896283152, -2.48651854962823, -2.356973904782517, -1.6007721537062138, -2.8113237365649777, -2.3813705315823492, -2.734406212419866, -2.3307697118699044, -2.3416642385980246, -2.6148174861101388, -2.748088114594368, -2.3729716167430452, -2.6288003128782993, -2.2519078422840106, -2.7892659188354783, -2.3456928166252453, -2.7042381425156226, -2.712488583609864, -2.502955837329986, -2.1624797840208307, -2.01571028937641, -2.8402609758000015, -2.19474914254942, -2.474090451450695, -2.3426425865837377, -2.755051287500358, -2.1890836679248933, -2.415174453933202, -2.3822860688193157, -2.275106022961632, -2.4967546817420585, -2.6958800533399927, -2.6643571619340682, -2.64863153643803, -2.7718459450746087, -2.8075095809378467, -1.5468264491591686, -2.81472767183598, -2.756340788852421, -2.8346439145882236, -2.8498009687645762, -1.1822836980388691, -2.8458627499007214]

def pred(z):
    kk = np.zeros(n)
    for i in range(n):
        kk[i] = k(z, x[i, :])
    mu = np.sum(kk * u)
    alpha = np.linalg.solve(L, np.dot(W_p, kk))
    sigma2 = k(z, z) - np.sum(alpha**2)
    m = 1000
    b = np.random.normal(mu, sigma2, size = m)
    pi = np.sum((1 + np.exp(-b))**(-1)) / m
    return pi

z = np.zeros(4)
for j in range(4):
    z[j] = np.mean(x[:50, j])
pred(z)

0.9466371383148896

for j in range(4):
    z[j] = np.mean(x[50:100, j])
pred(z)

0.05301765489687672

6.3 補助変数法¶

sigma_2 = 0.05

def k(x, y):
    return np.exp(-(x - y)**2 / 2 / sigma_2)

def mu(x):
    return x

n = 200
x = np.random.uniform(size = n) * 6 - 3
y = np.sin(x / 2) + np.random.randn(n)
eps = 10**(-6)

m = 100
K = np.zeros((n, n))
for i in range(n):
    for j in range(n):
        K[i, j] = k(x[i], x[j])
index = np.random.choice(n, size = m, replace = False)
z = x[index]
m_x = 0
m_z = 0
K_zz = np.zeros((m, m))
for i in range(m):
    for j in range(m):
        K_zz[i, j] = k(z[i], z[j])

K_xz = np.zeros((n, m))
for i in range(n):
    for j in range(m):
        K_xz[i, j] = k(x[i], z[j])
K_zz_inv = np.linalg.inv(K_zz + np.diag([10**eps]*m))
lam = np.zeros(n)
for i in range(n):
    lam[i] = k(x[i], x[i]) - np.dot(np.dot(K_xz[i, 0:m], K_zz_inv), K_xz[i, 0:m])
lam_0_inv = np.diag(1/(lam+sigma_2))
Q = K_zz + np.dot(np.dot(K_xz.T, lam_0_inv), K_xz)
Q_inv = np.linalg.inv(Q + np.diag([eps] * m))
muu = np.dot(np.dot(np.dot(Q_inv, K_xz.T), lam_0_inv), y-m_x)
dif = K_zz_inv - Q_inv
R = np.linalg.inv(K + sigma_2 * np.identity(n))

def gp_ind(x_pred):
    h = np.zeros(m)
    for i in range(m):
        h[i] = k(x_pred, z[i])
    mm = mu(x_pred) + h.dot(muu)
    ss = k(x_pred, x_pred) - h.dot(dif).dot(h)
    return {"mm": mm, "ss": ss}

def gp_1(x_pred):
    h = np.zeros(n)
    for i in range(n):
        h[i] = k(x_pred, x[i])
    mm = mu(x_pred) + np.dot(np.dot(h.T, R), y-mu(x))
    ss = k(x_pred, x_pred) - np.dot(np.dot(h.T, R), h)
    return {"mm": mm, "ss": ss}

x_seq = np.arange(-2, 2.1, 0.1)
mmv = []; ssv = []
for u in x_seq:
    mmv.append(gp_ind(u)["mm"])
    ssv.append(gp_ind(u)["ss"])

plt.figure()
plt.plot(x_seq, mmv, c = "r")
plt.plot(x_seq, np.array(mmv) + 3 * np.sqrt(np.array(ssv)),
         c = "r", linestyle = "--")
plt.plot(x_seq, np.array(mmv) - 3 * np.sqrt(np.array(ssv)),
         c = "r", linestyle = "--")
plt.xlim(-2, 2)
plt.plot(np.min(mmv), np.max(mmv))

x_seq = np.arange(-2, 2.1, 0.1)
mmv = []; ssv =[]
for u in x_seq:
    mmv.append(gp_1(u)["mm"])
    ssv.append(gp_1(u)["ss"])

mmv = np.array(mmv)
ssv = np.array(ssv)

plt.plot(x_seq, mmv, c = "b")
plt.plot(x_seq, mmv + 3 * np.sqrt(ssv), c = "b", linestyle = "--")
plt.plot(x_seq, mmv - 3 * np.sqrt(ssv), c = "b", linestyle = "--")
plt.scatter(x, y, facecolors='none', edgecolors = "k", marker = "o")

<matplotlib.collections.PathCollection at 0x1fb1ac07190>

6.4 Karhunen-Loeve 展開¶

# Eigenvalue
def lam(j):
    return 4 / ((2 * j - 1) * np.pi)**2
# EigenFunction
def ee(j, x):
    return np.sqrt(2) * np.sin((2 * j - 1) * np.pi / 2 * x)

n = 10; m = 7

# Definition of Gaussian Process
def f(z, x):
    n = len(z)
    S = 0
    for i in range(n):
        S = S + z[i] * ee(i, x) * np.sqrt(lam(i))
    return S

plt.figure()
plt.xlim(0, 1)
plt.xlabel("x")
plt.ylabel("f(omega, x)")
colormap = plt.cm.gist_ncar #nipy_spectral, Set1,Paired
colors = [colormap(i) for i in np.linspace(0, 0.8, m)]

for j in range(m):
    z = np.random.randn(n)
    x_seq = np.arange(0, 3.001, 0.001)
    y_seq = []
    for x in x_seq:
        y_seq.append(f(z, x))
    plt.plot(x_seq, y_seq, c = colors[j])

plt.title("Brown Motion")

Text(0.5, 1.0, 'Brown Motion')

from scipy.special import gamma

def matern(nu, l, r):
    p = nu - 1 / 2
    S = 0
    for i in range(int(p+1)):
        S = S + gamma(p + i + 1) / gamma(i + 1) / gamma(p - i + 1)\
        * (np.sqrt(8 * nu) * r / l)**(p - i)
    S = S * gamma(p + 2) / gamma(2 * p + 1) * np.exp(-np.sqrt(2 * nu) * r / l)
    return S

m = 10
l = 0.1
colormap = plt.cm.gist_ncar #nipy_spectral, Set1,Paired
color = [colormap(i) for i in np.linspace(0, 1, len(range(m)))]

x = np.linspace(0, 0.5, 200)
plt.plot(x, matern(1 - 1/2, l, x), c = color[0], label = r"$\nu=%d$"%1)
plt.ylim(0, 10)
for i in range(2, m + 1):
    plt.plot(x, matern(i - 1/2, l, x), c = color[i - 1], label = r"$\nu=%d$"%i)

plt.legend(loc = "upper right", frameon = True, prop={'size':14})

<matplotlib.legend.Legend at 0x1fb1acc6ee0>

colormap = plt.cm.gist_ncar #nipy_spectral, Set1,Paired
colors = [colormap(i) for i in np.linspace(0, 0.8, 5)]

def rand_100(Sigma):
    L = np.linalg.cholesky(Sigma)
    u = np.random.randn(100)
    y = L.dot(u)
    return y

x = np.linspace(0, 1, 100)
z = np.abs(np.subtract.outer(x,x))
l = 0.1

Sigma_OU = np.exp(-z / l)
y = rand_100(Sigma_OU)

plt.figure()
plt.plot(x, y)
plt.ylim(-3,3)
for i in range(5):
    y = rand_100(Sigma_OU)
    plt.plot(x, y, c = colors[i])
plt.title("OU process (nu = 1/2, l = 0.1)")

Sigma_M = matern(3/2, l, z)
y = rand_100(Sigma_M)
plt.figure()
plt.plot(x, y)
plt.ylim(-3, 3)
for i in range(5):
    y = rand_100(Sigma_M)
    plt.plot(x, y, c = colors[i])
plt.title("Matern process (nu = 3/2, l = 0.1)")

Text(0.5, 1.0, 'Matern process (nu = 3/2, l = 0.1)')

6.5 関数データ解析¶

X, y = skfda.datasets.fetch_weather(return_X_y=True, as_frame=True)
df = X.iloc[:, 0].values
def g(j, x):
    if j == 0:
        return 1 / np.sqrt(2 * np.pi)
    if j % 1 == 0:
        return np.cos((j // 2) * x) / np.sqrt(np.pi)
    else:
        return np.sin((j // 2) * x) / np.sqrt(np.pi)

def beta(x, y):
    X = np.zeros((N, p))
    for i in range(N):
        for j in range(p):
            X[i, j] = g(j, x[i])
    beta = np.dot(np.dot(np.linalg.inv(np.dot(X.T, X)
                        + 0.0001*np.identity(p)), X.T), y)
    return np.squeeze(beta)

N = 365; n = 35; m = 5; p = 100; df = df.coordinates[0].data_matrix
C = np.zeros((n, p))
for i in range(n):
    x = np.arange(1, N+1) * (2 * np.pi / N) - np.pi
    y = df[i]
    C[i, :] = beta(x, y)

pca = PCA()
pca.fit(C)
B = pca.components_.T
xx = C.dot(B)

def z(i, m, x):
    S = 0
    for j in range(p):
        for k in range(m):
            for r in range(p):
                S = S + C[i, j] * B[j, k] * B[r, k] * g(r, x)
    return S

x_seq = np.arange(-np.pi, np.pi, 2 * np.pi / 100)
plt.figure()
plt.xlim(-np.pi, np.pi)
# plt.ylim(-15, 25)
plt.xlabel("Days")
plt.ylabel("Temp(C)")
plt.title("Reconstruction for each m")
plt.plot(x, df[13], label = "Original")
for m in range(2, 7):
    plt.plot(x_seq, z(13, m, x_seq), c = color[m], label = "m=%d"%m)
plt.legend(loc = "lower center", ncol = 2)

<matplotlib.legend.Legend at 0x18eada2e520>

lam = pca.explained_variance_
ratio = lam / sum(lam) # Or use pca.explained_variance_ratio_
plt.plot(range(1, 6), ratio[:5])
plt.xlabel("PC1 through PC5")
plt.ylabel("Ratio")
plt.title("Ratio")

Text(0.5, 1.0, 'Ratio')

def h(coef, x):
    S = 0
    for j in range(p):
        S = S + coef[j] * g(j, x)
    return S
print(B)
plt.figure()
plt.xlim(-np.pi, np.pi)
plt.ylim(-1, 1)
for j in range(3):
    plt.plot(x_seq, h(B[:, j], x_seq), c = colors[j], label = "PC%d"%(j+1))
plt.legend(loc = "best")

[[-5.17047156e-01 -2.43880782e-01  7.48988279e-02 ...  5.48412387e-04
  -1.22748578e-03  8.07866592e-01]
 [-7.31215100e-01 -3.44899509e-01  1.05922938e-01 ...  7.75572240e-04
  -1.73592707e-03 -5.71437836e-01]
 [ 3.13430279e-01 -6.12932605e-01  1.50738649e-01 ... -6.99018707e-03
   1.19586600e-03 -5.41866413e-02]
 ...
 [ 3.08129173e-05 -2.83373696e-03  8.28893867e-03 ...  1.35931675e-01
  -2.34867483e-01  1.23616273e-03]
 [ 1.47021532e-03  3.25749669e-03 -4.83933350e-03 ... -1.32596334e-01
   1.60270831e-01  8.94103792e-03]
 [ 1.47021531e-03  3.25749669e-03 -4.83933350e-03 ... -1.32596334e-01
   1.60270831e-01 -1.17997796e-02]]

<matplotlib.legend.Legend at 0x18eae2fa9d0>

place = X.iloc[:, 1]
index = [9, 11, 12, 13, 16, 23, 25, 26]
others = [x for x in range(34) if x not in index]
first = [place[i][0] for i in index]
print(first)
plt.figure()
plt.xlim(-15, 25)
plt.ylim(-25, -5)
plt.xlabel("PC1")
plt.ylabel("PC2")
plt.title("Canadian Weather")
plt.scatter(xx[others, 0], xx[others, 1], marker = "x", c = "k")
for i in range(8):
    l = plt.text(xx[index[i], 0], xx[index[i], 1],
             s = first[i], c = color[i])

['Q', 'M', 'O', 'T', 'W', 'C', 'V', 'V']

問題¶

def gp_2(x_pred):
    h = np.zeros(n)
    for i in range(n):
        h[i] = k(x_pred, x[i])
    L = np.linalg.cholesky(K + sigma_2*np.identity(n))
    alpha = np.linalg.solve(L, np.linalg.solve(L.T, (y - mu(x))))
    mm = mu(x_pred) + np.sum(np.dot(h.T, alpha))
    gamma = np.linalg.solve(L.T, h)
    ss = k(x_pred, x_pred) - np.sum(gamma**2)
    return {"mm":mm, "ss":ss}