第9章 教師なし学習(問題88~100)
88
k.means <- function(X, K, iteration = 20) {
n <- nrow(X)
p <- ncol(X)
center <- array(dim = c(K, p))
y <- sample(1:K, n, replace = TRUE) # ベクトルyにクラスタが格納
for (h in 1:iteration) {
for (k in 1:K) {
if (sum(y[] == k) == 0) {
center[k, ] <- Inf
} else {
for (j in 1:p)
center[k, j] <- ## 空欄(1) ##
}
}
for (i in 1:n) {
S.min <- Inf
for (k in 1:K) {
S <- sum((X[i, ] - center[k, ]) ^ 2)
if (S < S.min) {
S.min <- S
## 空欄(2) ##
}
}
}
}
return(list(clusters = y, scores = scores))
}
# データ生成
p <- 2
n <- 1000
X <- matrix(rnorm(p * n), nrow = n, ncol = p)
# 各サンプルのクラスタを得る
y <- k.means(X, 5)$clusters
# クラスタごとに色を変えて,点を描く
plot(-3:3, -3:3, xlab = "第1成分", ylab = "第2成分", type = "n")
points(X[, 1], X[, 2], col = y + 1)
92
hc <- function(X, dd = "complete") {
n <- nrow(X)
index <- list()
for (i in 1:n)
index[[i]] <- list(i)
cluster <- list()
for (k in n:2) {
dist.min <- Inf
for (i in 1:(k - 1)) {
for (j in (i + 1):k) {
i.0 <- unlist(index[[i]])
j.0 <- unlist(index[[j]])
d <- switch(dist,
"complete" = dist.complete(X[i.0, ], X[j.0, ]),
"single" = dist.single(X[i.0, ], X[j.0, ]),
"centroid" = dist.centroid(X[i.0, ], X[j.0, ]),
"average" = dist.average(X[i.0, ], X[j.0, ]))
if (d < dist.min) {
## 空欄(1) ##
i.1 <- i
j.1 <- j
}
}
}
index[[i.1]] <- append(index[[i.1]], index[[j.1]])
if (j.1 < k)
for (h in (j.1 + 1):k)
index[[h - 1]] <- ## 空欄(2) ##
index[[k]] <- NULL
cluster[[k-1]] <- index
}
return(cluster)
}
# データ生成
n <- 200
p <- 2
X <- matrix(rnorm(n * p), ncol = p, nrow = n)
cluster <- hc(X)
par(mfrow = c(2, 2))
for (K in c(3, 5, 7, 9)) {
grp <- cluster[[K]]
plot(-3:3, -3:3, xlab = "第1成分", ylab = "第2成分", type = "n",
main = paste("K = ", K))
for (k in 1:K) {
z <- unlist(grp[[k]])
x <- X[z, 1]
y <- X[z, 2]
points(x, y, col = k + 1)
}
}
95
n <- 100
p <- 5
x <- matrix(rnorm(n * p), ncol = p, nrow = n)
pca(x)$lambdas
pca(x)$vectors
pca(x)$centers
prcomp(x)$rotation
(prcomp(x)$sdev) ^ 2
prcomp(x)$center
names(prcomp(x))
96
#データ生成
n <- 100
a <- 0.7
b <- sqrt(1 - a ^ 2)
u <- rnorm(n)
v <- rnorm(n)
x <- u
y <- u * a + v * b
plot(x, y)
plot(x, y, xlim = c(-4, 4), ylim = c(-4, 4))
T <- prcomp(cbind(x, y))$rotation
T[2, 1] / T[1, 1] * T[2, 2] / T[1, 2]
abline(## 空欄(1) ##, ## 空欄(2) ##, col = "red")
abline(## 空欄(3) ##, ## 空欄(4) ##, col = "blue")
98
library(ISLR)
pr.out <- prcomp(USArrests, scale = TRUE)
99
pr.var <- ## 空欄(1) ##
pve <- ## 空欄(2) ##
par(mfrow = c(1, 2))
plot(pve, xlab = "主成分", ylab = "寄与率", ylim = c(0, 1) , type = "b")
plot(cumsum(pve), xlab = "主成分", ylab = "累積の寄与率",
ylim = c(0, 1), type = "b")
100
pca.regression <- function(X, y, m) {
pr <- prcomp(X)
Z <- pr$x[, 1:m]
phi <- pr$rotation[, 1:m]
theta <- ## 空欄 ##
return(list(theta = theta, beta = beta))
}
# データ生成
n <- 100
p <- 5
X <- matrix(rnorm(n * p), nrow = n, ncol = p)
for (j in 1:p)
X[, j] <- X[, j] - mean(X[, j])
y <- X[, 1] + X[, 2] + X[, 3] + X[, 4] + X[, 5] + rnorm(n)
y <- y - mean(y)
# 実行
pca.regression(X, y, 3)
pca.regression(X, y, 5)$beta
solve(t(X) %*% X) %*% t(X) %*% y