第9章 教師なし学習

9.1 \(K\)-meansクラスタリング

k.means <- function(X, K, iteration = 20) {
  n <- nrow(X)
  p <- ncol(X)
  center <- array(dim = c(K, p))
  y <- sample(1:K, n, replace = TRUE)
  scores <- NULL            #
  for (h in 1:iteration) {
    for (k in 1:K) {
      if (sum(y[] == k) == 0) {
        # sum(y[] == k)でy[i] = kなるiの個数を意味する。
        # サンプルを含まないクラスタは消滅
        center[k, ] <- Inf
      } else {
        for (j in 1:p)
          center[k, j] <- mean(X[y[] == k, j])
      }
    }
    S.total <- 0                        #
    for (i in 1:n) {
      S.min <- Inf
      for (k in 1:K) {
        S <- sum((X[i, ] - center[k, ]) ^ 2)
        if (S < S.min) {
          S.min <- S
          y[i] <- k
        }
      }
      S.total <- S.total + S.min        #
    }
    scores <- c(scores, S.total)        #
  }
  return(list(clusters = y, scores = scores))
}

例80

# データ生成
p <- 2
n <- 1000
X <- matrix(rnorm(p * n), nrow = n, ncol = p)

# 各サンプルのクラスタを得る
y <- k.means(X, 5)$clusters

# クラスタごとに色を変えて,点を描く
plot(-3:3, -3:3, xlab = "第1成分", ylab = "第2成分", type = "n")
points(X[, 1], X[, 2], col = y + 1)

例82

# データ生成
p <- 2
n <- 1000
X <- matrix(rnorm(p * n), nrow = n, ncol = p)

input <- 1:20
output <- k.means(X, 5)$scores
plot(input, log(output), ylim = c(6.2, 7.5),
     xlab = "繰り返し回数", ylab = "log(スコア)",
     type = "l", col = 1, main = "初期値ごとに,スコアの変化をみる")
for (r in 2:10) {
  output <- k.means(X, 5)$scores
  lines(input, log(output), col = r)}

9.2 階層的クラスタリング

dist.complete <- function(x, y) {
  x <- as.matrix(x)
  y <- as.matrix(y)
  r <- nrow(x)
  s <- nrow(y)
  dist.max <- 0
  for (i in 1:r) {
    for (j in 1:s) {
      d <- norm(x[i, ] - y[j, ], "2")
      if (d > dist.max)
        dist.max <- d
    }
  }
  return(dist.max)
}

dist.single <- function(x, y) {
  x <- as.matrix(x)
  y <- as.matrix(y)
  r <- nrow(x)
  s <- nrow(y)
  dist.min <- Inf
  for (i in 1:r) {
    for (j in 1:s) {
      d <- norm(x[i, ] - y[j, ], "2")
      if (d < dist.min)
        dist.min <- d
    }
  }
  return(dist.min)
}

dist.centroid <- function(x, y) {
  x <- as.matrix(x)
  y <- as.matrix(y)
  r <- nrow(x)
  s <- nrow(y)
  x.bar <- 0
  for (i in 1:r)
    x.bar <- x.bar + x[i, ]
  x.bar <- x.bar/r
  y.bar <- 0
  for (i in 1:s)
    y.bar <- y.bar + y[i, ]
  y.bar <- y.bar / s
  return(norm(x.bar - y.bar, "2") ^ 2)
}

dist.average <- function(x, y) {
  x <- as.matrix(x)
  y <- as.matrix(y)
  r <- nrow(x)
  s <- nrow(y)
  S <- 0
  for (i in 1:r) {
    for (j in 1:s) {
      d <- norm(x[i, ] - y[j, ], "2")
      S <- S + d
    }
  }
  return(S / r / s)
}
hc <- function(X, dd = "complete") {
  n <- nrow(X)
  index <- list()
  for (i in 1:n)
    index[[i]] <- list(i)
  cluster <- list()
  for (k in n:2) {
    dist.min <- Inf
    for (i in 1:(k - 1)) {
      for (j in (i + 1):k) {
        i.0 <- unlist(index[[i]])
        j.0 <- unlist(index[[j]])
        d <- switch(dd,
                    "complete" = dist.complete(X[i.0, ], X[j.0, ]),
                    "single" = dist.single(X[i.0, ], X[j.0, ]),
                    "centroid" = dist.centroid(X[i.0, ], X[j.0, ]),
                    "average" = dist.average(X[i.0, ], X[j.0, ]))
        if (d < dist.min) {
          dist.min <- d
          i.1 <- i
          j.1 <- j
        }
      }
    }
    index[[i.1]] <- append(index[[i.1]], index[[j.1]])
    if (j.1 < k)
      for (h in (j.1 + 1):k)
        index[[h - 1]] <- index[[h]]
    index[[k]]=NULL
    cluster[[k-1]]=index
  }
  return(cluster)
}

例83

# データ生成
n <- 200
p <- 2
X <- matrix(rnorm(n * p), ncol = p, nrow = n)

cluster <- hc(X)
par(mfrow = c(2, 2))
for (K in c(3, 5, 7, 9)) {
  grp <- cluster[[K]]
  plot(-3:3, -3:3, xlab = "第1成分", ylab = "第2成分",
       type = "n", main = paste("K = ", K))
  for (k in 1:K) {
    z <- unlist(grp[[k]])
    x <- X[z, 1]
    y <- X[z, 2]
    points(x, y, col = k + 1)
  }
}

# データ生成
n <- 100
p <- 2
K <- 7
X <- matrix(rnorm(n * p), ncol = p, nrow = n)

for (d in c("complete", "single", "centroid", "average")) {
  cluster <- hc(X, dd = d)
  grp <- cluster[[K]]
  plot(-3:3, -3:3, xlab = "第1成分", ylab = "第2成分", type = "n", main = d)
  for (k in 1:K) {
    z <- unlist(grp[[k]])
    x <- X[z, 1]
    y <- X[z, 2]
    points(x, y, col = k + 1)
  }
}

par(mfrow = c(1, 1))

例85

n <- 100
x <- matrix(rnorm(n * 2), ncol = 2)
par(mfrow = c(2, 2))

hc.complete <- hclust(dist(x), method = "complete")
plot(hc.complete)

hc.single <- hclust(dist(x), method = "single")
plot(hc.single)

hc.centroid <- hclust(dist(x), method = "centroid")
plot(hc.centroid)

hc.average <- hclust(dist(x), method = "average")
plot(hc.average)

9.3 主成分分析

pca <- function(x) {
  n <- nrow(x)
  p <- ncol(x)
  center <- array(dim = p)
  for (j in 1:p)
    center[j] <- mean(x[, j])
  for (j in 1:p)
    x[, j] <- x[, j] - center[j]
  sigma <- t(x) %*% x / n
  lambda <- eigen(sigma)$values
  phi <- eigen(sigma)$vectors
  return(list(lambdas = lambda, vectors = phi, centers = center))
}

例86

n <- 100
p <- 5
x <- matrix(rnorm(n * p), ncol = p, nrow = n)
pca(x)$lambdas
## [1] 1.2667853 1.0748733 1.0037504 0.9334545 0.5990494
pca(x)$vectors
##            [,1]       [,2]       [,3]        [,4]       [,5]
## [1,]  0.4385672  0.6905285  0.4430918 -0.01409345 -0.3664699
## [2,] -0.3564274  0.5700479 -0.6670760 -0.28478816 -0.1480210
## [3,] -0.4201573 -0.2820769  0.1556820  0.03707042 -0.8475195
## [4,]  0.4805386 -0.3225419 -0.1579887 -0.77621617 -0.1938491
## [5,]  0.5226556 -0.1208929 -0.5563110  0.56108049 -0.2965178
pca(x)$centers
## [1] 0.05804285 0.12217964 0.11283619 0.09725292 0.14572367
prcomp(x)$rotation
##             PC1        PC2        PC3         PC4        PC5
## [1,] -0.4385672 -0.6905285 -0.4430918 -0.01409345 -0.3664699
## [2,]  0.3564274 -0.5700479  0.6670760 -0.28478816 -0.1480210
## [3,]  0.4201573  0.2820769 -0.1556820  0.03707042 -0.8475195
## [4,] -0.4805386  0.3225419  0.1579887 -0.77621617 -0.1938491
## [5,] -0.5226556  0.1208929  0.5563110  0.56108049 -0.2965178
(prcomp(x)$sdev) ^ 2
## [1] 1.2795812 1.0857306 1.0138893 0.9428834 0.6051005
prcomp(x)$center
## [1] 0.05804285 0.12217964 0.11283619 0.09725292 0.14572367
names(prcomp(x))
## [1] "sdev"     "rotation" "center"   "scale"    "x"
pr.var <- (prcomp(x)$sdev) ^ 2
pve <- pr.var / sum(pr.var)
par(mfrow = c(1, 2))
plot(pve, xlab = "主成分", ylab = "寄与率", ylim = c(0, 1), type = "b")
plot(cumsum(pve), xlab = "主成分", ylab = "累積の寄与率",
     ylim = c(0, 1), type = "b")

例87

n <- 100
a <- 0.7
b <- sqrt(1 - a ^ 2)
u <- rnorm(n)
v <- rnorm(n)
x <- u
y <- u * a + v * b
plot(x, y)

plot(x, y, xlim = c(-4, 4), ylim = c(-4, 4))
T <- prcomp(cbind(x, y))$rotation
T[2, 1] / T[1, 1] * T[2, 2] / T[1, 2]
## [1] -1
abline(0, T[2, 1] / T[1, 1], col = "red")
abline(0, T[2, 2] / T[1, 2], col = "blue")

例88

pr.out <- prcomp(USArrests, scale = TRUE)
biplot(pr.out)

pr.out$x <- -pr.out$x
pr.out$rotation <- -pr.out$rotation
biplot(pr.out)

例89

library(MASS)

z <- as.matrix(Boston)
y <- k.means(z,5)$clusters
w <- prcomp(z)$x[, 1:2]
plot(w, col = y + 1, xlab = "第1主成分", ylab = "第2主成分",
     main = "Bostonデータのクラスタリング")

pca.regression <- function(X, y, m) {
  pr <- prcomp(X)
  Z <- pr$x[, 1:m]
  phi <- pr$rotation[, 1:m]
  theta <- solve(t(Z) %*% Z) %*% t(Z) %*% y
  beta <- phi %*% theta
  return(list(theta = theta, beta = beta))
}

例90

# データ生成
n <- 100
p <- 5
X <- matrix(rnorm(n * p), nrow = n, ncol = p)
for (j in 1:p)
  X[, j] <- X[, j] - mean(X[, j])
y <- X[, 1] + X[, 2] + X[, 3] + X[, 4] + X[, 5] + rnorm(n)
y <- y - mean(y)

# 実行
pca.regression(X, y, 3)
## $theta
##           [,1]
## PC1  0.5181417
## PC2 -1.1049787
## PC3  1.1818900
## 
## $beta
##            [,1]
## [1,]  1.2962792
## [2,] -0.1341256
## [3,]  0.1453994
## [4,]  0.8155858
## [5,]  0.7082811
pca.regression(X, y, 5)$beta
##           [,1]
## [1,] 1.1840991
## [2,] 1.0791514
## [3,] 0.9567213
## [4,] 0.9105458
## [5,] 0.8674473
solve(t(X) %*% X) %*% t(X) %*% y
##           [,1]
## [1,] 1.1840991
## [2,] 1.0791514
## [3,] 0.9567213
## [4,] 0.9105458
## [5,] 0.8674473

付録 プログラム

hc.dendroidgram <- function(cluster, dd = "complete") {
  index <- unlist(cluster[[1]])
  y <- unlist(index)
  n <- length(y)
  height <- array(dim = n)
  z <- matrix(0, ncol = 5, nrow = n)
  for (i in 1:n)
    index[[i]] <- list(y[i])
  for (i in 1:n)
    height[i] <- 0
  for (k in n:2) {
    dist.min <- Inf
    for (i in 1:(k - 1)) {
      i.0 <- unlist(index[[i]])
      j.0 <- unlist(index[[i + 1]])
      d <- switch(dd,
                  "complete" = dist.complete(X[i.0, ], X[j.0, ]),
                  "single" = dist.single(X[i.0, ], X[j.0, ]),
                  "centroid" = dist.centroid(X[i.0, ], X[j.0, ]),
                  "average" = dist.average(X[i.0, ], X[j.0, ]))
      if (d < dist.min) {
        dist.min <- d
        i.1 <- i
        j.1 <- i + 1
      }
    }
    i <- 0
    for (h in 1:i.1)
      i <- i + length(index[[h]])
    j <- i + length(index[[j.1]])
    z[k, 1] <- i - length(index[[i.1]]) / 2 + 0.5
    z[k, 2] <- j - length(index[[j.1]]) / 2 + 0.5
    z[k, 3] <- height[i.1]
    z[k, 4] <- height[j.1]
    z[k, 5] <- dist.min
    index[[i.1]] <- append(index[[i.1]], index[[j.1]])
    if (j.1 < k) {
      for (h in (j.1 + 1):k) {
        index[[h - 1]] <- index[[h]]
        height[h - 1] <- height[h]
      }
    }
    index[[k]] <- NULL
    height[i.1] <- dist.min
  }
  plot(1:n, 1:n, ylim = c(0, 100), xlab = "", ylab = "", type = "n",
       xaxt = "n", yaxt = "n", bty = "n", main = dd)
  r <- z[2, 5] / 100
  for (k in n:2)
    z[k, 3:5] <- z[k, 3:5] / r
  for (k in n:2) {
    segments(z[k, 1], z[k, 3], z[k, 1], z[k, 5])
    segments(z[k, 1], z[k, 5], z[k, 2], z[k, 5])
    segments(z[k, 2], z[k, 5], z[k, 2], z[k, 4])
  }
  for (i in 1:n)
    text(i, 0, y[i])
}

# データ生成
n <- 30
p <- 3
X <- matrix(rnorm(n * p), ncol = p, nrow = n)

par(mfrow = c(2, 2))
for (d in c("complete", "single", "centroid", "average")) {
  cluster <- hc(X, dd = d)
  hc.dendroidgram(cluster, dd = d)
}