7.1 主成分分析(1):SCoTLASS
soft.th <- function(lambda, z) {
return(sign(z) * pmax(abs(z) - lambda, 0))
} ## zがベクトルでも,soft.thは動作する
SCoTLASS <- function(lambda, X) {
n <- nrow(X)
p <- ncol(X)
v <- rnorm(p)
v <- v / norm(v, "2")
for (k in 1:200) {
u <- X %*% v
u <- u / norm(u, "2")
v <- t(X) %*% u
v <- soft.th(lambda, v)
size <- norm(v, "2")
if (size > 0) {
v = v / size
} else {
break
}
}
if (norm(v, "2") == 0)
print("vの全要素が0になった")
return(v)
}
例61
## データ生成
n <- 100
p <- 50
X <- matrix(rnorm(n * p), nrow = n)
lambda.seq <- 0:10 / 10
m <- 5
SS <- array(dim = c(m, 11))
TT <- array(dim = c(m, 11))
for (j in 1:m) {
S <- NULL
T <- NULL
for (lambda in lambda.seq) {
v <- SCoTLASS(lambda, X)
S <- c(S, sum(sign(v ^ 2)))
T <- c(T, norm(X %*% v, "2"))
}
SS[j, ] <- S
TT[j, ] <- T
}
## 作図
par(mfrow = c(1, 2))
SS.min <- min(SS)
SS.max <- max(SS)
plot(lambda.seq, xlim = c(0, 1), ylim = c(SS.min, SS.max),
xlab = "lambda", ylab = "非ゼロベクトルの個数")
for (j in 1:m)
lines(lambda.seq, SS[j, ], col = j + 1)
legend("bottomleft", paste0(1:5, "回目"), lwd = 1, col = 2:(m + 1))
TT.min <- min(TT)
TT.max <- max(TT)
plot(lambda.seq, xlim = c(0, 1), ylim = c(TT.min, TT.max),
xlab = "lambda", ylab = "分散の和")
for (j in 1:m)
lines(lambda.seq, TT[j, ], col = j + 1)
legend("bottomleft", paste0(1:5, "回目"), lwd = 1, col = 2:(m + 1))
par(mfrow = c(1, 1))
7.3 K-meansクラスタリング
k.means <- function(X, K, weights = w) {
n <- nrow(X)
p <- ncol(X)
y <- sample(1:K, n, replace = TRUE)
center <- array(dim = c(K, p))
for (h in 1:10) {
for (k in 1:K) {
if (sum(y[] == k) == 0) {
center[k, ] <- Inf
} else {
for (j in 1:p)
center[k, j] <- mean(X[y[] == k, j])
}
}
for (i in 1:n) {
S.min <- Inf
for (k in 1:K) {
if (center[k, 1] == Inf)
break
S <- sum((X[i, ] - center[k, ]) ^ 2 * w)
if (S < S.min) {
S.min <- S
y[i] <- k
}
}
}
}
return(y)
}
例63
## データの生成
K <- 10
p <- 2
n <- 1000
X <- matrix(rnorm(p * n), nrow = n, ncol = p)
w <- c(1, 1)
y <- k.means(X, K, w)
## 結果の出力
plot(-3:3, -3:3, xlab = "x", ylab = "y", type = "n")
points(X[, 1], X[, 2], col = y + 1)
sparse.k.means <- function(X, K, s) {
p <- ncol(X)
w <- rep(1, p)
for (h in 1:10) {
y <- k.means(X, K, w)
a <- comp.a(X, y)
w <- w.a(a, s)
}
return(list(w = w, y = y))
}
w.a <- function(a, s) {
w <- rep(1, p)
a <- a / sqrt(sum(a ^ 2))
if (sum(a) < s)
return(a)
p <- length(a)
lambda <- max(a) / 2
delta <- lambda / 2
for (h in 1:10) {
for (j in 1:p)
w[j] <- soft.th(lambda, a[j])
ww <- sqrt(sum(w ^ 2))
if (ww == 0) {
w <- 0
} else {
w <- w / ww
}
if (sum(w) > s) {
lambda <- lambda + delta
} else {
lambda <- lambda - delta
}
delta <- delta / 2
}
return(w)
}
comp.a <- function(X, y) {
n <- nrow(X)
p <- ncol(X)
a <- array(dim = p)
for (j in 1:p) {
a[j] <- 0
for (i in 1:n)
for (h in 1:n)
a[j] <- a[j] + (X[i, j] - X[h, j]) ^ 2 / n
for (k in 1:K) {
S <- 0
index <- which(y == k)
if (length(index) == 0)
break
for (i in index)
for (h in index)
S <- S + (X[i, j] - X[h, j]) ^ 2
a[j] <- a[j] - S / length(index)
}
}
return(a)
}
例64
p <- 10
n <- 100
X <- matrix(rnorm(p * n), nrow = n, ncol = p)
sparse.k.means(X, 5, 1.5)
## $w
## [1] 0.1067211 0.1274793 0.3423918 0.0000000 0.0000000 0.0000000 0.0000000
## [8] 0.0000000 0.0000000 0.9247311
##
## $y
## [1] 1 2 4 1 5 2 3 5 5 1 5 5 1 2 1 1 1 5 3 1 2 5 5 2 2 2 1 5 3 4 4 4 1 5 3 4 3
## [38] 4 4 4 1 5 1 3 3 3 2 1 2 2 1 4 4 5 1 1 1 3 2 4 3 2 4 2 1 1 2 4 2 4 4 2 3 5
## [75] 1 4 2 3 1 5 4 5 4 4 3 3 2 4 5 4 4 1 5 5 4 1 4 4 3 3
7.4 凸クラスタリング
## 重みを計算
ww <- function(x, mu = 1, dd = 0) {
n <- nrow(x)
w <- array(dim = c(n, n))
for (i in 1:n)
for (j in 1:n)
w[i, j] <- exp(-mu * sum((x[i, ] - x[j, ]) ^ 2))
if (dd > 0) {
for (i in 1:n) {
dis <- NULL
for (j in 1:n)
dis <- c(dis, sqrt(sum((x[i, ] - x[j, ]) ^ 2)))
index <- which(dis > dd)
w[i, index] <- 0
}
}
return(w)
}
## L2の場合のprox(グループLasso)
prox <- function(x, tau) {
if (sum(x ^ 2) == 0) {
return(x)
} else {
return(max(0, 1 - tau / sqrt(sum(x ^ 2))) * x)
}
}
## uの更新
update.u <- function(v, lambda) {
u <- array(dim = c(n, d))
z <- 0
for (i in 1:n)
z <- z + x[i, ]
y <- x
for (i in 1:n) {
if (i < n)
for (j in (i + 1):n)
y[i, ] <- y[i, ] + lambda[i, j, ] + nu * v[i, j, ]
if (1 < i)
for (j in 1:(i - 1))
y[i, ] <- y[i, ] - lambda[j, i, ] - nu * v[j, i, ]
u[i, ] <- (y[i, ] + nu * z) / (n * nu + 1)
}
return(u)
}
## vの更新
update.v <- function(u, lambda) {
v <- array(dim = c(n, n, d))
for (i in 1:(n - 1))
for (j in (i + 1):n)
v[i, j, ] <- prox(u[i, ] - u[j, ] - lambda[i, j, ] / nu, gamma * w[i, j] / nu)
return(v)
}
## lambdaの更新
update.lambda <- function(u, v, lambda) {
for (i in 1:(n - 1))
for (j in (i + 1):n)
lambda[i, j, ] <- lambda[i, j, ] + nu * (v[i, j, ] - u[i, ] + u[j, ])
return(lambda)
}
## u,v,lambdaの更新のサイクルをmax_iterだけ繰り返す
convex.cluster <- function() {
v <- array(rnorm(n * n * d), dim = c(n, n, d))
lambda <- array(rnorm(n * n * d), dim = c(n, n, d))
for (iter in 1:max_iter) {
u <- update.u(v, lambda)
v <- update.v(u, lambda)
lambda <- update.lambda(u, v, lambda)
}
return(list(u = u, v = v))
}
例65
## データ生成
n <- 50
d <- 2
x <- matrix(rnorm(n * d), n, d)
## 凸クラスタリングの実行
w <- ww(x, 1, dd = 0.5)
gamma <- 1 # gamma <- 10
nu <- 1
max_iter <- 1000
v <- convex.cluster()$v
## 隣接行列の計算
a <- array(0, dim = c(n, n))
for (i in 1:(n - 1)) {
for (j in (i + 1):n) {
if (sqrt(sum(v[i, j, ] ^ 2)) < 1 / 10 ^ 4) {
a[i, j] <- 1
a[j, i] <- 1
}
}
}
## 作図
k <- 0
y <- rep(0, n)
for (i in 1:n) {
if (y[i] == 0) {
k <- k + 1
y[i] <- k
if (i < n)
for (j in (i + 1):n)
if (a[i, j] == 1)
y[j] <- k
}
}
plot(0, xlim = c(-3, 3), ylim = c(-3, 3), type = "n", main = "gamma = 1, dd = 0.5")
points(x[, 1], x[, 2], col = y + 1)
s.update.u <- function(G, G.inv, v, lambda) {
u <- array(dim = c(n, d))
y <- x
for (i in 1:n) {
if (i < n)
for (j in (i + 1):n)
y[i, ] <- y[i, ] + lambda[i, j, ] + nu * v[i, j, ]
if (1 < i)
for (j in 1:(i - 1))
y[i, ] <- y[i, ] - lambda[j, i, ] - nu * v[j, i, ]
}
for (j in 1:d)
u[, j] <- gr(G, G.inv %*% y[, j], gamma.2 * r[j]) ##
for (j in 1:d)
u[, j] <- u[, j] - mean(u[, j])
return(u)
}
s.convex.cluster <- function() {
## gamma.2,r[1],...,r[p]を設定しておく
G <- sqrt(1 + n * nu) * diag(n) - (sqrt(1 + n * nu) - 1) / n * matrix(1, n, n)
G.inv <- (1 + n * nu) ^ (-0.5) *
(diag(n) + (sqrt(1 + n * nu) - 1) / n * matrix(1, n, n))
v <- array(rnorm(n * n * d), dim = c(n, n, d))
lambda <- array(rnorm(n * n * d), dim = c(n, n, d))
for (iter in 1:max_iter) {
u <- s.update.u(G, G.inv, v, lambda)
v <- update.v(u, lambda)
lambda <- update.lambda(u, v, lambda)
}
return(list(u = u, v = v))
}
例66
## データの生成
n <- 50
d <- 10
x <- matrix(rnorm(n * d), n, d)
## 実行前の設定
w <- ww(x, 1 / d, dd = sqrt(d)) ## dが大きいので調節
gamma <- 10
nu <- 1
max_iter <- 1000
r <- rep(1, d)
## gamma.2を変えて実行し,係数の値をグラフで表示
gamma.2.seq <- seq(1, 10, 1)
m <- length(gamma.2.seq)
z <- array(dim = c(m, d))
h <- 0
for (gamma.2 in gamma.2.seq) {
h <- h + 1
u <- s.convex.cluster()$u
print(gamma.2)
for (j in 1:d)
z[h, j] <- u[5, j]
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
plot(0, xlim = c(1, 10), ylim = c(-2, 2), type = "n",
xlab = "gamma.2", ylab = "係数", main = "gamma = 10")
for (j in 1:d)
lines(gamma.2.seq, z[, j], col = j + 1)