第4章 Fused Lasso
47(a)
library(genlasso)
df <- read.table("cgh.txt")
y <- df[[1]]
N <- length(y)
theta <- ## 空欄 ##
plot(1:N, theta,
xlab = "遺伝子番号", ylab = "コピー数比(対数値)",
col = "red", type = "l")
points(1:N, y, col = "blue")
47(b)
library(genlasso)
N <- 100
y <- sin(1:N/(N * 2 * pi)) + rnorm(N, sd = 0.3) # データ生成
out <- ## 空欄 ##
plot(out, lambda = lambda) # 平滑化と出力
51
lars <- function(X, y) {
X <- as.matrix(X)
n <- nrow(X)
p <- ncol(X)
X.bar <- array(dim = p)
for (j in 1:p) {
X.bar[j] <- mean(X[, j])
X[, j] <- X[, j] - X.bar[j]
}
y.bar <- mean(y)
y <- y - y.bar
scale <- array(dim = p)
for (j in 1:p) {
scale[j] <- sqrt(sum(X[, j] ^ 2) / n)
X[, j] <- X[, j] / scale[j]
}
beta <- matrix(0, p + 1, p)
lambda <- rep(0, p + 1)
for (i in 1:p) {
lam <- abs(sum(X[, i] * y))
if (lam > lambda[1]) {
i.max <- i
lambda[1] <- lam
}
}
r <- y
index <- i.max
Delta <- rep(0, p)
for (k in 2:p) {
Delta[index] <- solve(t(X[, index]) %*% X[, index]) %*%
t(X[, index]) %*% r / lambda[k - 1]
u <- t(X[, -index]) %*% (r - lambda[k - 1] * X %*% Delta)
v <- -t(X[, -index]) %*% (X %*% Delta)
t <- ## 空欄(1) ##
for (i in 1:(p - k + 1)) {
if (t[i] > lambda[k]) {
lambda[k] <- t[i]
i.max <- i
}
}
t <- u / (v - 1)
for (i in 1:(p - k + 1)) {
if (t[i] > lambda[k]) {
lambda[k] <- t[i]
i.max <- i
}
}
j <- setdiff(1:p, index)[i.max]
index <- c(index, j)
beta[k, ] <- ## 空欄(2) ##
r <- y - X %*% beta[k, ]
}
for (k in 1:(p + 1)) {
for (j in 1:p)
beta[k, j] <- beta[k, j] / scale[j]
}
return(list(beta = beta, lambda = lambda))
}
df <- read.table("crime.txt")
X <- as.matrix(df[, 3:7])
y <- df[, 1]
res <- lars(X, y)
beta <- res$beta
lambda <- res$lambda
p <- ncol(beta)
plot(0:8000, ylim = c(-7.5, 15), type = "n",
xlab = "lambda", ylab = "beta", main = "LARS(米国犯罪データ)")
abline(h = 0)
for (j in 1:p)
lines(lambda[1:(p)], beta[1:(p), j], col = j)
legend("topright",
legend = c("警察への年間資金",
"25 歳以上で高校を卒業した人の割合",
"16-19 歳で高校に通っていない人の割合",
"18-24 歳で大学生の割合",
"25 歳以上で 4 年制大学を卒業した人の割合"),
col = 1:p, lwd = 2, cex = .8)
56
fused.dual <- function(y, D) {
m <- nrow(D)
lambda <- rep(0, m)
s <- rep(0, m)
alpha <- matrix(0, m, m)
alpha[1, ] <- solve(D %*% t(D)) %*% D %*% y
for (j in 1:m) {
if (abs(alpha[1, j]) > lambda[1]) {
lambda[1] <- abs(alpha[1, j])
index <- j
if (alpha[1, j] > 0) {
## 空欄(1) ##
}
}
}
for (k in 2:m) {
U <- solve(D[-index, ] %*% t(as.matrix(D[-index, , drop = FALSE])))
V <- D[-index, ] %*% t(as.matrix(D[index, , drop = FALSE]))
u <- U %*% D[-index, ] %*% y
v <- U %*% V %*% s[index]
t <- u / (v + 1)
for (j in 1:(m - k + 1)) {
if (t[j] > lambda[k]) {
lambda[k] <- t[j]
h <- j
r <- 1
}
}
t <- u / (v - 1)
for (j in 1:(m - k + 1)) {
if (t[j] > lambda[k]) {
lambda[k] <- t[j]
h <- j
r <- -1
}
}
alpha[k, index] <- ## 空欄(2) ##
alpha[k, -index] <- ## 空欄(3) ##
h <- setdiff(1:m, index)[h]
if (r == 1) {
s[h] <- 1
} else {
s[h] <- -1
}
index <- c(index, h)
}
return(list(alpha = alpha, lambda = lambda))
}
m <- p - 1
D <- matrix(0, m, p)
for (i in 1:m) {
D[i, i] <- 1
D[i, i + 1] <- -1
}
fused.prime <- function(y, D){
res <- fused.dual(y, D)
return(list(beta = t(y - t(D) %*% t(res$alpha)), lambda = res$lambda))
}
p <- 8
y <- sort(rnorm(p))
m <- p - 1
s <- 2 * rbinom(m, 1, 0.5) - 1
D <- matrix(0, m, p)
for (i in 1:m) {
D[i, i] <- s[i]
D[i, i + 1] <- -s[i]
}
par(mfrow = c(1, 2))
res <- fused.dual(y, D)
alpha <- res$alpha
lambda <- res$lambda
lambda.max <- max(lambda)
m <- nrow(alpha)
alpha.min <- min(alpha)
alpha.max <- max(alpha)
plot(0:lambda.max,
xlim = c(0, lambda.max), ylim = c(alpha.min, alpha.max), type = "n",
xlab = "lambda", ylab = "alpha", main = "双対問題")
u <- c(0, lambda)
v <- rbind(0, alpha)
for (j in 1:m)
lines(u, v[, j], col = j)
res <- fused.prime(y, D)
beta <- res$beta
beta.min <- min(beta)
beta.max <- max(beta)
plot(0:lambda.max,
xlim = c(0, lambda.max), ylim = c(beta.min, beta.max), type = "n",
xlab = "lambda", ylab = "beta", main = "主問題")
w <- rbind(0, beta)
for (j in 1:p)
lines(u, w[, j], col = j)
par(mfrow = c(1, 1))
57
n <- 20
p <- 10
beta <- rnorm(p + 1)
X <- matrix(rnorm(n * p), n, p)
y <- cbind(1, X) %*% beta + rnorm(n)
# D <- diag(p) # どちらかの D を用いる
D <- array(dim = c(p - 1, p))
for (i in 1:(p - 1)) {
D[i, ] <- 0
D[i, i] <- 1
D[i, i + 1] <- -1
}
par(mfrow = c(1, 2))
res <- fused.dual.general(X, y, D)
alpha <- res$alpha
lambda <- res$lambda
lambda.max <- max(lambda)
m <- nrow(alpha)
alpha.min <- min(alpha)
alpha.max <- max(alpha)
plot(0:lambda.max,
xlim = c(0, lambda.max), ylim = c(alpha.min, alpha.max), type = "n",
xlab = "lambda", ylab = "alpha", main = "双対問題")
u <- c(0, lambda)
v <- rbind(0, alpha)
for (j in 1:m)
lines(u, v[, j], col = j)
res <- fused.prime.general(X, y, D)
beta <- res$beta
beta.min <- min(beta)
beta.max <- max(beta)
plot(0:lambda.max,
xlim = c(0, lambda.max), ylim = c(beta.min, beta.max), type = "n",
xlab = "lambda", ylab = "beta", main = "主問題")
w <- rbind(0, beta)
for (j in 1:p)
lines(u, w[, j], col = j)
par(mfrow = c(1, 1))
59
admm <- function(y, D, lambda) {
K <- ncol(D)
L <- nrow(D)
theta.old <- rnorm(K)
theta <- rnorm(K)
gamma <- rnorm(L)
mu <- rnorm(L)
rho <- 1
while (max(abs(theta - theta.old) / theta.old) > 0.001) {
theta.old <- theta
theta <- ## 空欄(1) ##
gamma <- ## 空欄(2) ##
mu <- mu + ## 空欄(3) ##
}
return(theta)
}
59(a)
## ベクトル y の入力
df <- read.table("airpolution.txt", header = TRUE)
index <- order(df[[3]])
y <- df[[1]][index]
N <- length(y)
x <- df[[3]] + rnorm(N) * 0.01
# もとのデータが整数値で,同じ値のものが含まれていたため,摂動を加えた
x <- x[index]
## 行列 D の設定
D <- matrix(0, ncol = N, nrow = N - 2)
for (i in 1:(N - 2))
D[i, ] <- 0
for (i in 1:(N - 2))
D[i, i] <- 1 / (x[i + 1] - x[i])
for (i in 1:(N - 2))
D[i, i + 1] <- -1 / (x[i + 1] - x[i]) - 1 / (x[i + 2] - x[i + 1])
for (i in 1:(N - 2))
D[i, i + 2] <- ## 空欄(1) ##
## theta の計算と出力
theta <- ## 空欄(2) ##
plot(x, theta, xlab = "気温(F)", ylab = "オゾン", col = "red", type = "l")
points(x, y, col = "blue")
59(b)
df <- read.table("cgh.txt")
y <- df[[1]][101:110]
N <- length(y)
D <- array(dim = c(N - 1, N))
for (i in 1:(N - 1)) {
D[i, ] <- 0
D[i, i] <- 1
D[i, i + 1] <- -1
}
lambda.seq <- seq(0, 0.5, 0.01)
M <- length(lambda.seq)
theta <- list()
for (k in 1:M) {
theta[[k]] <- ## 空欄(3) ##
}
x.min <- min(lambda.seq)
x.max <- max(lambda.seq)
y.min <- min(theta[[1]])
y.max <- max(theta[[1]])
plot(lambda.seq,
xlim = c(x.min, x.max), ylim = c(y.min, y.max), type = "n",
xlab = "lambda の値", ylab = "係数の値", main = "Fused Lasso の解パス")
for (k in 1:N) {
value <- NULL
for (j in 1:M)
value <- c(value, theta[[j]][k])
lines(lambda.seq, value, col = k)
}