centralize <- function(X, y, standardize = TRUE) {
X <- as.matrix(X)
n <- nrow(X)
p <- ncol(X)
X.bar <- array(dim = p) ## Xの各列の平均
X.sd <- array(dim = p) ## Xの各列の標準偏差
for (j in 1:p) {
X.bar[j] <- mean(X[, j])
X[, j] <- (X[, j] - X.bar[j]) ## Xの各列の中心化
X.sd[j] <- sqrt(var(X[, j]))
if (standardize == TRUE)
X[, j] <- X[, j] / X.sd[j] ## Xの各列の標準化
}
if (is.matrix(y)) { ## yが行列の場合
K <- ncol(y)
y.bar <- array(dim = K) ## yの平均
for (k in 1:K) {
y.bar[k] <- mean(y[, k])
y[, k] <- y[, k] - y.bar[k] ## yの中心化
}
} else { ## yがベクトルの場合
y.bar <- mean(y)
y <- y - y.bar
}
return(list(X = X, y = y, X.bar = X.bar, X.sd = X.sd, y.bar = y.bar))
}
gr <- function(X, y, lambda) {
nu <- 1 / max(eigen(t(X) %*% X)$values)
p <- ncol(X)
beta <- rep(1, p)
beta.old <- rep(0, p)
while (max(abs(beta - beta.old)) > 0.001) {
beta.old <- beta
gamma <- beta + nu * t(X) %*% (y - X %*% beta)
beta <- max(1 - lambda * nu / norm(gamma, "2"), 0) * gamma
}
return(beta)
}
## データの生成
n <- 100
p <- 3
X <- matrix(rnorm(n * p), ncol = p)
beta <- rnorm(p)
epsilon <- rnorm(n)
y <- 0.1 * X %*% beta + epsilon
## 係数の値の変化を表示
lambda <- seq(1, 50, 0.5)
m <- length(lambda)
beta <- matrix(nrow = m, ncol = p)
for (i in 1:m) {
est <- gr(X, y, lambda[i])
for (j in 1:p)
beta[i, j] <- est[j]
}
y.max <- max(beta)
y.min <- min(beta)
plot(lambda[1]:lambda[m], ylim = c(y.min, y.max),
xlab = "lambda", ylab = "係数の値", type = "n")
for (j in 1:p)
lines(lambda, beta[, j], col = j + 1)
legend("topright", legend = paste("係数", 1:p), lwd = 2, col = 2:(p + 1))
segments(lambda[1], 0, lambda[m], 0)
fista <- function(X, y, lambda) {
nu <- 1 / max(eigen(t(X) %*% X)$values)
p <- ncol(X)
alpha <- 1
beta <- rep(1, p)
beta.old <- rep(1, p)
gamma <- beta
while (max(abs(beta - beta.old)) > 0.001) {
print(beta)
beta.old <- beta
w <- gamma + nu * t(X) %*% (y - X %*% gamma)
beta <- max(1 - lambda * nu / norm(w, "2"), 0) * w
alpha.old <- alpha
alpha <- (1 + sqrt(1 + 4 * alpha ^ 2)) / 2
gamma <- beta + (alpha.old - 1) / alpha * (beta - beta.old)
}
return(beta)
}
group.lasso <- function(z, y, lambda = 0) {
J <- length(z)
theta <- list()
for (j in 1:J)
theta[[j]] <- rep(0, ncol(z[[j]]))
for (m in 1:10) {
for (j in 1:J) {
r <- y
for (k in 1:J)
if (k != j)
r = r - z[[k]] %*% theta[[k]]
theta[[j]] <- gr(z[[j]], r, lambda) # fista(X, y, lambda) でもよい
}
}
return(theta)
}
## データの生成
n <- 100
J <- 2
u <- rnorm(n)
v <- u + rnorm(n)
s <- 0.1 * rnorm(n)
t <- 0.1 * s + rnorm(n)
y <- u + v + s + t + rnorm(n)
z <- list()
z[[1]] <- cbind(u, v)
z[[2]] <- cbind(s, t)
## 係数の値の変化を表示
lambda <- seq(1, 500, 10)
m <- length(lambda)
beta <- matrix(nrow = m, ncol = 4)
for (i in 1:m) {
est <- group.lasso(z, y, lambda[i])
beta[i, ] <- c(est[[1]][1], est[[1]][2], est[[2]][1], est[[2]][2])
}
y.max <- max(beta)
y.min <- min(beta)
plot(lambda[1]:lambda[m], ylim = c(y.min, y.max),
xlab = "lambda", ylab = "係数の値", type = "n")
lines(lambda, beta[, 1], lty = 1, col = 2)
lines(lambda, beta[, 2], lty = 2, col = 2)
lines(lambda, beta[, 3], lty = 1, col = 4)
lines(lambda, beta[, 4], lty = 2, col = 4)
legend("topright", legend = c("グループ1", "グループ1", "グループ2", "グループ2"),
lwd = 1, lty = c(1, 2), col = c(2, 2, 4, 4))
segments(lambda[1], 0, lambda[m], 0)
sparse.group.lasso <- function(z, y, lambda = 0, alpha = 0) {
J <- length(z)
theta <- list()
for (j in 1:J)
theta[[j]] <- rep(0, ncol(z[[j]]))
for (m in 1:10) {
for (j in 1:J) {
r <- y
for (k in 1:J)
if (k != j)
r <- r - z[[k]] %*% theta[[k]]
theta[[j]] <- sparse.gr(z[[j]], r, lambda, alpha) ##
}
}
return(theta)
}
sparse.gr <- function(X, y, lambda, alpha = 0) {
nu <- 1 / max(2 * eigen(t(X) %*% X)$values)
p <- ncol(X)
beta <- rnorm(p)
beta.old <- rnorm(p)
while (max(abs(beta - beta.old)) > 0.001) {
beta.old <- beta
gamma <- beta + nu * t(X) %*% (y - X %*% beta)
delta <- soft.th(lambda * alpha, gamma) ##
beta <- max(1 - lambda * nu * (1 - alpha) / norm(delta, "2"), 0) * delta ##
}
return(beta)
}
gr.multi.linear.lasso <- function(X, Y, lambda) {
n <- nrow(X)
p <- ncol(X)
K <- ncol(Y)
## 中心化:関数centralizeは第1章で定義されている
res <- centralize(X, Y)
X <- res$X
Y <- res$y
## 係数の計算
beta <- matrix(rnorm(p * K), p, K)
gamma <- matrix(0, p, K)
while (norm(beta - gamma, "F") / norm(beta, "F") > 10 ^ (-2)) {
gamma <- beta ## betaの値を退避(比較のため)
R <- Y - X %*% beta
for (j in 1:p) {
r <- R + as.matrix(X[, j]) %*% t(beta[j, ])
M <- t(X[, j]) %*% r
beta[j, ] <- sum(X[, j] ^ 2) ^ (-1) * max(1 - lambda / sqrt(sum(M ^ 2)), 0) * M
R <- r - as.matrix(X[, j]) %*% t(beta[j, ])
}
}
## 切片の計算
for (j in 1:p)
beta[j, ] <- beta[j, ] / res$X.sd[j]
beta.0 <- res$y.bar - as.vector(res$X.bar %*% beta)
return(rbind(beta.0, beta))
}
df <- read.csv("giants_2019.csv")
X <- as.matrix(df[, -c(2, 3)])
Y <- as.matrix(df[, c(2, 3)])
lambda.min <- 0
lambda.max <- 200
lambda.seq <- seq(lambda.min, lambda.max, 5)
m <- length(lambda.seq)
beta.1 <- matrix(0, m, 7)
beta.2 <- matrix(0, m, 7)
j <- 0
for (lambda in lambda.seq) {
j <- j + 1
beta <- gr.multi.linear.lasso(X, Y, lambda)
for (k in 1:7) {
beta.1[j, k] <- beta[k + 1, 1]
beta.2[j, k] <- beta[k + 1, 2]
}
}
beta.max <- max(beta.1, beta.2)
beta.min <- min(beta.1, beta.2)
plot(0, xlim = c(lambda.min, lambda.max), ylim = c(beta.min, beta.max),
xlab = "lambda", ylab = "係数", main = "本塁打と打点の多い打者")
for (k in 1:7) {
lines(lambda.seq, beta.1[, k], lty = 1, col = k + 1)
lines(lambda.seq, beta.2[, k], lty = 2, col = k + 1)
}
legend("bottomright", c("安打", "盗塁", "四球", "死球", "三振", "犠打", "併殺打"),
lty = 2, col = 2:8)
gr.multi.lasso <- function(X, y, lambda) {
n <- nrow(X)
p <- ncol(X)
K <- length(table(y))
beta <- matrix(1, p, K)
gamma <- matrix(0, p, K)
Y <- matrix(0, n, K)
for (i in 1:n)
Y[i, y[i]] <- 1
while (norm(beta - gamma, "F") > 10 ^ (-4)) {
gamma <- beta
eta <- X %*% beta
P <- exp(eta)
for (i in 1:n)
P[i, ] <- P[i, ] / sum(P[i, ])
t <- 2 * max(P * (1 - P))
R <- (Y - P) / t
for (j in 1:p) {
r <- R + as.matrix(X[, j]) %*% t(beta[j, ])
M <- t(X[, j]) %*% r
beta[j, ] <- sum(X[, j] ^ 2) ^ (-1) * max(1 - lambda / t / sqrt(sum(M ^ 2)), 0) * M
R <- r - as.matrix(X[, j]) %*% t(beta[j, ])
}
}
return(beta)
}
df <- iris
X <- cbind(df[[1]], df[[2]], df[[3]], df[[4]])
y <- c(rep(1, 50), rep(2, 50), rep(3, 50))
lambda.seq <- c(10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 125, 150)
m <- length(lambda.seq)
p <- ncol(X)
K <- length(table(y))
alpha <- array(dim = c(m, p, K))
for (i in 1:m) {
res <- gr.multi.lasso(X, y, lambda.seq[i])
for (j in 1:p)
for (k in 1:K)
alpha[i, j, k] <- res[j, k]
}
plot(0, xlim = c(0, 150), ylim = c(min(alpha), max(alpha)), type = "n",
xlab = "lambda", ylab = "係数の値", main = "lambda の値と各係数の値の推移")
for (j in 1:p)
for (k in 1:K)
lines(lambda.seq, alpha[, j, k], col = j + 1)
legend("topright", legend = c("がく片の長さ", "がく片の幅", "花びらの長さ", "花びらの幅"),
lwd = 2, col = 2:5)
## データの生成
n <- 100
J <- 2
x <- rnorm(n)
y <- x + cos(x)
z[[1]] <- cbind(rep(1, n), x)
z[[2]] <- cbind(cos(x), cos(2 * x), cos(3 * x))
## 係数の値の変化を表示
lambda <- seq(1, 200, 5)
m <- length(lambda)
beta <- matrix(nrow = m, ncol = 5)
for (i in 1:m) {
est <- group.lasso(z, y, lambda[i])
beta[i, ] <- c(est[[1]][1], est[[1]][2], est[[2]][1], est[[2]][2], est[[2]][3])
}
y.max <- max(beta)
y.min <- min(beta)
plot(lambda[1]:lambda[m], ylim = c(y.min, y.max),
xlab = "lambda", ylab = "係数の値", type = "n")
lines(lambda, beta[, 1], lty = 1, col = 2)
lines(lambda, beta[, 2], lty = 2, col = 2)
lines(lambda, beta[, 3], lty = 1, col = 4)
lines(lambda, beta[, 4], lty = 2, col = 4)
lines(lambda, beta[, 5], lty = 3, col = 4)
legend("topright", legend = c("1", "x", "cos x", "cos 2x", "cos 3x"),
lwd = 1, lty = c(1, 2, 1, 2, 3), col = c(2, 2, 4, 4, 4))
segments(lambda[1], 0, lambda[m], 0)
i <- 5 # lambda[5]の値を用いた
f.1 <- function(x) {
return(beta[i, 1] + beta[i, 2] * x)
}
f.2 <- function(x) {
return(beta[i, 3] * cos(x) + beta[i, 4] * cos(2 * x) + beta[i, 5] * cos(3 * x))
}
f <- function(x) {
return(f.1(x) + f.2(x))
}
curve(f.1(x), -5, 5, col = "red", ylab = "関数の値")
curve(f.2(x), -5, 5, col = "blue", add = TRUE)
curve(f(x), -5, 5, add = TRUE)
legend("topleft", legend = c("f = f.1 + f.2", "f.1", "f.2"),
col = c(1, "red", "blue"), lwd = 1)