第5章 グラフィカルモデル
69
library(glasso)
solve(s)
glasso(s)
glasso(s, lambda = 0.01)
inner.prod <- function(x, y) {
return(sum(x * y))
}
soft.th <- function(lambda, x) {
return(sign(x) * pmax(abs(x) - lambda, 0))
}
graph.lasso <- function(s, lambda = 0) {
W <- s
p <- ncol(s)
beta <- matrix(0, nrow = p - 1, ncol = p)
beta.out <- beta
eps.out <- 1
while (eps.out > 0.01) {
for (j in 1:p) {
a <- W[-j, -j]
b <- s[-j, j]
beta.in <- beta[, j]
eps.in <- 1
while (eps.in > 0.01) {
for (h in 1:(p - 1)) {
cc <- b[h] - inner.prod(a[h, -h], beta[-h, j])
beta[h, j] <- soft.th(lambda, cc) / a[h, h]
}
eps.in <- max(beta[, j] - beta.in)
beta.in <- beta[, j]
}
W[-j, j] <- W[-j, -j] %*% beta[, j]
}
eps.out <- max(beta - beta.out)
beta.out <- beta
}
theta <- matrix(nrow = p, ncol = p)
for (j in 1:p) {
theta[j, j] <- 1 / (W[j, j] - W[j, -j] %*% beta[, j])
theta[-j, j] <- -beta[, j] * theta[j, j]
}
return(theta)
}
library(MultiRNG)
Theta <- matrix(c( 2, 0.6, 0, 0, 0.5,
0.6, 2, -0.4, 0.3, 0,
0, -0.4, 2, -0.2, 0,
0, 0.3, -0.2, 2, -0.2,
0.5, 0, 0, -0.2, 2),
nrow = 5)
Sigma <- ## 空欄 ##
meanvec <- rep(0, 5)
## 平均 mean.vec,共分散行列 cov.mat,サンプル数 no.row,変数の個数 d から
## サンプル行列を生成
dat <- draw.d.variate.normal(no.row = 20, d = 5, mean.vec = meanvec, cov.mat = Sigma)
s <- t(dat) %*% dat / nrow(dat)
graph.lasso(s)
graph.lasso(s, lambda = 0.01)
70
library(igraph)
library(glasso)
adj <- function(mat) {
p <- ncol(mat)
ad <- matrix(0, nrow = p, ncol = p)
for (i in 1:(p - 1)) {
for (j in (i + 1):p) {
if (## 空欄 ##) {
ad[i, j] <- 0
} else {
ad[i, j] <- 1
}
}
}
g <- graph.adjacency(ad, mode = "undirected")
plot(g)
}
df <- read.csv("breastcancer.csv")
w <- matrix(nrow = 250, ncol = 1000)
for (i in 1:1000)
w[, i] <- as.numeric(df[[i]])
x <- w
s <- t(x) %*% x / 250
fit <- glasso(s, rho = 1)
sum(fit$wi == 0)
adj(fit$wi)
fit <- glasso(s, rho = 0.5)
sum(fit$wi == 0)
adj(fit$wi)
y <- NULL
z <- NULL
for (i in 1:999) {
for (j in (i + 1):1000) {
if (mat[i, j] != 0) {
y <- c(y, i)
z <- c(z, j)
}
}
}
cbind(y,z)
## 最初の100遺伝子に限定
x <- x[, 1:100]
s <- t(x) %*% x / 250
fit <- glasso(s, rho = 0.75)
sum(fit$wi == 0)
adj(fit$wi)
y <- NULL
z <- NULL
for (i in 1:99) {
for (j in (i + 1):100) {
if (fit$wi[i, j] != 0) {
y <- c(y, i)
z <- c(z, j)
}
}
}
cbind(y, z)
fit <- glasso(s, rho = 0.25)
sum(fit$wi == 0)
adj(fit$wi)
71
library(glmnet)
df <- read.csv("breastcancer.csv")
n <- 250
p <- 50
w <- matrix(nrow = n, ncol = p)
for (i in 1:p)
w[, i] <- as.numeric(df[[i]])
x <- w[, 1:p]
fm <- rep("gaussian", p)
lambda <- 0.1
fit <- list()
for (j in 1:p)
fit[[j]] <- glmnet(x[, -j], x[, j], family = fm[j], lambda = lambda)
ad <- matrix(0, p, p)
for (i in 1:p) {
for (j in 1:(p - 1)) {
if (j >= i)
k <- j + 1
if (fit[[i]]$beta[j] != 0) {
ad[i, k] <- 1
} else {
ad[i, k] <- 0
}
}
}
## ANDの場合
for (i in 1:(p - 1)) {
for (j in (i + 1):p) {
if (ad[i, j] != ad[i, j]) {
ad[i, j] <- 0
ad[j, i] <- 0
}
}
}
u <- NULL
v <- NULL
for (i in 1:(p - 1)) {
for (j in (i + 1):p) {
if (ad[i, j] == 1) {
u <- c(u, i)
v <- c(v, j)
}
}
}
## ORの場合
library(glmnet)
df <- read.csv("breastcancer.csv")
w <- matrix(nrow = 250, ncol = 1000)
for (i in 1:1000)
w[, i] <- as.numeric(df[[i]])
w <- (sign(w) + 1) / 2 # 2値データに変換する
p <- 50
x <- w[, 1:p]
fm <- rep("binomial", p)
lambda <- 0.15
fit <- list()
for (j in 1:p)
fit[[j]] = glmnet(x[, -j], x[, j], family = fm[j], lambda = lambda)
ad <- matrix(0, nrow = p, ncol = p)
for (i in 1:p) {
for (j in 1:(p - 1)) {
if (j >= i)
k <- j + 1
if (fit[[i]]$beta[j] != 0) {
ad[i, k] <- 1
} else {
ad[i, k] <- 0
}
}
}
for (i in 1:(p - 1)) {
for (j in (i + 1):p) {
if (ad[i, j] != ad[i, j]) {
ad[i, j] <- 0
ad[j, i] <- 0
}
}
}
sum(ad)
adj(ad)
73
## 大きさが 2 の場合の Fused Lasso
b.fused <- function(y, lambda) {
if (y[1] > y[2] + 2 * lambda) {
a <- y[1] - lambda
b <- y[2] + lambda
} else if (y[1] < y[2] - 2 * lambda) {
a <- y[1] + lambda
b <- y[2] - lambda
} else {
a <- (y[1] + y[2]) / 2
b <- a
}
return(c(a, b))
}
## 隣接項だけではなく,離接するすべての値と比較する Fused Lasso
fused <- function(y, lambda.1, lambda.2) {
K <- length(y)
if (K == 1) {
theta <- y
} else if (K == 2) {
theta <- b.fused(y, lambda.2)
} else {
L <- K * (K - 1) / 2
D <- matrix(0, nrow = L, ncol = K)
k <- 0
for (i in 1:(K - 1)) {
for (j in (i + 1):K) {
k <- k + 1
## 空欄(1) ##
}
}
out <- genlasso(y, D = D)
theta <- coef(out, lambda = lambda.2)
}
theta <- soft.th(lambda.1, theta)
return(theta)
}
## Joint Graphical Lasso
jgl <- function(X, lambda.1, lambda.2) { # Xはリストで与える
K <- length(X)
p <- ncol(X[[1]])
n <- array(dim = K)
S <- list()
for (k in 1:K) {
n[k] <- nrow(X[[k]])
S[[k]] <- t(X[[k]]) %*% X[[k]] / n[k]
}
rho <- 1
lambda.1 <- lambda.1 / rho
lambda.2 <- lambda.2 / rho
Theta <- list()
for (k in 1:K)
Theta[[k]] <- diag(p)
Theta.old <- list()
for (k in 1:K)
Theta.old[[k]] <- diag(rnorm(p))
U <- list()
for (k in 1:K)
U[[k]] <- matrix(0, nrow = p, ncol = p)
Z <- list()
for (k in 1:K)
Z[[k]] <- matrix(0, nrow = p, ncol = p)
epsilon <- 0
epsilon.old <- 1
while (abs((epsilon - epsilon.old) / epsilon.old) > 0.0001) {
Theta.old <- Theta
epsilon.old <- epsilon
## (a)に関する更新
for (k in 1:K) {
mat <- S[[k]] - rho * Z[[k]] / n[k] + rho * U[[k]] / n[k]
svd.mat <- svd(mat)
V <- svd.mat$v
D <- svd.mat$d
DD <- ## 空欄(2) ##
Theta[[k]] <- ## 空欄(3) ##
}
## (b)に関する更新
for (i in 1:p) {
for (j in 1:p) {
A <- NULL
for (k in 1:K)
A <- c(A, Theta[[k]][i, j] + U[[k]][i, j])
if (i == j) {
B <- fused(A, 0, lambda.2)
} else {
B <- fused(A, lambda.1, lambda.2)
}
for (k in 1:K)
Z[[k]][i,j] <- B[k]
}
}
## (c)に関する更新
for (k in 1:K)
U[[k]] <- U[[k]] + Theta[[k]] - Z[[k]]
## 収束したかどうかの検査
epsilon <- 0
for (k in 1:K) {
epsilon.new <- max(abs(Theta[[k]] - Theta.old[[k]]))
if (epsilon.new > epsilon)
epsilon <- epsilon.new
}
}
return(Z)
}
## データ生成と実行
p <- 10
K <- 2
N <- 100
n <- array(dim = K)
for (k in 1:K)
n[k] <- N / K
X <- list()
X[[1]] <- matrix(rnorm(n[k] * p), ncol = p)
for (k in 2:K)
X[[k]] <- X[[k - 1]] + matrix(rnorm(n[k] * p) * 0.1, ncol = p)
## lambda.1, lambda.2 を変えて実行
Theta <- jgl(X, 3, 0.01)
par(mfrow = c(1, 2))
adj(Theta[[1]])
adj(Theta[[2]])
74
for (i in 1:p) {
for (j in 1:p) {
A <- NULL
for (k in 1:K)
A <- c(A, Theta[[k]][i, j] + U[[k]][i, j])
if (i == j) {
B <- A
} else {
B <- ## 空欄 ##
}
for (k in 1:K)
Z[[k]][i, j] <- B[k]
}
}