Chapter 5 Graphical Model

5.1 Graphical Model

5.2 Graphical Lasso

inner.prod = function(x, y) {
  return(sum(x * y))
} ## already appeared

soft.th = function(lambda, x) {
  return(sign(x) * pmax(abs(x) - lambda, 0))
} ## already appeared

graph.lasso = function(s, lambda = 0) {
  W = s; p = ncol(s); beta = matrix(0, nrow = p - 1, ncol = p)
  beta.out = beta; eps.out = 1
  while (eps.out > 0.01) {
    for (j in 1:p) {
      a = W[-j, -j]; b = s[-j, j]
      beta.in = beta[, j]; eps.in = 1
      while (eps.in > 0.01) {
        for (h in 1:(p - 1)) {
          cc = b[h] - inner.prod(a[h, -h], beta[-h, j])
          beta[h, j] = soft.th(lambda, cc) / a[h, h]         
        }
        eps.in = max(beta[, j] - beta.in); beta.in = beta[, j]
      }
      W[-j, j] = W[-j, -j] %*% beta[, j]     
    }
    eps.out = max(beta - beta.out); beta.out = beta
  }
  theta = matrix(nrow = p, ncol = p)
  for (j in 1:p) {
    theta[j, j] = 1 / (W[j, j] - W[j, -j] %*% beta[, j]) 
    theta[-j, j] = -beta[, j] * theta[j, j]       
  }
  return(theta)
}

Example 47

library(MultiRNG)
## Warning: package 'MultiRNG' was built under R version 4.0.3
Theta = matrix(c(   2,  0.6,    0,    0,  0.5,  0.6,    2, -0.4,  0.3,    0,    0, -0.4,    2,
                 -0.2,    0,    0,  0.3, -0.2,    2, -0.2,  0.5,    0,    0, -0.2,    2),
               nrow = 5)
Sigma = solve(Theta)
meanvec = rep(0, 5)
dat = draw.d.variate.normal(no.row = 20, d = 5, mean.vec = meanvec, cov.mat = Sigma) 
# average: mean.vec, cov matric: cov.mat, sample #: no.row, variable #: d
s = t(dat) %*% dat / nrow(dat)
s <- t(dat) %*% dat / nrow(dat)

Theta
##      [,1] [,2] [,3] [,4] [,5]
## [1,]  2.0  0.6  0.0  0.0  0.5
## [2,]  0.6  2.0 -0.4  0.3  0.0
## [3,]  0.0 -0.4  2.0 -0.2  0.0
## [4,]  0.0  0.3 -0.2  2.0 -0.2
## [5,]  0.5  0.0  0.0 -0.2  2.0
graph.lasso(s)
##             [,1]       [,2]        [,3]       [,4]       [,5]
## [1,]  1.38061821  0.2162693  0.08876051 -0.1910042  0.2497553
## [2,]  0.21923131  5.1072602 -1.13496436  1.1958117 -0.8245919
## [3,]  0.08772939 -1.1357265  2.84475634 -0.5635932  0.1143785
## [4,] -0.19040425  1.1965078 -0.56367065  2.4486443 -0.3984308
## [5,]  0.24940326 -0.8256401  0.11464103 -0.3985777  2.0033170
graph.lasso(s, lambda = 0.015)
##            [,1]       [,2]        [,3]       [,4]       [,5]
## [1,]  1.3546441  0.2053065  0.03394195 -0.1364187  0.2100340
## [2,]  0.2039668  4.6468931 -0.80869487  0.8664287 -0.5972106
## [3,]  0.0340876 -0.8086943  2.70681526 -0.3535684  0.0000000
## [4,] -0.1366785  0.8659684 -0.35352104  2.2870580 -0.2417152
## [5,]  0.2102011 -0.5969173  0.00000000 -0.2417207  1.9233193
graph.lasso(s, lambda = 0.03)
##             [,1]       [,2]       [,3]        [,4]       [,5]
## [1,]  1.33523854  0.1702010  0.0000000 -0.09362097  0.1770086
## [2,]  0.16948454  4.3510582 -0.5799040  0.63031601 -0.4462609
## [3,]  0.00000000 -0.5799715  2.6226460 -0.20413955  0.0000000
## [4,] -0.09371067  0.6301922 -0.2041181  2.19053995 -0.1312166
## [5,]  0.17707630 -0.4461230  0.0000000 -0.13121300  1.8712432
graph.lasso(s, lambda = 0.05)
##             [,1]        [,2]        [,3]         [,4]         [,5]
## [1,]  1.31690897  0.09981041  0.00000000 -0.044318240  0.135444171
## [2,]  0.09968740  4.09855781 -0.32083570  0.392617073 -0.277676939
## [3,]  0.00000000 -0.32084029  2.56142716 -0.045264749  0.000000000
## [4,] -0.04432971  0.39262036 -0.04526493  2.121903022 -0.009747956
## [5,]  0.13545921 -0.27765798  0.00000000 -0.009746372  1.828809486

Example 48

library(glasso)
## Warning: package 'glasso' was built under R version 4.0.3
solve(s)
##             [,1]       [,2]        [,3]       [,4]       [,5]
## [1,]  1.38052115  0.2178846  0.08808054 -0.1907417  0.2496124
## [2,]  0.21788461  5.1078138 -1.13583330  1.1964618 -0.8254007
## [3,]  0.08808054 -1.1358333  2.84492510 -0.5638045  0.1146495
## [4,] -0.19074168  1.1964618 -0.56380452  2.4487926 -0.3986328
## [5,]  0.24961238 -0.8254007  0.11464953 -0.3986328  2.0034230
glasso(s, rho = 0)
## Warning in glasso(s, rho = 0): With rho=0, there may be convergence problems if the input matrix 
##  is not of full rank
## $w
##             [,1]        [,2]        [,3]        [,4]        [,5]
## [1,]  0.76815936 -0.07496405 -0.03531827  0.07032786 -0.11057745
## [2,] -0.07496405  0.25422218  0.08101719 -0.09671395  0.09019801
## [3,] -0.03531827  0.08101719  0.39433917  0.05263765  0.02568600
## [4,]  0.07032786 -0.09671395  0.05263765  0.48037181  0.04396210
## [5,] -0.11057745  0.09019801  0.02568600  0.04396210  0.55736155
## 
## $wi
##            [,1]       [,2]        [,3]       [,4]       [,5]
## [1,]  1.3805214  0.2178869  0.08808086 -0.1907403  0.2496127
## [2,]  0.2178903  5.1078137 -1.13583184  1.1964637 -0.8253984
## [3,]  0.0880788 -1.1358323  2.84492456 -0.5638045  0.1146488
## [4,] -0.1907401  1.1964610 -0.56380406  2.4487931 -0.3986324
## [5,]  0.2496115 -0.8254002  0.11464935 -0.3986329  2.0034225
## 
## $loglik
## [1] NA
## 
## $errflag
## [1] 0
## 
## $approx
## [1] FALSE
## 
## $del
## [1] 2.42895e-07
## 
## $niter
## [1] 1
glasso(s, rho = 0.015)
## $w
##             [,1]        [,2]        [,3]        [,4]        [,5]
## [1,]  0.78315936 -0.05996359 -0.02031806  0.05532820 -0.09557720
## [2,] -0.05996359  0.26922218  0.06601725 -0.08171380  0.07519821
## [3,] -0.02031806  0.06601725  0.40933917  0.03763769  0.02588584
## [4,]  0.05532820 -0.08171380  0.03763769  0.49537181  0.02896210
## [5,] -0.09557720  0.07519821  0.02588584  0.02896210  0.57236155
## 
## $wi
##             [,1]       [,2]        [,3]       [,4]       [,5]
## [1,]  1.32623113  0.1909320  0.03434332 -0.1310195  0.2014554
## [2,]  0.19093021  4.3225462 -0.72491912  0.7785015 -0.5426306
## [3,]  0.03434384 -0.7249195  2.59102847 -0.3202773  0.0000000
## [4,] -0.13101992  0.7785017 -0.32027738  2.1989886 -0.2209461
## [5,]  0.20145559 -0.5426306  0.00000000 -0.2209461  1.8632601
## 
## $loglik
## [1] NA
## 
## $errflag
## [1] 0
## 
## $approx
## [1] FALSE
## 
## $del
## [1] 3.209246e-07
## 
## $niter
## [1] 2
glasso(s, rho = 0.030)
## $w
##              [,1]        [,2]         [,3]        [,4]        [,5]
## [1,]  0.798159358 -0.04496376 -0.005987743  0.04032839 -0.08057721
## [2,] -0.044963759  0.28422218  0.051017251 -0.06671378  0.06019821
## [3,] -0.005987743  0.05101725  0.424339175  0.02263769  0.01277091
## [4,]  0.040328389 -0.06671378  0.022637691  0.51037181  0.01396210
## [5,] -0.080577205  0.06019821  0.012770914  0.01396210  0.58736155
## 
## $wi
##             [,1]       [,2]       [,3]        [,4]       [,5]
## [1,]  1.28201969  0.1480635  0.0000000 -0.08640033  0.1627528
## [2,]  0.14806316  3.8282875 -0.4746591  0.52000640 -0.3740866
## [3,]  0.00000000 -0.4746592  2.4227153 -0.16950581  0.0000000
## [4,] -0.08639983  0.5200064 -0.1695058  2.04468571 -0.1100663
## [5,]  0.16275282 -0.3740866  0.0000000 -0.11006629  1.7658123
## 
## $loglik
## [1] NA
## 
## $errflag
## [1] 0
## 
## $approx
## [1] FALSE
## 
## $del
## [1] 6.720195e-08
## 
## $niter
## [1] 2
glasso(s, rho = 0.045)
## $w
##              [,1]        [,2]         [,3]         [,4]         [,5]
## [1,]  0.813159358 -0.02996373 -0.003065848  0.025328369 -0.065577205
## [2,] -0.029963727  0.29922218  0.036017255 -0.051713781  0.045198213
## [3,] -0.003065848  0.03601726  0.439339175  0.007637693  0.005622312
## [4,]  0.025328369 -0.05171378  0.007637693  0.525371814 -0.001037903
## [5,] -0.065577205  0.04519821  0.005622312 -0.001037903  0.602361550
## 
## $wi
##             [,1]        [,2]        [,3]        [,4]        [,5]
## [1,]  1.24523890  0.09664008  0.00000000 -0.05026757  0.12822724
## [2,]  0.09664026  3.48276841 -0.28761995  0.34185165 -0.24753510
## [3,]  0.00000000 -0.28761995  2.30079865 -0.06175952  0.00000000
## [4,] -0.05026707  0.34185167 -0.06175952  1.94033078 -0.02720357
## [5,]  0.12822722 -0.24753511  0.00000000 -0.02720358  1.69261915
## 
## $loglik
## [1] NA
## 
## $errflag
## [1] 0
## 
## $approx
## [1] FALSE
## 
## $del
## [1] 9.818425e-08
## 
## $niter
## [1] 2
library(igraph)
## Warning: package 'igraph' was built under R version 4.0.3
## 
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
adj = function(mat) {
  p = ncol(mat); ad = matrix(0, nrow = p, ncol = p)
  for (i in 1:(p - 1)) for (j in (i + 1):p) {
    if (mat[i, j] == 0) ad[i, j] = 0 else ad[i, j] = 1
  }
  g = graph.adjacency(ad, mode = "undirected")
  plot(g)
}

Example 49

library(glasso); library(igraph)
df = read.csv("breastcancer.csv")
w = matrix(nrow = 250, ncol = 1000)
for (i in 1:1000) w[, i] = as.numeric(df[[i]])
x = w; s = t(x) %*% x / 250
fit = glasso(s, rho = 0.75); sum(fit$wi == 0) 
## [1] 992822
y = NULL; z = NULL 
for (i in 1:999) for (j in (i + 1):1000) if (fit$wi[i, j] != 0) {y = c(y, i); z = c(z, j)}
edges = cbind(y, z)  
write.csv(edges,"edges.csv")

5.3 Estimation of Graphical Models using the Quasi-Likelihood

Example 50

library(glmnet)
## Warning: package 'glmnet' was built under R version 4.0.3
## Loading required package: Matrix
## Loaded glmnet 4.0-2
df = read.csv("breastcancer.csv")
n = 250; p = 50; w = matrix(nrow = n, ncol = p)
for (i in 1:p) w[, i] = as.numeric(df[[i]])
x = w[, 1:p]; fm = rep("gaussian", p); lambda = 0.1
fit = list()
for (j in 1:p) fit[[j]] = glmnet(x[, -j], x[, j], family = fm[j], lambda = lambda)
ad = matrix(0, p, p)
for (i in 1:p) for (j in 1:(p - 1)) {
  k = j
  if (j >= i) k = j + 1
  if (fit[[i]]$beta[j] != 0) ad[i, k] = 1 else ad[i, k] = 0
}
## AND
for (i in 1:(p - 1)) for (j in (i + 1):p) {
  if (ad[i, j] != ad[i, j]) {ad[i, j] = 0; ad[j, i] = 0}
}
u = NULL; v = NULL
for (i in 1:(p - 1)) for (j in (i + 1):p) {
  if (ad[i, j] == 1) {u = c(u, i); v = c(v, j)}
}
u
##   [1]  1  1  1  1  1  1  1  1  1  1  1  1  2  2  2  2  2  2  2  2  2  2  2  2  3
##  [26]  3  3  3  3  3  3  3  3  3  4  4  4  4  4  4  4  5  5  5  5  5  5  6  6  6
##  [51]  6  6  6  6  6  6  6  6  7  7  7  7  7  7  7  7  8  8  8  8  8  8  9  9  9
##  [76]  9  9  9  9  9  9 10 10 10 10 10 10 10 11 11 11 11 11 11 12 12 12 12 12 12
## [101] 12 12 13 13 13 13 13 13 13 13 13 14 14 15 15 15 16 16 16 16 16 16 16 16 16
## [126] 16 16 17 17 17 17 17 18 18 18 18 18 18 18 18 19 19 19 19 20 20 20 21 21 21
## [151] 21 21 22 22 22 22 22 22 23 23 23 23 23 23 23 24 24 24 24 24 24 25 25 25 25
## [176] 26 26 26 26 26 27 27 27 27 27 28 28 28 28 29 29 29 29 29 30 30 30 30 30 30
## [201] 31 31 31 31 31 31 31 32 32 33 33 34 34 34 35 35 36 36 36 37 37 37 38 38 38
## [226] 38 38 40 40 42 42 43 43 43 44 44 45 45 47 47 47 49
v
##   [1]  2 12 13 18 22 23 24 28 41 46 48 50  4  5 10 16 18 21 23 26 27 34 37 43  9
##  [26] 10 15 19 20 21 24 29 38 50  5  8 17 18 21 23 46  9 20 21 28 30 32 10 11 15
##  [51] 21 24 25 31 33 39 41 45  8 12 14 18 30 36 44 47 12 13 15 17 41 49 10 14 18
##  [76] 19 21 29 45 46 50 18 25 28 32 34 37 42 12 16 20 28 42 48 17 18 29 33 35 41
## [101] 43 47 20 22 25 28 39 40 43 45 50 15 16 17 25 45 18 23 26 27 30 31 32 41 46
## [126] 47 49 20 25 29 31 36 21 24 27 35 41 43 48 49 20 25 42 44 27 44 48 23 24 34
## [151] 37 42 23 29 30 35 36 50 25 28 32 35 37 47 50 28 33 35 40 41 42 29 39 45 48
## [176] 39 40 42 46 50 37 40 42 43 49 29 31 47 50 30 39 45 48 50 31 32 45 47 48 50
## [201] 34 36 39 44 47 49 50 33 45 37 49 35 36 45 41 42 38 39 45 38 42 46 39 45 46
## [226] 49 50 41 42 49 50 47 49 50 48 50 46 50 48 49 50 50
adj(ad)

## OR
for (i in 1:(p - 1)) for (j in (i + 1):p) {
  if (ad[i, j] != ad[i, j]) {ad[i, j] = 1; ad[j, i] = 1}
}
adj(ad)

Example 51

library(glmnet)
df = read.csv("breastcancer.csv")
w = matrix(nrow = 250, ncol = 1000); for (i in 1:1000) w[, i] = as.numeric(df[[i]])
w = (sign(w) + 1) / 2  ## transforming it to binary
p = 50; x = w[, 1:p]; fm = rep("binomial", p); lambda = 0.15
fit = list()
for (j in 1:p) fit[[j]] = glmnet(x[, -j], x[, j], family = fm[j], lambda = lambda)
ad = matrix(0, nrow = p, ncol = p)
for (i in 1:p) for (j in 1:(p - 1)) {
  k = j
  if (j >= i) k = j + 1
  if (fit[[i]]$beta[j] != 0) ad[i, k] = 1 else ad[i, k] = 0
}
for (i in 1:(p - 1)) for (j in (i + 1):p) {
  if (ad[i, j] != ad[i, j]) {ad[i, j] = 0; ad[j, i] = 0}
}
sum(ad); adj(ad)
## [1] 203

5.4 JointグラフィカルLasso

# genlasso works only when the size is at least three
b.fused = function(y, lambda) {
  if (y[1] > y[2] + 2 * lambda) {a = y[1] - lambda; b = y[2] + lambda} 
  else if (y[1] < y[2] - 2 * lambda) {a = y[1] + lambda; b = y[2] - lambda}
  else {a = (y[1] + y[2]) / 2; b = a}
  return(c(a, b))
}
# fused Lasso that compares not only the adjacency terms but also all adjacency values
fused = function(y, lambda.1, lambda.2) {
  K = length(y)
  if (K == 1) theta = y
  else if (K == 2) theta = b.fused(y, lambda.2)
  else {
    L = K * (K - 1) / 2; D = matrix(0, nrow = L, ncol = K)
    k = 0
    for (i in 1:(K - 1)) for (j in (i + 1):K) {
      k = k + 1; D[k, i] = 1; D[k, j] = -1
    }
    out = genlasso(y, D = D)
    theta = coef(out, lambda = lambda.2)               
  }
  theta = soft.th(lambda.1, theta)            
  return(theta)
}
# Joint Graphical Lasso
jgl = function(X, lambda.1, lambda.2) {  # X is given as a list
  K = length(X); p = ncol(X[[1]]); n = array(dim = K); S = list()
  for (k in 1:K) {n[k] = nrow(X[[k]]); S[[k]] = t(X[[k]]) %*% X[[k]] / n[k]}
  rho = 1; lambda.1 = lambda.1 / rho; lambda.2 = lambda.2 / rho
  Theta = list(); for (k in 1:K) Theta[[k]] = diag(p)
  Theta.old = list(); for (k in 1:K) Theta.old[[k]] = diag(rnorm(p))
  U = list(); for (k in 1:K) U[[k]] = matrix(0, nrow = p, ncol = p)
  Z = list(); for (k in 1:K) Z[[k]] = matrix(0, nrow = p, ncol = p)
  epsilon = 0; epsilon.old = 1
  while (abs((epsilon - epsilon.old) / epsilon.old) > 0.0001) {
    Theta.old = Theta;  epsilon.old = epsilon
    ## Update (i)
    for (k in 1:K) {
      mat = S[[k]] - rho * Z[[k]] / n[k] + rho * U[[k]] / n[k]
      svd.mat = svd(mat)
      V = svd.mat$v
      D = svd.mat$d
      DD = n[k] / (2 * rho) * (-D + sqrt(D ^ 2 + 4 * rho / n[k]))
      Theta[[k]] = V %*% diag(DD) %*% t(V)
    }
    ## Update (ii)
    for (i in 1:p) for (j in 1:p) {
      A = NULL; for (k in 1:K) A = c(A, Theta[[k]][i, j] + U[[k]][i, j])
      if (i == j) B = fused(A, 0, lambda.2) else B = fused(A, lambda.1, lambda.2)
      for (k in 1:K) Z[[k]][i, j] = B[k]
    }
    ## Update (iii)
    for (k in 1:K) U[[k]] = U[[k]] + Theta[[k]] - Z[[k]]     
    ## Test Convergence
    epsilon = 0
    for (k in 1:K) {
      epsilon.new = max(abs(Theta[[k]] - Theta.old[[k]]))
      if (epsilon.new > epsilon) epsilon = epsilon.new
    }
  }
  return(Z)
}
## Replace the update (ii) by the following
for (i in 1:p) for (j in 1:p) {
  A = NULL; for (k in 1:K) A = c(A, Theta[[k]][i, j] + U[[k]][i, j])
  if (i == j) B = A
  else {B = soft.th(lambda.1 / rho,A) *
    max(1 - lambda.2 / rho / sqrt(norm(soft.th(lambda.1 / rho, A), "2") ^ 2), 0)}
  for (k in 1:K) Z[[k]][i, j] = B[k]
}

Example 52

## Data Generation and Execution
p = 10; K = 2; N = 100; n = array(dim = K); for (k in 1:K) n[k] = N / K
X = list(); X[[1]] = matrix(rnorm(n[k] * p), ncol = p)
for (k in 2:K) X[[k]] = X[[k - 1]] + matrix(rnorm(n[k] * p) * 0.1, ncol = p)
## Change the lambda.1,lambda.2 values to execute
Theta = jgl(X, 3, 0.01)
par(mfrow = c(1, 2)); adj(Theta[[1]]); adj(Theta[[2]])