第8章 サポートベクトルマシン

8.1 最適な境界

8.2 最適化の理論

8.3 サポートベクトルマシンの解

library(quadprog)
## Warning: package 'quadprog' was built under R version 4.0.3
svm.1 <- function(X, y, C) {
  eps <- 0.0001
  n <- nrow(X)
  meq <- 1
  Dmat <- matrix(nrow = n, ncol = n)
  for (i in 1:n)
    for (j in 1:n)
      Dmat[i, j] <- sum(X[i, ] * X[j, ]) * y[i] * y[j]
  Dmat <- Dmat + eps * diag(n)  # 対角成分に小さい値を加えて,正則にする
  dvec <- rep(1, n)
  Amat <- matrix(nrow = (2 * n + 1), ncol = n)
  Amat[1, ] <- y
  Amat[2:(n + 1), 1:n] <- -diag(n)
  Amat[(n + 2):(2 * n + 1), 1:n] <- diag(n)
  Amat <- t(Amat)  # このパッケージでは,Amatは転置にしたものを指定する
  bvec <- c(0, rep(-C, n), rep(0, n))
  alpha <- solve.QP(Dmat, dvec, Amat, bvec = bvec, meq = 1)$solution
  beta <- drop((alpha * y) %*% X)
  index <- (1:n)[eps < alpha & alpha < C - eps]
  beta.0 <- mean(y[index] - X[index, ] %*% beta)
  return(list(beta = beta, beta.0 = beta.0))
}

例73

library(quadprog)

a <- rnorm(1)
b <- rnorm(1)
n <- 100
X <- matrix(rnorm(n * 2), ncol = 2, nrow =n)
y <- sign(a * X[, 1] + b * X[, 2] + 0.1 * rnorm(n))
plot(-3:3, -3:3, xlab = "第1成分", ylab = "第2成分", type = "n")
for (i in 1:n) {
  if (y[i] == 1) {
    points(X[i, 1], X[i, 2], col = "red")
  } else {
    points(X[i, 1], X[i, 2], col = "blue")
  }
}
qq <- svm.1(X, y, 10)
abline(-qq$beta.0 / qq$beta[2], -qq$beta[1] / qq$beta[2])

8.4 カーネルを用いたサポートベクトルマシンの拡張

例74

K.linear <- function(x, y) {
  return(t(x) %*% y)
}

K.poly <- function(x, y) {
  return((1 + t(x) %*% y) ^ 2)
}
svm.2 <- function(X, y, C, K) {  # 関数名をsvm.2とした
  eps <- 0.0001
  n <- nrow(X)
  Dmat <- matrix(nrow = n, ncol = n)
  for (i in 1:n)
    for(j in 1:n)
      Dmat[i, j] <- K(X[i, ], X[j, ]) * y[i] * y[j]
  Dmat <- Dmat + eps * diag(n)
  dvec <- rep(1, n)
  Amat <- matrix(nrow = (2 * n + 1), ncol = n)
  Amat[1, ] <- y
  Amat[2:(n + 1), 1:n] <- -diag(n)
  Amat[(n + 2):(2 * n + 1), 1:n] <- diag(n)
  Amat <- t(Amat)
  bvec <- c(0, -C * rep(1, n), rep(0, n))
  meq <- 1
  alpha <- solve.QP(Dmat, dvec, Amat, bvec = bvec, meq = 1)$solution
  index <- (1:n)[eps < alpha & alpha < C - eps]
  beta <- drop((alpha * y) %*% X)
  beta.0 <- mean(y[index] - X[index, ] %*% beta)
  return(list(alpha = alpha, beta.0 = beta.0))
}

例77

# 関数の定義
plot.kernel <- function(K, lty) {  # 引数ltyで,線の種類を指定する
  qq <- svm.2(X, y, 1, K)
  alpha <- qq$alpha
  beta.0 <- qq$beta.0

  f <- function(u, v) {
    x <- c(u, v)
    S <- beta.0
    for (i in 1:n)
      S <- S + alpha[i] * y[i] * K(X[i, ], x)
    return(S)
  }  # fは(x, y)でのz方向の高さを求める。これから輪郭を求めることができる。

  u <- seq(-2, 2, .1)
  v <- seq(-2, 2, .1)
  w <- array(dim = c(41, 41))
  for (i in 1:41)
    for (j in 1:41)
      w[i, j] <- f(u[i], v[j])
  contour(u, v, w, level = 0, add = TRUE, lty = lty)
}

# 実行
a <- 3
b <- -1
n <- 200
X <- matrix(rnorm(n * 2), ncol = 2, nrow = n)
y <- sign(a * X[, 1] + b * X[, 2] ^ 2 + 0.3 * rnorm(n))
plot(-3:3, -3:3, xlab = "X[, 1]", ylab = "X[, 2]", type = "n")
for (i in 1:n) {
  if (y[i] == 1) {
    points(X[i, 1], X[i, 2], col = "red")
  } else {
    points(X[i, 1], X[i, 2], col = "blue")
  }
}
plot.kernel(K.linear, 1)
plot.kernel(K.poly, 2)

例78

library(e1071)
## Warning: package 'e1071' was built under R version 4.0.3
x <- matrix(rnorm(200 * 2), ncol = 2)
x[1:100, ] <- x[1:100, ] + 2
x[101:150, ] <- x[101:150, ] - 2
y <- c(rep(1, 150), rep(2, 50))
dat <- data.frame(x = x, y = as.factor(y))
train <- sample(200, 100)
svmfit <- svm(y ~ ., data = dat[train, ], kernel = "radial",
              gamma = 1, cost = 100)
plot(svmfit, dat[train, ])

tune.out <- tune(svm, y ~ ., data = dat[train, ], kernel = "radial",
                 ranges = list(cost = c(0.1, 1, 10, 100, 1000),
                               gamma = c(0.5, 1, 2, 3, 4)))
summary (tune.out)
## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  cost gamma
##     1   0.5
## 
## - best performance: 0.08 
## 
## - Detailed performance results:
##     cost gamma error dispersion
## 1  1e-01   0.5  0.23 0.06749486
## 2  1e+00   0.5  0.08 0.06324555
## 3  1e+01   0.5  0.09 0.07378648
## 4  1e+02   0.5  0.08 0.07888106
## 5  1e+03   0.5  0.13 0.12516656
## 6  1e-01   1.0  0.23 0.06749486
## 7  1e+00   1.0  0.10 0.06666667
## 8  1e+01   1.0  0.09 0.07378648
## 9  1e+02   1.0  0.13 0.09486833
## 10 1e+03   1.0  0.12 0.09189366
## 11 1e-01   2.0  0.23 0.06749486
## 12 1e+00   2.0  0.09 0.05676462
## 13 1e+01   2.0  0.11 0.08755950
## 14 1e+02   2.0  0.11 0.08755950
## 15 1e+03   2.0  0.11 0.09944289
## 16 1e-01   3.0  0.23 0.06749486
## 17 1e+00   3.0  0.09 0.05676462
## 18 1e+01   3.0  0.13 0.12516656
## 19 1e+02   3.0  0.11 0.08755950
## 20 1e+03   3.0  0.12 0.11352924
## 21 1e-01   4.0  0.23 0.06749486
## 22 1e+00   4.0  0.10 0.08164966
## 23 1e+01   4.0  0.12 0.13165612
## 24 1e+02   4.0  0.12 0.11352924
## 25 1e+03   4.0  0.11 0.09944289

例79

library(e1071)

df <- iris
x <- df[, 1:4]
y <- as.factor(df[, 5])
m <- 30
train <- sample(1:150, 150 - m)
x.train <- x[train, ]
y.train <- y[train]
dat <- data.frame(x.train, y.train)
svmfit <- svm(y.train ~ ., data = dat, kernel = "radial", cost = 10, gamma = 1)
x.test <- x[-train, ]
y.test <- y[-train]
table(y.test, predict(svmfit, x.test))
##             
## y.test       setosa versicolor virginica
##   setosa          9          0         0
##   versicolor      0          6         3
##   virginica       0          2        10