f <- function(x) {
return(exp(beta.0 + beta * x) / (1 + exp(beta.0 + beta * x)))
}
beta.0 <- 0
beta.seq <- c(0, 0.2, 0.5, 1, 2, 10)
m <- length(beta.seq)
beta <- beta.seq[1]
plot(f, xlim = c(-10, 10), ylim = c(0, 1),
xlab = "x", ylab = "y", col = 1, main = "ロジスティック曲線")
for (i in 2:m) {
beta <- ## 空欄(1) ##
par(new = TRUE)
plot(## 空欄(2) ##, xlim = c(-10, 10), ylim = c(0, 1),
xlab = "", ylab = "", axes = FALSE, , col = i)
}
legend("topleft", legend = beta.seq, col = 1:length(beta.seq), lwd = 2, cex = .8)
par(new = FALSE)
## データ生成
N <- 1000
p <- 2
X <- matrix(rnorm(N * p), ncol = p)
X <- cbind(rep(1, N), X)
beta <- rnorm(p + 1)
y <- array(N)
s <- as.vector(X %*% beta)
prob <- 1 / (1 + exp(s))
for (i in 1:N) {
if (runif(1) > prob[i]) {
y[i] <- 1
} else {
y[i] <- -1
}
}
beta
## 最尤推定値の計算
beta <- Inf
gamma <- rnorm(p + 1)
while (sum((beta - gamma) ^ 2) > 0.001) {
beta <- gamma
s <- as.vector(X %*% beta)
v <- exp(-s * y)
u <- y * v / (1 + v)
w <- v / (1 + v) ^ 2
z <- s + u / w
W <- diag(w)
gamma <- as.vector(solve(t(X) %*% W %*% X) %*% t(X) %*% W %*% z)
print(gamma)
}
glm <- glmnet(x, y, lambda = lambda, family = "binomial")
df <- read.csv("breastcancer.csv")
x <- as.matrix(df[, 1:1000])
y <- as.vector(df[, 1001])
cv <- cv.glmnet(x, y, family = "binomial")
cv2 <- cv.glmnet(x, y, family = "binomial", type.measure = "class")
par(mfrow = c(1, 2))
plot(cv)
plot(cv2)
par(mfrow = c(1, 1))
y %*% rep(1, nc) でエラー: 適切な引数ではありません
library(glmnet)
df <- read.table("iris.txt", sep = ",")
x <- as.matrix(df[, 1:4])
y <- as.vector(df[, 5])
y <- as.numeric(y == "Iris-setosa")
cv <- cv.glmnet(x, y, family = "binomial")
cv2 <- cv.glmnet(x, y, family = "binomial", type.measure = "class")
par(mfrow = c(1, 2))
plot(cv)
plot(cv2)
par(mfrow = c(1, 1))
lambda <- cv$lambda.min
result <- glmnet(x, y, lambda = lambda, family = "binomial")
beta <- result$beta
beta.0 <- result$a0
f <- function(x) {
return(exp(beta.0 + x %*% beta))
}
z <- array(dim = 150)
for (i in 1:150)
z[i] <- drop(f(x[i, ]))
yy <- (z > 1)
sum(yy == y)
library(glmnet)
df <- read.table("iris.txt", sep = ",")
x <- as.matrix(df[, 1:4])
y <- as.vector(df[, 5])
n <- length(y)
u <- array(dim = n)
for (i in 1:n) {
if (y[i] == "Iris-setosa") {
u[i] <- 1
} else if (y[i] == "Iris-versicolor") {
u[i] <- 2
} else {
u[i] <- 3
}
}
u <- as.numeric(u)
cv <- cv.glmnet(x, u, family = "multinomial")
cv2 <- cv.glmnet(x, u, family = "multinomial", type.measure = "class")
par(mfrow = c(1, 2))
plot(cv)
plot(cv2)
par(mfrow = c(1, 1))
lambda <- cv$lambda.min
result <- glmnet(x, y, lambda = lambda, family = "multinomial")
beta <- result$beta
beta.0 <- result$a0
v <- array(dim = n)
for (i in 1:n) {
max.value <- -Inf
for (j in 1:3) {
value <- ## 空欄 ##
if (value > max.value) {
v[i] <- j
max.value <- value
}
}
}
sum(u == v)
## データ生成
N <- 1000
p <- 7
beta <- rnorm(p + 1)
X <- matrix(rnorm(N * p), ncol = p)
X <- cbind(rep(1, N), X)
s <- X %*% beta
y <- rpois(N, lambda = exp(s))
beta
## 最尤推定値の計算
lambda <- 100
beta <- Inf
gamma <- rnorm(p + 1)
while (sum((beta - gamma) ^ 2) > 0.01) {
beta <- gamma
s <- as.vector(X %*% beta)
w <- ## 空欄(1) ##
u <- ## 空欄(2) ##
z <- ## 空欄(3) ##
W <- diag(w)
gamma <- coordinate(W, z, gamma)
print(gamma)
}
df <- read.csv("baseball.csv", sep = "", , header = "FALSE")
df <- df[, -c(1, 2)]
df[[6]]
library(survival)
data(kidney)
names(kidney)
y <- kidney$time
delta <- kidney$status
Surv(y, delta)
fit <- survfit(Surv(time, status) ~ disease, data = kidney)
plot(fit, xlab = "時間", ylab = "生存率", col = c("red", "green", "blue", "black"))
legend(300, 0.8, legend = c("その他", "GN", "AN", "PKD"),
lty = 1, col = c("red", "green", "blue", "black"))
## 以下も実行する
library(ranger)
library(ggplot2)
library(dplyr)
library(ggfortify)
autoplot(fit)
library(survival)
load("LymphomaData.rda")
attach("LymphomaData.rda")
names(patient.data)
x <- t(patient.data$x)
y <- patient.data$time
delta <- patient.data$status
Surv(y, delta)
cv.fit <- cv.glmnet(x, Surv(y, delta), family = "cox")
fit2 <- glmnet(x, Surv(y, delta), lambda = cv.fit$lambda.min, family = "cox")
z <- sign(drop(x %*% fit2$beta))
fit3 <- survfit(Surv(y, delta) ~ ## 空欄 ##)
autoplot(fit3)
mean(y[z == 1])
mean(y[z == -1]
library(ElemStatLearn)
library(glmnet)
library(sparseSVM)
data(SAheart)
df <- SAheart
df[, 5] <- as.numeric(df[, 5])
x <- as.matrix(df[, 1:9])
y <- as.vector(df[, 10])
p <- 9
binom.fit <- glmnet(x, y, family = "binomial")
svm.fit <- sparseSVM(x, y)
par(mfrow = c(1, 2))
plot(binom.fit)
plot(svm.fit, xvar = "norm")
par(mfrow = c(1, 1))
## 出力は似ているが,凡例がないので,係数の値が近いかどうかわからない。
## そこで,自分でグラフを作成してみた。
coef.binom <- binom.fit$beta
coef.svm <- coef(svm.fit)[2:(p + 1), ]
norm.binom <- apply(abs(coef.binom), 2, sum)
norm.binom <- norm.binom / max(norm.binom)
norm.svm <- apply(abs(coef.svm), 2, sum)
norm.svm <- norm.svm / max(norm.svm)
par(mfrow <- c(1, 2))
plot(norm.binom, xlim = c(0, 1), ylim = c(min(coef.binom), max(coef.binom)),
main = "ロジスティック回帰", xlab = "ノルム", ylab = "係数", type = "n")
for (i in 1:p)
lines(norm.binom, coef.binom[i, ], col = i)
legend("topleft", legend = colnames(df), col = 1:p, lwd = 2, cex = .8)
par(mfrow = c(1, 1))
df <- read.csv("http://web.stanford.edu/~hastie/CASI_files/DATA/leukemia_big.csv")
dim(df)
names <- colnames(df)
x <- t(as.matrix(df)) #ゲノムデータでは,行と列が反対になっていることが多い
y <- as.numeric(substr(names, 1, 3) == "ALL")
p <- 7128
binom.fit <- glmnet(x, y, family = "binomial")
svm.fit <- sparseSVM(x, y)
coef.binom <- binom.fit$beta
coef.svm <- coef(svm.fit)[2:(p + 1), ]
norm.binom <- apply(abs(coef.binom), 2, sum)
norm.binom <- norm.binom / max(norm.binom)
norm.svm <- apply(abs(coef.svm), 2, sum)
norm.svm <- norm.svm / max(norm.svm)