Chapter 6 Regularization
6.1 Ridge
ridge=function(X, y, lambda=0){
X=as.matrix(X); p=ncol(X); n=length(y); X.bar=array(dim=p); s=array(dim=p)
for(j in 1:p){X.bar[j]=mean(X[,j]);X[,j]=X[,j]-X.bar[j];};
for(j in 1:p){s[j]=sd(X[,j]);X[,j]=X[,j]/s[j]};
y.bar=mean(y); y=y-y.bar
beta=drop(solve(t(X)%*%X+n*lambda*diag(p))%*%t(X)%*%y)
for(j in 1:p)beta[j]=beta[j]/s[j]
beta.0= y.bar-sum(X.bar*beta)
return(list(beta=beta, beta.0=beta.0))
}
df=read.table("crime.txt"); x=df[,3:7]; y=df[,1]; p=ncol(x);
lambda.seq=seq(0,50);
plot(lambda.seq, xlim=c(0,50), ylim=c(-7.5,15),
xlab="lambda",ylab="beta",main="The estimates for each lambda", type="n", col="red")
for(j in 1:p){
coef.seq=NULL; for(lambda in lambda.seq)coef.seq=c(coef.seq,ridge(x,y,lambda)$beta[j])
par(new=TRUE); lines(lambda.seq,coef.seq, col=j)
}
legend("topright",legend=c("annual police funding","% of people 25 years+ with 4 yrs. of high school",
"% of 16 to 19 year-olds not in highschool and not highschool graduates",
"% of 18 to 24 year-olds in college",
"% of 18 to 24 year-olds in college"), col=1:p, lwd=2, cex =.8)
6.2 Sub-derivative
curve(x^2-3*x+abs(x), -2,2, main="y=x^2-3x+|x|"); points(1,-1, col="red", pch=16)
curve(x^2+x+2*abs(x), -2,2, main="x^2+x+2|x|"); points(0,0, col="red", pch=16)
6.3 Lasso
soft.th=function(lambda,x)sign(x)*pmax(abs(x)-lambda,0)
curve(soft.th(5,x),-10,10, main="soft.th(lambda,x)")
segments(-5,-4,-5,4, lty=5, col="blue"); segments(5,-4,5,4, lty=5, col="blue")
text(-0.2,1,"lambda=5",cex=1.5)
lasso=function(X, y, lambda=0){
X=as.matrix(X); p=ncol(X); n=length(y); X.bar=array(dim=p); s=array(dim=p)
for(j in 1:p){X.bar[j]=mean(X[,j]);X[,j]=X[,j]-X.bar[j];};
for(j in 1:p){s[j]=sd(X[,j]);X[,j]=X[,j]/s[j]}
y.bar=mean(y); y=y-y.bar
eps=1; beta=array(0, dim=p); beta.old=array(0, dim=p)
while(eps>0.001){
for(j in 1:p){
r= y-X[,-j]%*%beta[-j]
beta[j]= soft.th(lambda,sum(r*X[,j])/n)
}
eps=max(abs(beta-beta.old)); beta.old=beta
}
for(j in 1:p)beta[j]=beta[j]/s[j]
beta.0= y.bar-sum(X.bar*beta)
return(list(beta=beta, beta.0=beta.0))
}
df=read.table("crime.txt"); x=df[,3:7]; y=df[,1]; p=ncol(x);
lambda.seq=seq(0,200);
plot(lambda.seq, xlim=c(0,200), ylim=c(-7.5,15),
xlab="lambda",ylab="beta",main="The estimates for each lambda", type="n", col="red")
for(j in 1:p){
coef.seq=NULL; for(lambda in lambda.seq)coef.seq=c(coef.seq,lasso(x,y,lambda)$beta[j])
par(new=TRUE); lines(lambda.seq,coef.seq, col=j)
}
legend("topright",legend=c("annual police funding","% of people 25 years+ with 4 yrs. of high school",
"% of 16 to 19 year-olds not in highschool and not highschool graduates",
"% of 18 to 24 year-olds in college",
"% of 18 to 24 year-olds in college"), col=1:p, lwd=2, cex =.8)
6.5 Setting of lambda values
library(glmnet)
## Warning: package 'glmnet' was built under R version 4.0.3
## Loading required package: Matrix
## Loaded glmnet 4.0-2
df=read.table("crime.txt"); X=as.matrix(df[,3:7])
cv.fit=cv.glmnet(X,y);
plot(cv.fit)
lambda.min=cv.fit$lambda.min;
lambda.min
## [1] 20.03869
fit=glmnet(X,y,lambda=lambda.min);
fit$beta
## 5 x 1 sparse Matrix of class "dgCMatrix"
## s0
## V3 9.656911
## V4 -2.527286
## V5 3.229431
## V6 .
## V7 .