Chapter 6 Regularization
In [ ]:
from sklearn.linear_model import LassoCV
from sklearn.linear_model import Lasso
import copy
from numpy.random import randn
from scipy import stats
import scipy
import matplotlib.pyplot as plt
import numpy as np
In [ ]:
def soft_th(lam, x):
    return np.sign(x)*np.maximum(np.abs(x)-lam, 0)
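As a quick sanity check (this check is an addition, not part of the original notebook; the test values are chosen arbitrarily), soft thresholding should zero out any input with |x| ≤ λ and move the remaining values toward zero by λ:
soft_th(5, np.array([-8, -3, 0, 3, 8]))
# expected result: array([-3, 0, 0, 0, 3])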
In [ ]:
x_seq = np.arange(-10, 10, 0.1)
plt.plot(x_seq, soft_th(5, x_seq))
plt.plot([-5, -5], [4, -4], c="black", linestyle="dashed", linewidth=0.8)
plt.plot([5, 5], [4, -4], c="black", linestyle="dashed", linewidth=0.8)
plt.title("soft_th(lam,x)")
plt.text(-1.5, 1, 'λ=5', fontsize=15)
Out[ ]:
Text(-1.5, 1, 'λ=5')
In [ ]:
def lasso(x, y, lam=0):  # lam stands for lambda
    X = copy.copy(x)
    n, p = X.shape
    X_bar = np.zeros(p)     # column means
    s = np.zeros(p)         # column standard deviations
    for j in range(p):
        X_bar[j] = np.mean(X[:, j])
    for j in range(p):
        s[j] = np.std(X[:, j])
        X[:, j] = (X[:, j]-X_bar[j])/s[j]    # standardize each column
    y_bar = np.mean(y)
    y = y-y_bar                              # center the response
    eps = 1
    beta = np.zeros(p)
    beta_old = np.zeros(p)
    while eps > 0.001:                       # coordinate descent until the update is small
        for j in range(p):
            index = list(set(range(p))-{j})
            r = y-X[:, index]@beta[index]              # partial residual without variable j
            beta[j] = soft_th(lam, r.T@X[:, j]/n)      # soft-thresholded coordinate update
        eps = np.max(np.abs(beta-beta_old))
        beta_old = copy.copy(beta)
    for j in range(p):
        beta[j] = beta[j]/s[j]               # rescale back to the original units
    beta_0 = y_bar-X_bar.T@beta              # intercept
    return {'beta': beta, 'beta_0': beta_0}
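A minimal usage sketch (an addition; the synthetic data and variable names below are purely illustrative): generate three predictors, one of which has a true coefficient of zero, and check that a moderate λ drives its estimate to exactly zero.
np.random.seed(0)
n = 100
X_toy = randn(n, 3)                          # randn was imported above
beta_true = np.array([2.0, 0.0, -1.5])
y_toy = X_toy@beta_true + 0.5*randn(n)
print(lasso(X_toy, y_toy, lam=0.5)['beta'])  # the second coefficient is expected to be exactly 0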
In [ ]:
df = np.loadtxt("crime.txt", delimiter="\t")
X = df[:, [i for i in range(2, 7)]]
p = X.shape[1]
y = df[:, 0]
lambda_seq = np.arange(0, 200, 0.5)
plt.xlim(0, 200)
plt.ylim(-10, 20)
plt.xlabel("lambda")
plt.ylabel("beta")
labels = ["Annual funding to the police", "Percentage of those 25 and older who graduated from high school",
"Percentage of those aged 16-19 who are not attending high school",
"Percentage of those aged 18-24 who are college students",
"Percentage of those aged 25 and older who graduated from a 4-year college"]
for j in range(p):
coef_seq = []
for l in lambda_seq:
coef_seq.append(lasso(X, y, l)['beta'][j])
plt.plot(lambda_seq, coef_seq, label="{}".format(labels[j]))
plt.legend(loc="upper right")
plt.title("Value of each coefficient for each λ")
Out[ ]:
Text(0.5, 1.0, 'Value of each coefficient for each λ')
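As a follow-up sketch (an addition, not in the original notebook): because soft thresholding produces exact zeros, one can read off the smallest λ on the grid at which each coefficient vanishes.
first_zero = [None]*p
for l in lambda_seq:
    b = lasso(X, y, l)['beta']
    for j in range(p):
        if first_zero[j] is None and b[j] == 0:
            first_zero[j] = l
for j in range(p):
    print(labels[j], ":", first_zero[j])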
In [ ]:
def ridge(x, y, lam=0):  # lam stands for lambda
    X = copy.copy(x)
    n, p = X.shape
    X_bar = np.zeros(p)     # column means
    s = np.zeros(p)         # column standard deviations
    for j in range(p):
        X_bar[j] = np.mean(X[:, j])
    for j in range(p):
        s[j] = np.std(X[:, j])
        X[:, j] = (X[:, j]-X_bar[j])/s[j]    # standardize each column
    y_bar = np.mean(y)
    y = y-y_bar                              # center the response
    beta = np.linalg.inv(X.T@X+n*lam*np.eye(p))@X.T@y   # closed-form ridge estimate
    for j in range(p):
        beta[j] = beta[j]/s[j]               # rescale back to the original units
    beta_0 = y_bar-X_bar.T@beta              # intercept
    return {'beta': beta, 'beta_0': beta_0}
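A quick sanity check (an addition; it assumes X and y from crime.txt are still in scope): with λ = 0 the ridge estimate reduces to ordinary least squares, so its slopes should match those from np.linalg.lstsq with an intercept column.
ols = np.linalg.lstsq(np.c_[np.ones(X.shape[0]), X], y, rcond=None)[0]
print(ridge(X, y, 0)['beta'])   # ridge slopes with lam = 0
print(ols[1:])                  # OLS slopes (ols[0] is the intercept)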
In [ ]:
lambda_seq = np.arange(0, 50, 0.5)
plt.xlim(0, 50)
plt.ylim(-7.5, 15)
plt.xlabel("lambda")
plt.ylabel("beta")
labels = ["Annual funding to the police", "Percentage of those 25 and older who graduated from high school",
"Percentage of those aged 16-19 who are not attending high school",
"Percentage of those aged 18-24 who are college students",
"Percentage of those aged 25 and older who graduated from a 4-year college"]
for j in range(p):
coef_seq = []
for l in lambda_seq:
coef_seq.append(ridge(X, y, l)['beta'][j])
plt.plot(lambda_seq, coef_seq, label="{}".format(labels[j]))
plt.legend(loc="upper right")
plt.title("Value of each coefficient for each λ")
Out[ ]:
Text(0.5, 1.0, 'Value of each coefficient for each λ')
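In contrast to the lasso path above, ridge shrinks the coefficients smoothly but does not set any of them exactly to zero; a small check (an addition) makes this visible even for a fairly large λ:
print(ridge(X, y, 50)['beta'])   # all five coefficients remain nonzero, only shrunk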
In [ ]:
Las = Lasso(alpha=20)
Las.fit(X, y)
Las.coef_
Out[ ]:
array([11.09067594, -5.2800757 , 4.65494282, 0.55015932, 2.84324295])
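One point worth noting (an addition, hedged): scikit-learn's Lasso minimizes ||y - Xb||^2 / (2n) + alpha * ||b||_1 on the predictors as given, without standardizing them, whereas the lasso() function above standardizes each column internally. Fitting on standardized columns and rescaling the coefficients (the names scale, Z, and Las_std below are illustrative) should therefore agree more closely with lasso(X, y, 20)['beta']:
scale = X.std(axis=0)
Z = (X-X.mean(axis=0))/scale
Las_std = Lasso(alpha=20)
Las_std.fit(Z, y)
print(Las_std.coef_/scale)       # compare with lasso(X, y, 20)['beta']
print(lasso(X, y, 20)['beta'])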
In [ ]:
# Search over the values given by alphas; cv is the number of folds K
Lcv = LassoCV(alphas=np.arange(0.1, 30, 0.1), cv=10)
Lcv.fit(X, y)
Lcv.alpha_
Lcv.coef_
Out[ ]:
array([11.14516156, -4.87861992, 4.24780979, 0.63662582, 1.52576885])
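As an optional follow-up (an addition, a sketch rather than part of the original notebook), the cross-validation curve behind the selected λ can be drawn from the alphas_ and mse_path_ attributes of the fitted LassoCV object:
plt.plot(Lcv.alphas_, Lcv.mse_path_.mean(axis=1))
plt.axvline(Lcv.alpha_, c="black", linestyle="dashed", linewidth=0.8)
plt.xlabel("lambda")
plt.ylabel("mean cross-validation MSE")
plt.title("Cross-validation error for each λ")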