Chapter 6 Regularization¶

In [ ]:
from sklearn.linear_model import LassoCV
from sklearn.linear_model import Lasso
import copy
from numpy.random import randn
from scipy import stats
import scipy
import matplotlib.pyplot as plt
import numpy as np

51¶

In [ ]:
def soft_th(lam, x):
    return np.sign(x)*np.maximum(np.abs(x)-lam, 0)
In [ ]:
x_seq = np.arange(-10, 10, 0.1)
plt.plot(x_seq, soft_th(5, x_seq))
plt.plot([-5, -5], [4, -4], c="black", linestyle="dashed", linewidth=0.8)
plt.plot([5, 5], [4, -4], c="black", linestyle="dashed", linewidth=0.8)
plt.title("soft_th(lam,x)")
plt.text(-1.5, 1, 'λ=5', fontsize=15)
Out[ ]:
Text(-1.5, 1, 'λ=5')
No description has been provided for this image

53¶

In [ ]:
def lasso(x, y, lam=0):  # lam stands for lambda
    X = copy.copy(x)
    n, p = X.shape
    X_bar = np.zeros(p)
    s = np.zeros(p)
    for j in range(p):
        X_bar[j] = np.mean(X[:, j])
    for j in range(p):
        s[j] = np.std(X[:, j])
        X[:, j] = (X[:, j]-X_bar[j])/s[j]
    y_bar = np.mean(y)
    y = y-y_bar
    eps = 1
    beta = np.zeros(p)
    beta_old = np.zeros(p)
    while eps > 0.001:
        for j in range(p):
            index = list(set(range(p))-{j})
            r = y-X[:, index]@beta[index]
            beta[j] = soft_th(lam, r.T@X[:, j]/n)
        eps = np.max(np.abs(beta-beta_old))
        beta_old = copy.copy(beta)
    for j in range(p):
        beta[j] = beta[j]/s[j]
    beta_0 = y_bar-X_bar.T@beta
    return {'beta': beta, 'beta_0': beta_0}
In [ ]:
df = np.loadtxt("crime.txt", delimiter="\t")
X = df[:, [i for i in range(2, 7)]]
p = X.shape[1]
y = df[:, 0]

lambda_seq = np.arange(0, 200, 0.5)
plt.xlim(0, 200)
plt.ylim(-10, 20)
plt.xlabel("lambda")
plt.ylabel("beta")
labels = ["Annual funding to the police", "Percentage of those 25 and older who graduated from high school",
          "Percentage of those aged 16-19 who are not attending high school",
          "Percentage of those aged 18-24 who are college students",
          "Percentage of those aged 25 and older who graduated from a 4-year college"]
for j in range(p):
    coef_seq = []
    for l in lambda_seq:
        coef_seq.append(lasso(X, y, l)['beta'][j])
    plt.plot(lambda_seq, coef_seq, label="{}".format(labels[j]))
plt.legend(loc="upper right")
plt.title("Value of each coefficient for each λ")
Out[ ]:
Text(0.5, 1.0, 'Value of each coefficient for each λ')
No description has been provided for this image

54¶

In [ ]:
def ridge(x, y, lam=0):  # lam stands for lambda
    X = copy.copy(x)
    n, p = X.shape
    X_bar = np.zeros(p)
    s = np.zeros(p)
    for j in range(p):
        X_bar[j] = np.mean(X[:, j])
    for j in range(p):
        s[j] = np.std(X[:, j])
        X[:, j] = (X[:, j]-X_bar[j])/s[j]
    y_bar = np.mean(y)
    y = y-y_bar
    beta = np.linalg.inv(X.T@X+n*lam*np.eye(p))@X.T@y
    for j in range(p):
        beta[j] = beta[j]/s[j]
    beta_0 = y_bar-X_bar.T@beta
    return {'beta': beta, 'beta_0': beta_0}
In [ ]:
lambda_seq = np.arange(0, 50, 0.5)
plt.xlim(0, 50)
plt.ylim(-7.5, 15)
plt.xlabel("lambda")
plt.ylabel("beta")
labels = ["Annual funding to the police", "Percentage of those 25 and older who graduated from high school",
          "Percentage of those aged 16-19 who are not attending high school",
          "Percentage of those aged 18-24 who are college students",
          "Percentage of those aged 25 and older who graduated from a 4-year college"]
for j in range(p):
    coef_seq = []
    for l in lambda_seq:
        coef_seq.append(ridge(X, y, l)['beta'][j])
    plt.plot(lambda_seq, coef_seq, label="{}".format(labels[j]))
plt.legend(loc="upper right")
plt.title("Value of each coefficient for each λ")
Out[ ]:
Text(0.5, 1.0, 'Value of each coefficient for each λ')
No description has been provided for this image

55¶

In [ ]:
Las = Lasso(alpha=20)
Las.fit(X, y)
Las.coef_
Out[ ]:
array([11.09067594, -5.2800757 ,  4.65494282,  0.55015932,  2.84324295])
In [ ]:
# Search the values specified by alphas, cv is the number of K
Lcv = LassoCV(alphas=np.arange(0.1, 30, 0.1), cv=10)
Lcv.fit(X, y)
Lcv.alpha_
Lcv.coef_
Out[ ]:
array([11.14516156, -4.87861992,  4.24780979,  0.63662582,  1.52576885])