import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import scipy
from scipy import stats
from numpy.random import randn
import copy
def ridge(x,y,lam=0): #lam = lambda
X=copy.copy(x)
n,p=X.shape
X_bar=np.zeros(p)
s=np.zeros(p)
for j in range(p):
X_bar[j]=np.mean(X[:,j])
for j in range(p):
s[j]=np.std(X[:,j])
X[:,j]=(X[:,j]-X_bar[j])/s[j]
y_bar=np.mean(y)
y=y-y_bar
beta=np.linalg.inv(X.T@X+n*lam*np.eye(p))@X.T@y
for j in range(p):
beta[j]=beta[j]/s[j]
beta_0=y_bar-X_bar.T@beta
return {'beta':beta, 'beta_0':beta_0}
df=np.loadtxt("crime.txt",delimiter="\t")
X=df[:,[i for i in range(2,7)]]
p=X.shape[1]
y=df[:,0]
lambda_seq=np.arange(0,50,0.5)
plt.xlim(0,50)
plt.ylim(-7.5,15)
plt.xlabel("lambda")
plt.ylabel("beta")
labels=["annual police funding in resident","% of people 25 years+ with 4 yrs. of high school",
"% of 16 to 19 year-olds not in highschool and not highschool graduates","% of 18 to 24 year-olds in college",
"% of 18 to 24 year-olds in college"]
for j in range(p):
coef_seq=[]
for l in lambda_seq:
coef_seq.append(ridge(X,y,l)['beta'][j])
plt.plot(lambda_seq,coef_seq,label="{}".format(labels[j]))
plt.legend(loc="upper right")
x_seq=np.arange(-2,2,0.05)
y=x_seq**2-3*x_seq+np.abs(x_seq)
plt.plot(x_seq,y)
plt.scatter(1,-1,c="red")
plt.title("y=x^2-3x+|x|")
y=x_seq**2+x_seq+2*np.abs(x_seq)
plt.plot(x_seq,y)
plt.scatter(0,0,c="red")
plt.title("y=x^2+x+2|x|")
def soft_th(lam,x):
return np.sign(x)*np.maximum(np.abs(x)-lam,0)
x_seq=np.arange(-10,10,0.1)
plt.plot(x_seq,soft_th(5,x_seq))
plt.plot([-5,-5],[4,-4],c="black",linestyle="dashed",linewidth=0.8)
plt.plot([5,5],[4,-4],c="black",linestyle="dashed",linewidth=0.8)
plt.title("soft_th(lam,x)")
plt.text( -1.5,1, 'λ=5',fontsize=15)
def lasso(x,y,lam=0): #lam = lambda
X=copy.copy(x)
n,p=X.shape
X_bar=np.zeros(p)
s=np.zeros(p)
for j in range(p):
X_bar[j]=np.mean(X[:,j])
for j in range(p):
s[j]=np.std(X[:,j])
X[:,j]=(X[:,j]-X_bar[j])/s[j]
y_bar=np.mean(y)
y=y-y_bar
eps=1
beta=np.zeros(p);beta_old=np.zeros(p)
while eps>0.001:
for j in range(p):
index=list(set(range(p))-{j})
r=y-X[:,index]@beta[index]
beta[j]=soft_th(lam,r.T@X[:,j]/n)
eps=np.max(np.abs(beta-beta_old))
beta_old=copy.copy(beta)
for j in range(p):
beta[j]=beta[j]/s[j]
beta_0=y_bar-X_bar.T@beta
return {'beta':beta, 'beta_0':beta_0}
df=np.loadtxt("crime.txt",delimiter="\t")
X=df[:,[i for i in range(2,7,1)]]
p=X.shape[1]
y=df[:,0]
lambda_seq=np.arange(0,200,0.5)
plt.xlim(0,200)
plt.ylim(-10,20)
plt.xlabel("lambda")
plt.ylabel("beta")
labels=["annual police funding in resident","% of people 25 years+ with 4 yrs. of high school",
"% of 16 to 19 year-olds not in highschool and not highschool graduates","% of 18 to 24 year-olds in college",
"% of 18 to 24 year-olds in college"]
for j in range(p):
coef_seq=[]
for l in lambda_seq:
coef_seq.append(lasso(X,y,l)['beta'][j])
plt.plot(lambda_seq,coef_seq,label="{}".format(labels[j]))
plt.legend(loc="upper right")
plt.title(" The coefficients for each λ")
lasso(X,y,20)
from sklearn.linear_model import Lasso
from sklearn.linear_model import LassoCV
Las=Lasso(alpha=20)
Las.fit(X,y)
Las.coef_
Lcv = LassoCV(alphas=np.arange(0.1, 30, 0.1), cv=10)
Lcv.fit(X,y)
Lcv.alpha_
Lcv.coef_
Lcv.intercept_
df=np.loadtxt("crime.txt",delimiter="\t")
X=df[:,[i for i in range(2,7,1)]]
y=df[:,0]
# grid search for the alphas value,cv is the K
Lcv = LassoCV(alphas=np.arange(0.1, 30, 0.1), cv=10)
Lcv.fit(X,y)
Lcv.alpha_
Lcv.coef_
df=np.loadtxt("crime.txt",delimiter="\t")
X=df[:,[i for i in range(2,7,1)]]
y=df[:,0]
Lcv = LassoCV(alphas=np.arange(0.1, 30, 0.1), cv=10)
Lcv.fit(X,y)
Lcv.alpha_
Lcv.coef_
Las=Lasso(alpha=20)
Las.fit(X,y)
Las.coef_