# Lasso and CV demo

In [None]:
import numpy as np
# Gradient-descent hyperparameters: step size and number of update steps per fit.
LR = 0.01
NUM_ITERATIONS = 500

# Synthetic stand-in for the *training* split only; any held-out test data is
# deliberately not part of X and Y. Values are drawn uniformly from [0, 1).
_NUM_SAMPLES, _NUM_FEATURES = 1000, 50
X = np.random.random(size=(_NUM_SAMPLES, _NUM_FEATURES))
Y = np.random.random(size=(_NUM_SAMPLES,))

In [2]:
def predict(w, b, Xin):
    """Evaluate the linear model on ``Xin``.

    Args:
        w: weight vector, one entry per column of ``Xin``.
        b: intercept added to every prediction.
        Xin: inputs to predict for, one row per example.

    Returns:
        ``np.dot(Xin, w) + b``, i.e. one prediction per row of ``Xin``.
    """
    linear_term = np.dot(Xin, w)
    return linear_term + b

def fit(Xin, Yin, l1_penalty):
    """Train a Lasso linear model with a fixed number of gradient steps.

    Starts from all-zero weights and a zero intercept, then applies
    ``update_weights`` ``NUM_ITERATIONS`` times.

    Args:
        Xin: 2-D training inputs, one row per example.
        Yin: training targets, one per row of ``Xin``.
        l1_penalty: strength of the L1 regularization term.

    Returns:
        Tuple ``(w, b)`` of the learned weight vector and intercept.
    """
    # Unpacking the shape also enforces that Xin is two-dimensional.
    _, num_features = Xin.shape

    weights, bias = np.zeros(num_features), 0

    for _ in range(NUM_ITERATIONS):
        weights, bias = update_weights(weights, bias, Xin, Yin, l1_penalty)

    return weights, bias

def update_weights(w, b, Xin, Yin, l1_penalty, learning_rate=None):
    """Take one subgradient-descent step on the Lasso objective.

    The objective being minimized is
    ``(sum((Yin - Xin @ w - b) ** 2) + l1_penalty * sum(abs(w))) / m``,
    where ``m`` is the number of rows of ``Xin``.

    Args:
        w: current weight vector (NumPy array), one entry per column of ``Xin``.
        b: current intercept.
        Xin: 2-D NumPy array of inputs, one row per example.
        Yin: targets, one per row of ``Xin``.
        l1_penalty: strength of the L1 regularization term.
        learning_rate: step size; when ``None`` the module-level ``LR`` is used.

    Returns:
        Tuple ``(w, b)`` holding the updated weights and intercept. The inputs
        are not modified in place.
    """
    m, _ = Xin.shape
    step = LR if learning_rate is None else learning_rate

    # Residuals of the linear model; the gradients below are derived
    # analytically for exactly this model form.
    residual = Yin - (np.dot(Xin, w) + b)

    # np.sign(0) == 0 is a valid subgradient of |w| at zero. Treating a zero
    # weight as negative would add a constant push of l1_penalty / m in the
    # positive direction to every exactly-zero weight on each step.
    dW = (-2 * Xin.T.dot(residual) + l1_penalty * np.sign(w)) / m
    db = -2 * np.sum(residual) / m

    return w - step * dW, b - step * db

def rmse_lasso(w, b, Xin, Yin):
    """Score a fitted linear model by its RMSE on ``(Xin, Yin)``.

    Args:
        w: weight vector of the model.
        b: intercept of the model.
        Xin: inputs to evaluate on.
        Yin: true targets for ``Xin``.

    Returns:
        Root-mean-square error between ``Yin`` and the model's predictions.
    """
    return rmse(Yin, predict(w, b, Xin))

def rmse(a, b):
    """Return the root-mean-square of the elementwise difference ``a - b``."""
    residual = a - b
    mean_squared = np.mean(np.square(residual))
    return np.sqrt(mean_squared)

In [3]:
# Grid of candidate L1 penalties: 50 values spaced evenly on a log10 scale
# between 1e-5 and 1e-1.
l1_penalties = np.logspace(-5, -1, num=50)

# One cross-validation error slot per candidate penalty.
err = np.zeros_like(l1_penalties)

# 10-fold CV: a shuffled array of row indices is sliced into validation and
# training parts for each fold; num_samples is the size of one fold.
k_fold = 10
num_samples = X.shape[0] // k_fold
indices = np.random.permutation(X.shape[0])

In [4]:
# Partition the shuffled indices into k_fold validation folds once, up front.
# np.array_split spreads any remainder over the leading folds, so every sample
# is validated exactly once even when len(indices) is not a multiple of k_fold
# (fixed-width slicing would leave the remainder out of every validation fold).
folds = np.array_split(indices, k_fold)

for idx, l1_penalty in enumerate(l1_penalties):
    fold_errors = []
    for k, VAL in enumerate(folds):  # k_fold-fold CV
        # Train on every fold except the k-th, validate on the k-th.
        TRAIN = np.concatenate(folds[:k] + folds[k + 1:])

        x_train_fold = X[TRAIN]
        y_train_fold = Y[TRAIN]

        x_val_fold = X[VAL]
        y_val_fold = Y[VAL]

        w, b = fit(x_train_fold, y_train_fold, l1_penalty)

        fold_errors.append(rmse_lasso(w, b, x_val_fold, y_val_fold))

    # Mean validation RMSE over all folds for this penalty. Assigning (instead
    # of accumulating into err[idx]) keeps the result correct when this cell is
    # re-run without re-creating err first.
    err[idx] = sum(fold_errors) / k_fold

In [None]:
# Select the candidate penalty with the smallest cross-validation error
# (argmin returns the first position on ties).
l1_penalty_best = l1_penalties[err.argmin()]

print('Best choice of l1_penalty = ', l1_penalty_best)