import numpy as np
import matplotlib.pyplot as plt
# Synthetic data: noiseless targets generated from a known weight vector
np.random.seed(0)  # fix the seed so runs are reproducible
theta_star = np.array([1.0, 0.1])
X = np.random.randn(100, 2)
y = X @ theta_star + np.random.randn(100) * 0.0  # noise scale is zero, i.e. noiseless targets
# Cost function: mean squared error plus an L1 penalty on the weights
def cost_function(X, y, theta, lambda_param):
    m = len(y)
    J = np.sum((X.dot(theta) - y) ** 2) / (2 * m) + lambda_param * np.sum(np.abs(theta))
    return J
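# A minimal sanity check: with noiseless targets the data term vanishes at
# theta_star, so the cost there is just the penalty,
# lambda * ||theta_star||_1 = 0.2 * (1.0 + 0.1) = 0.22:
assert np.isclose(cost_function(X, y, theta_star, 0.2), 0.22)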
# One (or more) gradient descent steps with L1 regularization; the penalty
# contributes the subgradient lambda * sign(theta) (np.sign returns 0 at 0)
def gradient_descent_step(X, y, theta, alpha, lambda_param, iterations=1):
    m = len(y)
    for _ in range(iterations):
        theta = theta - alpha * (X.T.dot(X.dot(theta) - y) / m + lambda_param * np.sign(theta))
    return theta
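# Hedged aside, not part of the original script: the subgradient step above can
# oscillate around zero and rarely produces exact zeros. The standard
# proximal-gradient (ISTA) update handles the L1 term exactly instead: a plain
# gradient step on the data term followed by soft-thresholding.
def ista_step(X, y, theta, alpha, lambda_param):
    m = len(y)
    theta = theta - alpha * X.T.dot(X.dot(theta) - y) / m  # gradient step on the data term
    return np.sign(theta) * np.maximum(np.abs(theta) - alpha * lambda_param, 0.0)  # soft-threshold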
# Initialize weights
theta = np.array([0.5, 0.5])
# Set learning rate, number of iterations and regularization parameter
alpha = 0.05
iterations = 100
lambda_param = 0.2
loss = []    # regularized cost per iteration
err = []     # unregularized (pure MSE) cost per iteration
thetas = []  # weight trajectory
for k in range(iterations):
    loss.append(cost_function(X, y, theta, lambda_param))
    err.append(cost_function(X, y, theta, 0))
    thetas.append(theta)
    theta = gradient_descent_step(X, y, theta, alpha, lambda_param, iterations=1)
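# Quick check (a convenience print, not in the original): with an L1 penalty
# the iterate should settle near a shrunken version of theta_star rather than
# theta_star itself.
print('final theta:', theta)
print('theta_star :', theta_star)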
# Evaluate the regularized cost on a grid for the contour plot
theta_min, theta_max = -1, 1.5
_x = np.linspace(theta_min, theta_max, 100)
_y = np.linspace(theta_min, theta_max, 100)
_X, _Y = np.meshgrid(_x, _y)
Z = np.zeros(_X.shape)
for i in range(_X.shape[0]):
    for j in range(_X.shape[1]):
        _theta = np.array([_X[i, j], _Y[i, j]])
        Z[i, j] = cost_function(X, y, _theta, lambda_param)
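# Hedged aside: the double loop is clear but slow for fine grids; an
# equivalent vectorized evaluation under the same definitions gives the same Z.
grid = np.stack([_X.ravel(), _Y.ravel()])  # all grid points as columns, shape (2, n**2)
resid = X @ grid - y[:, None]              # residuals for every grid point at once
Z_vec = (resid ** 2).sum(axis=0) / (2 * len(y)) + lambda_param * np.abs(grid).sum(axis=0)
assert np.allclose(Z, Z_vec.reshape(_X.shape))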
plt.contour(_X, _Y, Z, cmap='coolwarm')
thetas = np.array(thetas)
plt.scatter(thetas[:, 0], thetas[:, 1], c=loss, cmap='jet')  # descent path, colored by cost
plt.plot(theta_star[0], theta_star[1], 'rx')  # generating weights
plt.grid(True)
plt.show()
plt.plot(thetas)
plt.legend(['theta_0', 'theta_1'])
plt.title('theta values')
plt.xlabel('iteration')
plt.show()
plt.plot(loss, label='regularized cost')
plt.plot(err, label='unregularized MSE')
plt.legend()
plt.title('cost function per iteration')
plt.xlabel('iteration')
plt.show()