import numpy as np
# Read the whitespace-delimited numeric table from disk.
filename = "diabetes.txt"
data = np.loadtxt(filename)

# NOTE: X and Y are stored transposed (one row per feature rather than
# one row per sample). That keeps the feature-by-feature demo below
# simple, but it is not something you would normally do in practice
# unless you really needed to.
columns = data.T
X = columns[:10]   # the first ten columns of the file are the features
Y = columns[10]    # the eleventh column is the target
X.shape
# Output: (10, 442)
import matplotlib.pyplot as plt
# Fit a separate one-feature linear model (slope + intercept) for every
# feature row of X and plot each fit next to its data, two panels per figure.
for i, feature in enumerate(X):
    # Every even-indexed feature opens a new wide figure holding two panels.
    if i % 2 == 0:
        plt.figure(figsize=(20, 6))
    # Even i goes in the left panel (1), odd i in the right panel (2).
    plt.subplot(1, 2, i % 2 + 1)
    # Make data padded with 1s so the second weight acts as the intercept
    X_ = np.vstack([feature, np.ones(len(feature))]).T
    # Solve for w (least-squares solution of X_ @ w ~= Y)
    w = np.linalg.lstsq(X_, Y, rcond=None)[0]
    # Plot data
    plt.plot(feature, Y, '.')
    # Plot learned model over the observed range of this feature
    t = np.linspace(np.min(feature), np.max(feature), 20)
    plt.plot(t, w[0]*t + w[1], 'r')
    # Label graph
    MSE = np.mean((Y - X_ @ w)**2)
    plt.xlabel('Feature '+str(i))
    plt.ylabel('Y, Y-predicted')
    plt.title('MSE using feature %d alone: %.2f' % (i, MSE))
    # The figure is complete once its right-hand panel has been drawn.
    if i % 2 == 1:
        plt.show()
# With an odd number of features the last figure only has its left panel
# filled in and has not been shown yet.
if len(X) % 2 == 1:
    plt.show()
# Refit the one-feature model for a single chosen feature and look at it
# in two ways: fitted line over the data, and predicted vs. actual target.
feature_index = 2
chosen = X[feature_index]

# Design matrix: the feature values plus a column of ones for the intercept.
X_ = np.vstack([chosen, np.ones(len(chosen))]).T
w = np.linalg.lstsq(X_, Y, rcond=None)[0]
slope, intercept = w[0], w[1]

# Figure 1: data points with the fitted line drawn across the feature range.
t = np.linspace(np.min(chosen), np.max(chosen), 20)
plt.plot(chosen, Y, '.')
plt.plot(t, slope*t + intercept, 'r')
plt.xlabel('Feature '+str(feature_index))
plt.ylabel('Y, Y-predicted')
plt.show()

# Figure 2: predicted against actual target; the black diagonal marks
# where predicted equals actual.
diagonal = [-130, 200]
plt.plot(Y, slope*chosen + intercept, '.')
plt.plot(diagonal, diagonal, 'k-')
plt.ylabel('Predicted Y')
plt.xlabel('Actual Y')
plt.show()
# Fit one linear model on all features at once.
# Stack a row of ones under the feature rows, then transpose so that each
# row of A is one sample and the last column is the intercept term.
ones_row = np.ones(len(X[0]))
A = np.vstack([X, ones_row]).T
solution = np.linalg.lstsq(A, Y, rcond=None)
w = solution[0]
print('A.shape',A.shape)
print('w.shape',w.shape)

# Predicted against actual target; the black diagonal marks where
# predicted equals actual.
Y_ = A.dot(w)
diagonal = [-130, 200]
plt.plot(Y, Y_, '.')
plt.plot(diagonal, diagonal, 'k-')
plt.ylabel('Predicted Y')
plt.xlabel('Actual Y')
plt.show()
# Output:
# A.shape (442, 11)
# w.shape (11,)