import numpy as np
# Read the numeric table from disk and split it by column:
# columns 0-9 are the features (stored as the rows of X), column 10 is Y.
filename = "diabetes.txt"
data = np.loadtxt(filename)
columns = data.T  # transpose once so that each row is one column of the file
X = columns[:10]
Y = columns[10]
# Bare expression: only displays a value in an interactive session/notebook.
X.shape
# Output: (10, 442)
import matplotlib.pyplot as plt
# For each of the 10 features, fit a straight line (slope + intercept) to Y by
# least squares using that feature alone, then plot the data, the fitted line
# and the resulting mean squared error. Plots are laid out two per figure.
for i in range(10):
    # An even i opens a new two-panel figure; the following odd i fills
    # the right-hand panel of that same figure.
    if i % 2 == 0:
        plt.figure(figsize=(10, 3))
    plt.subplot(1, 2, 2 - (i + 1) % 2)

    # Two-column design matrix [feature, 1], so w = (slope, intercept).
    X_ = np.vstack([X[i], np.ones(len(X[i]))]).T
    w = np.linalg.lstsq(X_, Y, rcond=None)[0]

    # 20 evenly spaced abscissae spanning the feature's range, used to
    # draw the fitted line.
    t = np.linspace(np.min(X[i]), np.max(X[i]), 20)

    plt.plot(X[i], Y, '.')
    plt.plot(t, w[0]*t + w[1], 'r')

    # Mean squared residual of the fit on the same samples it was fitted to.
    MSE = np.mean((Y - X_ @ w)**2)
    plt.xlabel('Feature '+str(i))
    plt.ylabel('Y, Y-predicted')
    plt.title('MSE using feature %d alone: %.2f' % (i, MSE))

    # Display the figure once both of its panels have been drawn.
    if (i + 1) % 2 == 0:
        plt.show()
# Least-squares line through Y using one selected feature, followed by two
# plots: the fit over the raw data, and predicted-vs-actual Y.
feature_index = 2

# Design matrix with the feature in column 0 and a constant 1 in column 1.
X_ = np.vstack((X[feature_index], np.ones(X[feature_index].shape[0]))).T
w = np.linalg.lstsq(X_, Y, rcond=None)[0]
slope, intercept = w

# Plot 1: data points and the fitted line sampled at 20 points across the
# feature's range.
t = np.linspace(X[feature_index].min(), X[feature_index].max(), 20)
plt.plot(X[feature_index], Y, '.')
plt.plot(t, slope*t + intercept, 'r')
plt.ylabel('Y, Y-predicted')
plt.xlabel(f'Feature {feature_index}')
plt.show()

# Plot 2: predicted against actual Y, with a black y = x reference line
# drawn between the fixed end points -130 and 200.
diagonal = [-130, 200]
plt.plot(Y, slope*X[feature_index] + intercept, '.')
plt.plot(diagonal, diagonal, 'k-')
plt.xlabel('Actual Y')
plt.ylabel('Predicted Y')
plt.show()
# Least-squares fit of Y on all feature rows of X at once, plus an intercept.
# Stack a row of ones under X, then transpose so each sample is one row of A.
intercept_row = np.ones(X.shape[1])
A = np.vstack([X, intercept_row]).T
solution = np.linalg.lstsq(A, Y, rcond=None)
w = solution[0]  # coefficient vector; the last entry multiplies the ones column
print('A.shape', A.shape)
print('w.shape', w.shape)

# Predicted-vs-actual scatter with a black y = x reference line drawn
# between the fixed end points -130 and 200.
Y_ = A @ w
diagonal = [-130, 200]
plt.plot(Y, Y_, '.')
plt.plot(diagonal, diagonal, 'k-')
plt.xlabel('Actual Y')
plt.ylabel('Predicted Y')
plt.show()
# Output:
#   A.shape (442, 11)
#   w.shape (11,)