We consider a degree-5 polynomial model (the ground truth) with additive Gaussian noise.
# generate training data
import numpy as np

def f_true(u):
    """Ground-truth degree-5 polynomial (roots at .99, .4, .25, -.6, -.8)."""
    return (u - .99) * (u - .4) * (u - .25) * (u + .6) * (u + .8)

n = 40  # sample size
x = np.random.uniform(-1, 1, n)  # sample input data
x = np.sort(x)
# pin the extremes so the sample spans the whole interval [-1, 1]
x[0] = -1
x[n - 1] = 1
# ground truth is a 5th-order polynomial; add Gaussian noise (std 0.03)
y = f_true(x) + .03 * np.random.randn(n)
# generate test data
n_ = 100
x_ = np.random.uniform(-1, 1, n_)
y_ = f_true(x_) + .03 * np.random.randn(n_)
# plot the samples and ground truth
t = np.linspace(-1, 1, 100)  # dense grid for plotting fitted curves
y0 = f_true(t)
import matplotlib.pyplot as plt
fig = plt.figure(figsize=(9, 8), dpi=80, facecolor='w', edgecolor='k')
plt.plot(x, y, 'o', label='train dataset')
plt.plot(x_, y_, 'ro', label='test dataset')
plt.plot(t, y0, 'k-')
plt.legend()
plt.show()
In typical scenarios, we only have one set of samples, which we split into a training set $S_{\rm train}$ and a test set $S_{\rm test}$.
However, in order to understand how the test error behaves (theoretically), we consider the expected test error, and call it true error: i.e. ${\cal L}_{\rm true} = {\mathbb E}[{\cal L}_{\rm test}]$
In order to compute the true error, we simulate a process where we get many fresh samples, and train a new predictor each time with the fresh set of samples. It is important to understand that the resulting predictor $f_{S_{\rm train}}(\cdot)$ is a random function, where the randomness comes from the fresh random training data set $S_{\rm train}$. We will draw many such random functions, plot them, and see how the test, train, and true errors behave.
num_runs = 100

# per-run predictions on the dense grid t, one row per run
yhat_simple = np.zeros((num_runs, len(t)))      # degree 3 (too simple)
yhat_complex = np.zeros((num_runs, len(t)))     # degree 15 (too complex)
yhat_justright = np.zeros((num_runs, len(t)))   # degree 5 (matches the truth)

def _design(u, degree):
    """Polynomial design matrix [1, u, u^2, ..., u^degree], columns low -> high."""
    return np.vander(u, degree + 1, increasing=True)

def _ls_fit(x_tr, y_tr, degree):
    """Least-squares polynomial fit; returns coefficients, constant term first.

    Uses lstsq instead of the original inv(X.T X) X.T y: same minimizer,
    but numerically stable even when the design matrix is ill-conditioned
    (which it is for degree 15 on 40 points).
    """
    w, *_ = np.linalg.lstsq(_design(x_tr, degree), y_tr, rcond=None)
    return w

for run in range(num_runs):
    # fresh training sample each run, same recipe as the first cell
    n = 40  # sample size
    x = np.sort(np.random.uniform(-1, 1, n))  # sort added for consistency with the first cell
    x[0] = -1
    x[n - 1] = 1
    y = (x - .99) * (x - .4) * (x - .25) * (x + .6) * (x + .8) + .03 * np.random.randn(n)

    # degree-3 polynomial linear regression
    w = _ls_fit(x, y, 3)
    yhat_simple[run] = _design(t, 3) @ w
    yh = _design(x, 3) @ w     # train predictions (kept for later cells)
    y_h = _design(x_, 3) @ w   # test predictions (kept for later cells)
    w_ = w

    # degree-5 polynomial linear regression
    w = _ls_fit(x, y, 5)
    yhat_justright[run] = _design(t, 5) @ w
    yh = _design(x, 5) @ w
    y_h = _design(x_, 5) @ w
    w_ = w

    # degree-15 polynomial linear regression
    w = _ls_fit(x, y, 15)
    yhat_complex[run] = _design(t, 15) @ w
    yh1 = _design(x, 15) @ w
    y_h1 = _design(x_, 15) @ w
import matplotlib.pyplot as plt
import seaborn as sns
def build_plot(yhat, title):
    """Plot each run's fitted curve, the ground truth, and the average fit.

    yhat : (num_runs, len(t)) array; row i holds run i's predictions on the
           module-level grid t. Also uses the module-level ground truth y0.
    title : string placed above the axes.
    """
    # individual fits; only one curve carries the legend label so the legend
    # shows a single 'Individual fits' entry
    for row in yhat[1:]:
        plt.plot(t, row, 'g-', alpha=0.3)
    plt.plot(t, yhat[0], 'g-', alpha=0.3, label='Individual fits')
    plt.plot(t, y0, 'k-', label='Truth')
    # BUG FIX: the original summed rows 1..num_runs-1 but divided by num_runs,
    # silently dropping run 0 from the average; plot the true mean instead.
    plt.plot(t, yhat.mean(axis=0), 'r-', label='Average fit')
    plt.legend()
    plt.title(title)
    plt.gca().set_ylim([-0.4, 0.2])
# Side-by-side comparison of the three model complexities.
fig = plt.figure(figsize=(12, 6), dpi=80, facecolor='w', edgecolor='k')
panels = [
    (yhat_simple, 'Degree 3'),
    (yhat_justright, 'Degree 5'),
    (yhat_complex, 'Degree 15'),
]
for panel_idx, (preds, panel_title) in enumerate(panels, start=1):
    plt.subplot(1, 3, panel_idx)
    build_plot(preds, panel_title)
plt.show()