# Code to estimate bias/variance for varying model complexities
from __future__ import division

import numpy as np
import matplotlib.pyplot as plt

WEIGHTS = np.array([10, 0, -1, 0, 0.01])
SIGMA = 12
BOUNDS = 12
NUM_DATASETS = 12


# Expands a vector of x's using our polynomial basis
def expand_poly_basis(xs, order):
    X = np.ones((xs.size, order + 1))
    for i in range(order):
        X[:, i + 1] = X[:, i] * xs
    return X


# Evaluates a polynomial on all points in the vector 'xs'.
# Can evaluate multiple polynomials if 'weights' has multiple columns
def eval_poly(xs, weights):
    order = weights.shape[0] - 1
    X = expand_poly_basis(xs, order)
    return X.dot(weights)


# Generates 'num' sets of target data using Gaussian noise
def gen_random_targets(xs, weights, sigma, num):
    noise = np.random.normal(0, sigma, (num, xs.size))
    return (eval_poly(xs, weights) + noise).T


# Plots a polynomial given its coefficients
def plot_poly(weights, bounds, color='k--', lw=1, label=None):
    xs = np.linspace(-bounds, bounds)
    ys = eval_poly(xs, weights)
    plt.plot(xs, ys, color, lw=lw, label=label)


# Solves linear regression problems for a number of data/target pairs.
# Assumes multiple sets of target data for one set of 'xs'
def solve(xs, Y, order):
    expanded = expand_poly_basis(xs, order)
    weights, _, _, _ = np.linalg.lstsq(expanded, Y, rcond=None)
    return weights


if __name__ == '__main__':
    # Default 'macosx' backend is really annoying
    plt.switch_backend('tkagg')

    xs_train = np.linspace(-BOUNDS, BOUNDS, BOUNDS + 1)
    xs_test = np.linspace(-BOUNDS + 1, BOUNDS - 1, BOUNDS)

    # First, illustrate the process: draw a few random datasets and fit a
    # polynomial to each one, showing each fit in its own figure
    Y_train = gen_random_targets(xs_train, WEIGHTS, SIGMA, 4)
    W = solve(xs_train, Y_train, 2)
    colors = ['b', 'g', 'c', 'm']
    for i in range(4):
        plot_poly(WEIGHTS, BOUNDS + 1, 'k', 2, 'Ground Truth')
        plot_poly(W[:, i], BOUNDS + 1, colors[i], 2, 'Learned Function')
        plt.scatter(xs_train, Y_train[:, i], c=colors[i], s=40)
        plt.legend()
        plt.xlim(-BOUNDS - 1, BOUNDS + 1)
        plt.ylim(-50, 250)
        plt.get_current_fig_manager().full_screen_toggle()
        plt.show()

    # Average the learned weights together to get w_bar; the bias/variance
    # will be computed at the test points marked with dashed vertical lines
    w_bar = W.mean(axis=1)
    plot_poly(WEIGHTS, BOUNDS + 1, 'k', 2, 'Ground Truth')
    for x in xs_test:
        plt.axvline(x=x, ls='--', c='k')
    for i in range(4):
        plot_poly(W[:, i], BOUNDS + 1, colors[i],
                  label='Learned Function' if i == 0 else None)
        plt.scatter(xs_train, Y_train[:, i], c=colors[i], s=40)
    plot_poly(w_bar, BOUNDS + 2, 'r', 4, 'Average Function')
    plt.legend()
    plt.xlim(-BOUNDS - 1, BOUNDS + 1)
    plt.ylim(-50, 250)
    plt.get_current_fig_manager().full_screen_toggle()
    plt.show()

    # Now repeat the process for increasing model complexity, using many
    # random datasets (no longer distinguished by color)
    Y_train = gen_random_targets(xs_train, WEIGHTS, SIGMA, NUM_DATASETS)
    y_true = eval_poly(xs_test, WEIGHTS)
    orders = np.arange(1, BOUNDS)
    biases = np.empty(orders.size)
    variances = np.empty(orders.size)
    for i in range(orders.size):
        # Solve the linear regression for each dataset
        W = solve(xs_train, Y_train, orders[i])
        w_bar = W.mean(axis=1)

        # Compute the squared bias and the variance, averaged over the test points
        Y_test = eval_poly(xs_test, W)
        y_bar = eval_poly(xs_test, w_bar)
        resid = y_true - y_bar
        biases[i] = np.mean(resid**2)
        resid = Y_test - y_bar[:, np.newaxis]
        variances[i] = np.mean(resid**2)

        # Plot the learned functions and their average
        plt.figure(figsize=(10, 6))
        plt.subplot(1, 2, 1)
        plt.title("Order = {}".format(orders[i]))
        plot_poly(WEIGHTS, BOUNDS + 1, 'k', 4, 'Ground Truth')
        plot_poly(w_bar, BOUNDS + 1, 'r', 4, 'Average')
        for j in range(NUM_DATASETS):
            plt.scatter(xs_train, Y_train[:, j], c='k', s=40)
            plot_poly(W[:, j], BOUNDS + 1)
        plt.legend()
        plt.xlim(-BOUNDS - 1, BOUNDS + 1)
        plt.ylim(-50, 250)

        # Plot the bias/variance estimates computed so far
        plt.subplot(1, 2, 2)
        plt.title("Bias/Variance")
        plt.plot(orders[:i + 1], biases[:i + 1], 'b-', lw=2, label='Bias')
        plt.plot(orders[:i + 1], variances[:i + 1], 'r-', lw=2, label='Variance')
        plt.scatter(orders[:i + 1], biases[:i + 1], c='b', s=30)
        plt.scatter(orders[:i + 1], variances[:i + 1], c='r', s=30)
        plt.legend()
        plt.xlim(0, orders[-1] + 1)
        plt.ylim(0, 600)
        plt.get_current_fig_manager().full_screen_toggle()
        plt.show()
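
    # Optional sanity check (a sketch, assuming the loop variables above are
    # still in scope for the last order fit): because eval_poly is linear in
    # the weights, the average prediction y_bar is exactly the mean of the
    # individual predictions, so squared bias plus variance should match the
    # mean squared error of the individual fits against the true function
    # (up to floating-point precision).
    total_err = np.mean((Y_test - y_true[:, np.newaxis])**2)
    print("bias^2 + variance = {:.2f}".format(biases[-1] + variances[-1]))
    print("mean squared error vs. truth = {:.2f}".format(total_err))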