In [None]:
import os,sys
import numpy as np                                       # fast vectors and matrices
import matplotlib.pyplot as plt                          # plotting

from time import time

import torch

%matplotlib inline

In [None]:
# Blank out CUDA_VISIBLE_DEVICES for this process.
# NOTE(review): presumably meant to keep torch on the CPU — confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = ""

In [None]:
def mse(y, y_hat):
    """Return the mean of the squared element-wise difference y - y_hat.

    Both arguments are combined with ordinary tensor arithmetic, so they
    may be any pair of tensors that broadcast against each other.
    """
    residual = y - y_hat
    return residual.pow(2).mean()

def forward(x, mu):
    """Evaluate the model x**2 + mu (the square is computed as x * x)."""
    squared = x * x
    return squared + mu

In [None]:
n = 100  # number of synthetic samples

# Dedicated, seeded generator so that re-running the notebook from a fresh
# kernel produces the same synthetic data set.
data_rng = torch.Generator().manual_seed(0)

mu = torch.tensor([5.])  # offset used to generate the targets
x = torch.randn(n, generator=data_rng)
# Targets follow forward(x, mu) plus standard-normal noise, one draw per sample.
y = forward(x, mu) + torch.randn(n, generator=data_rng)

In [None]:
# Scatter the targets against the inputs, with labelled axes so the figure
# can be read on its own.
plt.scatter(x.numpy(), y.numpy())
plt.xlabel('x')
plt.ylabel('y')
plt.title('synthetic data')
# Render explicitly so the cell does not echo the scatter object's repr.
plt.show()

In [None]:
# Seeded generator for the order in which samples are visited, so the
# trajectory is the same on every run of this cell.
sample_rng = np.random.default_rng(0)

# Parameter to learn: a single float initialised at zero and tracked by autograd.
muhat = torch.zeros(1, requires_grad=True)

learning_rate = .001

square_loss = []  # loss on the sampled point, computed before the update
estimate = []     # value of muhat after each update

print('square loss\tparameter')
for i in range(5000):
    # Pick one training example uniformly at random.
    j = int(sample_rng.integers(n))

    yhat = forward(x[j], muhat)
    loss = mse(y[j], yhat)

    loss.backward()

    # Hand-written SGD step. Running it under no_grad keeps the in-place
    # update itself out of the autograd graph.
    with torch.no_grad():
        muhat -= learning_rate * muhat.grad
    # Gradients accumulate across backward() calls, so clear them each step.
    muhat.grad.zero_()

    square_loss.append(loss.item())
    estimate.append(muhat.item())

    # Progress report every 500 steps, two decimals per column.
    if i % 500 == 0:
        print('{:.2f}\t{:.2f}'.format(square_loss[-1], estimate[-1]))

In [None]:
def plot_results(square_loss, estimate):
    """Draw the training history as two side-by-side line charts.

    Args:
        square_loss: sequence of loss values, one entry per training step.
        estimate: sequence of parameter values, one entry per training step.

    The left panel shows the loss and the right panel the parameter value,
    each plotted against the entry's position in the sequence. Nothing is
    returned.
    """
    # One call creates the figure and both panels (1 row, 2 columns).
    _, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 7))

    for ax in (ax1, ax2):
        # Hide the frame on all four sides; a faint dashed grid is drawn instead.
        for side in ("top", "right", "left", "bottom"):
            ax.spines[side].set_visible(False)
        ax.grid(color='b', linestyle='--', linewidth=0.5, alpha=0.3)
        ax.tick_params(direction='out', color='b', width=2)
        ax.set_xlabel('iteration')

    ax1.set_title('square loss')
    ax2.set_title('parameter value')
    # With a single argument, plot() uses the indices 0..len-1 as x values.
    ax1.plot(square_loss)
    ax2.plot(estimate)
    
# Plot the loss and estimate histories collected by the hand-written SGD loop.
plot_results(square_loss, estimate)

# Built-in Optimizer

In [None]:
n = 100  # number of synthetic samples

# Seeded generators, one for the synthetic data and one for the order in
# which samples are visited, so re-running the cell gives the same trajectory.
data_rng = torch.Generator().manual_seed(0)
sample_rng = np.random.default_rng(0)

mu = torch.tensor([5.])  # offset used to generate the targets
x = torch.randn(n, generator=data_rng)
# Targets follow forward(x, mu) plus standard-normal noise, one draw per sample.
y = forward(x, mu) + torch.randn(n, generator=data_rng)

# Parameter to learn: a single float initialised at zero and tracked by autograd.
muhat = torch.zeros(1, requires_grad=True)

learning_rate = .001
# torch.optim.SGD performs the parameter update and gradient reset here.
opt = torch.optim.SGD([muhat], lr=learning_rate)

square_loss = []  # loss on the sampled point, computed before the update
estimate = []     # value of muhat after each update

print('square loss\tparameter')
for i in range(5000):
    # Pick one training example uniformly at random.
    j = int(sample_rng.integers(n))

    opt.zero_grad()
    yhat = forward(x[j], muhat)
    loss = mse(y[j], yhat)

    loss.backward()
    opt.step()

    square_loss.append(loss.item())
    estimate.append(muhat.item())

    # Progress report every 500 steps, two decimals per column.
    if i % 500 == 0:
        print('{:.2f}\t{:.2f}'.format(square_loss[-1], estimate[-1]))

In [None]:
# Plot the loss and estimate histories recorded while training with torch.optim.SGD.
plot_results(square_loss, estimate)

# Batch Gradient Descent

In [None]:
n = 100  # number of synthetic samples

# Dedicated, seeded generator so that re-running the cell produces the same
# synthetic data set.
data_rng = torch.Generator().manual_seed(0)

mu = torch.tensor([5.])  # offset used to generate the targets
x = torch.randn(n, generator=data_rng)
# Targets follow forward(x, mu) plus standard-normal noise, one draw per sample.
y = forward(x, mu) + torch.randn(n, generator=data_rng)

# Parameter to learn: a single float initialised at zero and tracked by autograd.
muhat = torch.zeros(1, requires_grad=True)

learning_rate = .001
opt = torch.optim.SGD([muhat], lr=learning_rate)

square_loss = []  # loss over the full data set, computed before the update
estimate = []     # value of muhat after each update

print('square loss\tparameter')
for i in range(5000):
    opt.zero_grad()
    # Every step uses all n samples, so the loss is the mean over the data set.
    yhat = forward(x, muhat)
    loss = mse(y, yhat)

    loss.backward()
    opt.step()

    square_loss.append(loss.item())
    estimate.append(muhat.item())

    # Progress report every 500 steps, two decimals per column.
    if i % 500 == 0:
        print('{:.2f}\t{:.2f}'.format(square_loss[-1], estimate[-1]))

In [None]:
# Plot the loss and estimate histories recorded during full-batch gradient descent.
plot_results(square_loss, estimate)