This notebook explores the backpropagation algorithm and the use of PyTorch for neural networks.
Last updated by Ethan Chau, November 2020.
!pip install torchviz
from collections import OrderedDict
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchviz import make_dot
Successfully installed torchviz-0.0.1
This section visualizes the backpropagation algorithm as it occurs in PyTorch. We begin with an example of the computation graph for simple functions, then apply the same machinery to a neural network with more complicated derivatives.
def f(x):
return x ** 2
def f_prime_analytical(x):
return 2 * x
x = torch.tensor([3.5], requires_grad=True)
y = f(x)
y
tensor([12.2500], grad_fn=<PowBackward0>)
make_dot(y, params={'x': x, 'y': y})
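We can also walk the same graph programmatically: every tensor produced by an operation carries a grad_fn, and its next_functions point back toward the inputs. A small sketch (not part of the original notebook):
print(y.grad_fn)                 # the power op that produced y
print(y.grad_fn.next_functions)  # leads back to the leaf tensor x (an AccumulateGrad node)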
y.backward()
x.grad, f_prime_analytical(x)
(tensor([7.]), tensor([7.], grad_fn=<MulBackward0>))
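One detail worth remembering (a small sketch, not in the original notebook; the name x2 is just for illustration): .grad accumulates across calls to backward(), which is why training loops zero gradients before each step.
x2 = torch.tensor([3.5], requires_grad=True)
f(x2).backward()
f(x2).backward()   # each call builds a fresh graph; gradients are summed into x2.grad
print(x2.grad)     # tensor([14.]), i.e. 7 + 7
x2.grad.zero_()    # reset before accumulating again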
Side note: if we don't want gradients, we can switch them off with the torch.no_grad() context manager.
with torch.no_grad():
no_grad_y = f_prime_analytical(x)
no_grad_y
tensor([7.])
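Another way to get a graph-free tensor (a quick sketch, not in the original notebook) is .detach(), which returns a tensor that shares the same data but is cut off from the autograd graph.
detached_y = f(x).detach()
print(detached_y.requires_grad)  # False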
Recall the following function: $$f(x, y) = \frac{6 \exp (-y)}{1 + x^2 + y^2} + 2x^3$$ Let's see how its computation graph looks in action.
First, we'll see that breaking up the computation yields the same results.
# f(x, y) implemented as a one-liner
def f_all_in_one(x, y):
return 2 * (x ** 3) + (6 * torch.exp(-y)) / (1 + x ** 2 + y ** 2)
# Declare the variables here so that we can use them later on
z_1 = None
z_2 = None
z_3 = None
# f(x, y) implemented using intermediate variables
def f_piece_by_piece(x, y):
global z_1, z_2, z_3
z_1 = torch.exp(-y)
z_1.retain_grad()
z_2 = 1 + x ** 2 + y ** 2
z_2.retain_grad()
z_3 = x ** 3
z_3.retain_grad()
z_4 = 6 * z_1 / z_2 + 2 * z_3
z_4.retain_grad()
return z_4
x = torch.randn(1)
y = torch.randn(1)
x, y
(tensor([0.9829]), tensor([1.8251]))
with torch.no_grad():
# Make sure that the all-in-one closed form solution and the piece-by-piece solution are equal
print(f_all_in_one(x, y), f_piece_by_piece(x, y))
tensor([2.0817]) tensor([2.0817])
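Rather than eyeballing the printout, we could also check the equivalence programmatically; a minimal sketch:
with torch.no_grad():
    print(torch.allclose(f_all_in_one(x, y), f_piece_by_piece(x, y)))  # True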
Now, let's visualize what the computation graph looks like.
x = torch.tensor([2.], requires_grad=True)
y = torch.tensor([4.], requires_grad=True)
x, y
(tensor([2.], requires_grad=True), tensor([4.], requires_grad=True))
fxy_piece = f_piece_by_piece(x, y)
make_dot(fxy_piece, params={'x': x, 'y': y, 'z_1': z_1})
Finally, we'll see that the intermediate gradients PyTorch computes for each variable can be chained by hand to reproduce the autograd result, which in turn matches the closed-form gradients: $$\nabla_x f(x, y) = 6x^2 - \frac{12x \exp(-y)}{(x^2 + y^2 + 1)^2}$$ $$\nabla_y f(x, y) = - \frac{6 \exp (-y) (x^2 + (y + 1)^2)}{(x^2 + y^2 + 1)^2}$$
# Closed-form definitions of gradients
def dfdx_analytical(x, y):
return 6 * (x ** 2) - (12 * x * torch.exp(-y)) / ((x ** 2 + y ** 2 + 1) ** 2)
def dfdy_analytical(x, y):
return - (6 * torch.exp(-y) * (x ** 2 + (y + 1) ** 2)) / ((x ** 2 + y ** 2 + 1) ** 2)
fxy_piece.backward()
with torch.no_grad():
dz4z3 = torch.FloatTensor([2])
dz4z2 = -6 * z_1 / (z_2 ** 2)
dz4z1 = 6 / z_2
dz3x = 3 * (x ** 2)
dz2x = 2 * x
dz2y = 2 * y
dz1y = -torch.exp(-y)
dz4x = z_3.grad * dz3x + z_2.grad * dz2x
dz4y = z_2.grad * dz2y + z_1.grad * dz1y
print("Variable\tPyTorch\t\t\tAutodiff\t\tAnalytical")
print(f"dz4/dz3\t\t{z_3.grad}\t\t{dz4z3}")
print(f"dz4/dz2\t\t{z_2.grad}\t{dz4z2}")
print(f"dz4/dz1\t\t{z_1.grad}\t{dz4z1}")
print(f"dz4/dx\t\t{x.grad}\t{dz4x}\t{dfdx_analytical(x, y)}")
print(f"dz4/dy\t\t{y.grad}\t{dz4y}\t{dfdy_analytical(x, y)}")
Variable	PyTorch			Autodiff		Analytical
dz4/dz3		tensor([2.])		tensor([2.])
dz4/dz2		tensor([-0.0002])	tensor([-0.0002])
dz4/dz1		tensor([0.2857])	tensor([0.2857])
dz4/dx		tensor([23.9990])	tensor([23.9990])	tensor([23.9990])
dz4/dy		tensor([-0.0072])	tensor([-0.0072])	tensor([-0.0072])
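As one more sanity check (not in the original notebook), torch.autograd.gradcheck compares autograd's gradients against finite differences; it expects double-precision inputs with requires_grad=True.
from torch.autograd import gradcheck
x64 = torch.randn(1, dtype=torch.double, requires_grad=True)
y64 = torch.randn(1, dtype=torch.double, requires_grad=True)
print(gradcheck(f_all_in_one, (x64, y64)))  # True if the gradients agree numerically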
Let's implement the neural network from class:
Note that we can write this as follows (why?): $$h_\theta (x) = g(b_2 + W_2^T g(b_1 + W_1^T x))$$ where $g$ is the activation function, $x \in \mathbb{R}^3$, $W_1 \in \mathbb{R}^{3 \times 3}$, $W_2 \in \mathbb{R}^{3 \times 1}$, $b_1 \in \mathbb{R}^3$, and $b_2 \in \mathbb{R}$.
model = nn.Sequential(OrderedDict([
('linear1', nn.Linear(3, 3)),
('sig1', nn.Sigmoid()),
('linear2', nn.Linear(3, 1)),
('sig2', nn.Sigmoid())
]))
model.requires_grad_()
model
Sequential(
  (linear1): Linear(in_features=3, out_features=3, bias=True)
  (sig1): Sigmoid()
  (linear2): Linear(in_features=3, out_features=1, bias=True)
  (sig2): Sigmoid()
)
# Not requiring grad here! We don't need to change our inputs.
x = torch.randn(3)
x
tensor([-0.1691, -0.2432, 0.2890])
y = model(x)
make_dot(y, params=dict([('x', x)] + list(model.named_parameters())))
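To connect the formula above to the module, here is a sketch (not in the original notebook) of the same forward pass computed directly from the layer parameters. Note that nn.Linear stores its weight with shape (out_features, in_features), so model.linear1.weight plays the role of $W_1^T$.
with torch.no_grad():
    h = torch.sigmoid(model.linear1.weight @ x + model.linear1.bias)         # g(b_1 + W_1^T x)
    y_manual = torch.sigmoid(model.linear2.weight @ h + model.linear2.bias)  # g(b_2 + W_2^T h)
print(y_manual, y)  # the two outputs should match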
Now, let's backpropagate the gradients through our graph, automagically.
y.backward()
for name, param in model.named_parameters():
print(name)
print(param.grad)
print(param.grad.shape == param.shape)
print('\n')
print(f"x.grad: {x.grad}")
linear1.weight
tensor([[-0.0011,  0.0057, -0.0029],
        [ 0.0043, -0.0191,  0.0092],
        [ 0.0085, -0.0370,  0.0177]])
True

linear1.bias
tensor([-0.0079,  0.0246,  0.0473])
True

linear2.weight
tensor([[0.3100, 0.2712, 0.1890]])
True

linear2.bias
tensor([0.4511])
True

x.grad: None
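x.grad is None precisely because we created x without requires_grad=True. If we did want input gradients, a sketch (the name x_req is hypothetical):
x_req = torch.randn(3, requires_grad=True)
model(x_req).backward()  # note: this also accumulates more gradient into the model's parameters
print(x_req.grad)        # now populated with the gradient of the output w.r.t. the input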
This section demonstrates how we can train a neural network from scratch with PyTorch.
It is copied from the PyTorch tutorial for compactness.
First, let's generate some data based on the function: $$y(x) = 4 \sin(x \pi) \cos(6\pi x^2)$$ for random values of $x$.
%matplotlib inline
d = 1
n = 200
X = torch.rand(n,d)
y = 4 * torch.sin(np.pi * X) * torch.cos(6*np.pi*X**2)
plt.scatter(X.numpy(), y.numpy())
plt.title('plot of $f(x)$')
plt.xlabel('$x$')
plt.ylabel('$y$')
plt.show()
Here we define a simple two-hidden-layer neural network with Tanh activations. There are a few hyperparameters to play with to get a feel for how they change the results.
# feel free to play with these parameters
step_size = 0.05
n_epochs = 6000
n_hidden_1 = 32
n_hidden_2 = 32
d_out = 1
neural_network = nn.Sequential(
nn.Linear(d, n_hidden_1),
nn.Tanh(),
nn.Linear(n_hidden_1, n_hidden_2),
nn.Tanh(),
nn.Linear(n_hidden_2, d_out)
)
loss_func = nn.MSELoss()
optim = torch.optim.SGD(neural_network.parameters(), lr=step_size)
print('iter,\tloss')
for i in range(n_epochs):
y_hat = neural_network(X)
loss = loss_func(y_hat, y)
optim.zero_grad()
loss.backward()
optim.step()
if i % (n_epochs // 10) == 0:
print('{},\t{:.2f}'.format(i, loss.item()))
iter,	loss
0,	3.53
600,	3.24
1200,	1.68
1800,	1.21
2400,	0.95
3000,	0.63
3600,	0.42
4200,	0.18
4800,	0.13
5400,	0.12
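The loop above takes full-batch gradient steps on all 200 points at once. For larger datasets one would typically sample mini-batches instead; a sketch of how the loop would change (names hypothetical; running it would continue training the model above):
from torch.utils.data import TensorDataset, DataLoader

loader = DataLoader(TensorDataset(X, y), batch_size=32, shuffle=True)
for epoch in range(n_epochs):
    for X_batch, y_batch in loader:
        loss = loss_func(neural_network(X_batch), y_batch)
        optim.zero_grad()
        loss.backward()
        optim.step()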
X_grid = torch.from_numpy(np.linspace(0,1,50)).float().view(-1, d)
y_hat = neural_network(X_grid)
plt.scatter(X.numpy(), y.numpy())
plt.plot(X_grid.detach().numpy(), y_hat.detach().numpy(), 'r')
plt.title(r'plot of $f(x)$ and $\hat{f}(x)$')
plt.xlabel('$x$')
plt.ylabel('$y$')
plt.show()
So far, we have been considering regression tasks and have used the MSELoss module. For the homework, we will be performing a classification task and will use the cross entropy loss.
PyTorch implements the cross entropy loss in a single module, CrossEntropyLoss, which takes raw (unnormalized) scores rather than probabilities. Its usage is slightly different from MSELoss, so we will break it down here.
loss = nn.CrossEntropyLoss()
input = torch.tensor([[-1., 1],[-1, 1],[1, -1]]) # raw scores correspond to the correct class
# input = torch.tensor([[-3., 3],[-3, 3],[3, -3]]) # raw scores correspond to the correct class with higher confidence
# input = torch.tensor([[1., -1],[1, -1],[-1, 1]]) # raw scores correspond to the incorrect class
# input = torch.tensor([[3., -3],[3, -3],[-3, 3]]) # raw scores correspond to the incorrect class with incorrectly placed confidence
target = torch.tensor([1, 1, 0])
output = loss(input, target)
print(output)
tensor(0.1269)
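Under the hood, CrossEntropyLoss applies a log-softmax to the raw scores and then takes the (mean) negative log-likelihood of the target classes. We can reproduce the value above with a quick sketch:
log_probs = F.log_softmax(input, dim=1)  # raw scores -> log-probabilities
manual = F.nll_loss(log_probs, target)   # mean negative log-likelihood of the targets
print(manual)                            # tensor(0.1269), matching CrossEntropyLoss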