Convolutional Neural Networks

This section covers some basics of convolutional neural networks (CNNs) in PyTorch, including the most important components of convolutional layers and associated operations (e.g. pooling). Some code in this notebook was adapted from Dive Into Deep Learning, ch. 6. The goal is to introduce these operations so that you understand how to compose neural networks built from them.

Another excellent resource is the set of visualizations in this convolution arithmetic tutorial.

Convolutions for Images

The name "convolutional layer" is actually a misnomer, since the operation these layers express is more accurately described as a cross-correlation. Here, we'll look at what the convolution (cross-correlation) operation looks like for a 2x2 convolutional kernel, shown below.

Here, we show a fixed kernel; however, it's important to remember that when training a neural network, these kernels are learned via SGD (or another optimization method).
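Since the original figure isn't reproduced here, below is a minimal sketch of the cross-correlation computation itself. The function name corr2d and the example values are illustrative (following the Dive Into Deep Learning convention), not part of the PyTorch API:

```python
import torch

def corr2d(X, K):
    """Compute the 2D cross-correlation of input X with kernel K."""
    h, w = K.shape
    Y = torch.zeros((X.shape[0] - h + 1, X.shape[1] - w + 1))
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            # Multiply the current window elementwise by the kernel and sum
            Y[i, j] = (X[i:i + h, j:j + w] * K).sum()
    return Y

X = torch.arange(9, dtype=torch.float32).reshape(3, 3)
K = torch.tensor([[0.0, 1.0], [2.0, 3.0]])
print(corr2d(X, K))  # tensor([[19., 25.], [37., 43.]])
```

Note how the $3\times3$ input produces only a $2\times2$ output.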

Notice how the output is smaller than the input. This means that, over several successive layers of convolution operations, our tensors can shrink away to almost nothing if we don't monitor their size or combat the shrinkage with padding (see below).

One of the most common applications of convolutional layers, and convolutional networks as a whole, is image data. Typically, an image is represented as 3 separate channels (RGB), each of which can be thought of as an array of pixel values. To help us uncover relationships between pixel values and extract various image features, we can use a three-dimensional kernel that convolves over the channels as well. Here is an example of a basic 2x2x2 convolutional kernel acting upon some two-channel input data.
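To make the multi-channel case concrete, here is a sketch that reuses corr2d from above: each channel is cross-correlated with its own slice of the kernel, and the per-channel results are summed. The two-channel values are illustrative:

```python
def corr2d_multi_in(X, K):
    """Sum the per-channel 2D cross-correlations over the channel dimension."""
    return sum(corr2d(x, k) for x, k in zip(X, K))

X = torch.stack([torch.arange(9, dtype=torch.float32).reshape(3, 3),
                 torch.ones(3, 3)])  # input shape: (2, 3, 3)
K = torch.stack([torch.tensor([[0.0, 1.0], [2.0, 3.0]]),
                 torch.ones(2, 2)])  # kernel shape: (2, 2, 2)
print(corr2d_multi_in(X, K))         # output shape: (2, 2)
```

Even with multiple input channels, a single kernel stack still produces a single two-dimensional feature map.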

Convolutional Layers

A convolutional layer cross-correlates the input and kernel and adds a scalar bias (not shown above) to produce an output. The two parameters of a convolutional layer are the kernel and the scalar bias. You can see how these are stored in PyTorch layers in the example below.
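Since the original cell isn't shown here, the following is a minimal sketch of inspecting these parameters on a PyTorch nn.Conv2d layer (the channel and kernel sizes are arbitrary):

```python
import torch
from torch import nn

conv = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=2, bias=True)
print(conv.weight.shape)  # torch.Size([1, 1, 2, 2]) -- the learnable kernel
print(conv.bias.shape)    # torch.Size([1])          -- one scalar bias per output channel
```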

When training models based on convolutional layers, we typically initialize the kernels randomly, just as we would with a fully-connected layer.

The convolutional layer output is sometimes called a feature map, as it can be regarded as the learned representations (features) in the spatial dimensions (e.g., width and height) that are passed to the subsequent layer. In CNNs, for any element $x$ of some layer, its receptive field refers to all the elements (from all the previous layers) that may affect the calculation of $x$ during the forward propagation. Note that the receptive field may be larger than the actual size of the input.

In practice, our convolutional layers are composed of many kernels "stacked" on top of each other; the depth of these stacks corresponds to the out_channels argument of PyTorch's Conv2d layer.
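For example (the channel counts below are arbitrary), the stacking is visible in the shape of the layer's weight tensor:

```python
conv = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3)
# Weight layout: (out_channels, in_channels, kernel_height, kernel_width)
print(conv.weight.shape)  # torch.Size([16, 3, 3, 3])
```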

Padding

One tricky issue when applying convolutional layers is that we tend to lose pixels on the perimeter of our image. Since we typically use small kernels, for any given convolution, we might only lose a few pixels, but this can add up as we apply many successive convolutional layers.

One straightforward solution to this problem is to "pad" the inputs before applying convolution: add extra pixels of filler around the boundary of our input image, thus increasing the effective size of the image. Typically, we set the values of the extra pixels to zero.

In the figure below, we pad a $3\times3$ input, increasing its size to $5\times5$. The corresponding output then increases to a $4\times4$ matrix. The shaded portions are the first output element as well as the input and kernel tensor elements used for the output computation: $0\times0+0\times1+0\times2+0\times3=0$.

In general, if we add a total of $p_h$ rows of padding (roughly half on top and half on bottom) and a total of $p_w$ columns of padding (roughly half on the left and half on the right), the output shape will be

\begin{equation} (n_h - k_h + p_h + 1) \times (n_w - k_w + p_w + 1). \end{equation}

This means that the height and width of the output will increase by $p_h$ and $p_w$, respectively, relative to the output size without padding. Note that $n_h$ and $n_w$ are the input height and width; $k_h$ and $k_w$ are the kernel height and width.

In many cases, we will want to set $p_h = k_h - 1$ and $p_w = k_w - 1$ to give the input and output the same height and width. This will make it easier to predict the output shape of each layer when constructing the network. Assuming that $k_h$ is odd here, we will pad $p_h/2$ rows on both sides of the height. If $k_h$ is even, one possibility is to pad $\lceil p_h/2 \rceil$ rows on the top of the input and $\lfloor p_h/2 \rfloor$ rows on the bottom. We will pad both sides of the width in the same way.

CNNs commonly use convolution kernels with odd height and width values, such as 1, 3, 5, or 7. Choosing odd kernel sizes has the benefit that we can preserve the spatial dimensionality while padding with the same number of rows on top and bottom, and the same number of columns on left and right.
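As a quick sanity check, here is a sketch using a small helper (the name comp_conv2d follows the D2L convention and is not part of PyTorch) that adds and removes the batch and channel dimensions nn.Conv2d expects:

```python
def comp_conv2d(conv2d, X):
    """Apply a Conv2d layer to a 2D tensor, hiding the batch/channel dims."""
    X = X.reshape((1, 1) + X.shape)  # add batch and channel dimensions
    Y = conv2d(X)
    return Y.reshape(Y.shape[2:])    # strip batch and channel dimensions

# kernel_size=3 with padding=1 on every side gives p_h = p_w = k - 1 = 2 in total
conv2d = nn.Conv2d(1, 1, kernel_size=3, padding=1)
X = torch.rand(8, 8)
print(comp_conv2d(conv2d, X).shape)  # torch.Size([8, 8]) -- shape preserved
```

Note that PyTorch's padding argument counts the rows/columns added to each side, so padding=1 with a $3\times3$ kernel preserves the input shape.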

Stride

When computing the cross-correlation, we start with the convolution window at the upper-left corner of the input tensor, and then slide it over all locations both down and to the right. In the previous examples, we defaulted to sliding one element at a time. However, sometimes, either for computational efficiency or because we wish to downsample, we move our window more than one element at a time, skipping the intermediate locations.

We refer to the number of rows and columns traversed per slide as the stride. So far, we have used strides of 1, both for height and width. Sometimes, we may want to use a larger stride. The figure below shows a two-dimensional cross-correlation operation with a stride of 3 vertically and 2 horizontally. The shaded portions are the output elements as well as the input and kernel tensor elements used for the output computation: $0\times0+0\times1+1\times2+2\times3=8$ and $0\times0+6\times1+0\times2+0\times3=6$. We can see that when the second element of the first column is output, the convolution window slides down three rows. The convolution window slides two columns to the right when the second element of the first row is output. When the convolution window continues to slide two columns to the right on the input, there is no output because the input element cannot fill the window (unless we add another column of padding).

In general, when the stride for the height is $s_h$ and the stride for the width is $s_w$, the output shape is

\begin{equation} \lfloor (n_h - k_h + p_h + s_h) / s_h \rfloor \times \lfloor (n_w - k_w + p_w + s_w) / s_w \rfloor . \end{equation}

If we set $p_h = k_h - 1$ and $p_w = k_w - 1$, then the output shape simplifies to $\lfloor (n_h + s_h - 1)/s_h \rfloor \times \lfloor (n_w + s_w - 1)/s_w \rfloor$. Going a step further, if the input height and width are divisible by the strides on the height and width, then the output shape will be $(n_h/s_h) \times (n_w/s_w)$.
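A short sketch of the stride argument, reusing the comp_conv2d helper from the padding section (the specific sizes are illustrative):

```python
# Stride 2 in both dimensions: floor((8 - 3 + 2 + 2) / 2) = 4
conv2d = nn.Conv2d(1, 1, kernel_size=3, padding=1, stride=2)
print(comp_conv2d(conv2d, torch.rand(8, 8)).shape)  # torch.Size([4, 4])

# Asymmetric kernel, padding, and stride, per the general formula above:
# height: floor((8 - 3 + 0 + 3) / 3) = 2, width: floor((8 - 5 + 2 + 4) / 4) = 2
conv2d = nn.Conv2d(1, 1, kernel_size=(3, 5), padding=(0, 1), stride=(3, 4))
print(comp_conv2d(conv2d, torch.rand(8, 8)).shape)  # torch.Size([2, 2])
```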

Pooling

Often, as we process images, we want to gradually reduce the spatial resolution of our hidden representations, aggregating information so that the higher up we go in the network, the larger the receptive field (in the input) to which each hidden node is sensitive.

Like convolutional layers, pooling operators consist of a fixed-shape window that is slid over all regions in the input according to its stride, computing a single output for each location traversed by the fixed-shape window (sometimes known as the pooling window). However, unlike the cross-correlation computation of the inputs and kernels in the convolutional layer, the pooling layer contains no parameters (there is no kernel). Instead, pooling operators are deterministic, typically calculating either the maximum or the average value of the elements in the pooling window. These operations are called maximum pooling (max pooling for short) and average pooling, respectively.

In both cases, as with the cross-correlation operator, we can think of the pooling window as starting from the upper-left of the input tensor and sliding across the input tensor from left to right and top to bottom. At each location that the pooling window hits, it computes the maximum or average value of the input subtensor in the window, depending on whether max or average pooling is employed.
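Here is a minimal sketch of the pooling computation, mirroring the corr2d implementation above (the pool2d name and values are illustrative):

```python
def pool2d(X, pool_size, mode='max'):
    """Slide a pooling window over X, taking the max or mean of each window."""
    p_h, p_w = pool_size
    Y = torch.zeros((X.shape[0] - p_h + 1, X.shape[1] - p_w + 1))
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            window = X[i:i + p_h, j:j + p_w]
            Y[i, j] = window.max() if mode == 'max' else window.mean()
    return Y

X = torch.arange(9, dtype=torch.float32).reshape(3, 3)
print(pool2d(X, (2, 2)))         # max pooling
print(pool2d(X, (2, 2), 'avg'))  # average pooling
```

In practice, you would use PyTorch's built-in nn.MaxPool2d or nn.AvgPool2d layers, which also support stride and padding; note that these layers default to a stride equal to the pooling window size.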

Convolution In A Neural Network

Below is an implementation of a simple convolutional network. We use torchinfo.summary() to view the size of the data as it flows through the network; additionally, we print the sizes of the weights and biases of the layers during a forward pass. (You won't normally need to access the weights/biases directly when training models in PyTorch; this is purely for illustration.)
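Since the original cell isn't reproduced here, the following is a sketch of what such a network and summary might look like. The architecture itself is an assumption, sized for a 1x28x28 input such as MNIST; torchinfo is a separate package (pip install torchinfo):

```python
import torch
from torch import nn
from torchinfo import summary

class SimpleCNN(nn.Module):
    def __init__(self, num_classes=10):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3, padding=1),   # 1x28x28 -> 16x28x28
            nn.ReLU(),
            nn.MaxPool2d(2),                              # -> 16x14x14
            nn.Conv2d(16, 32, kernel_size=3, padding=1),  # -> 32x14x14
            nn.ReLU(),
            nn.MaxPool2d(2),                              # -> 32x7x7
        )
        self.classifier = nn.Linear(32 * 7 * 7, num_classes)

    def forward(self, x):
        x = self.features(x)
        return self.classifier(torch.flatten(x, 1))

model = SimpleCNN()
summary(model, input_size=(1, 1, 28, 28))  # (batch, channels, height, width)

# Print the parameter shapes (for illustration only)
for name, p in model.named_parameters():
    print(name, tuple(p.shape))
```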

Next, we'll compare this simple CNN with a fully-connected network, to see how we benefit from using convolution during training. We construct a similar, densely-connected network with around 2x the parameters of our CNN.
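One way to set up the comparison is sketched below; the hidden width of 50 is an assumption chosen so that, against the sketch CNN above (about 20k parameters), the dense network lands near the 2x target (about 40k):

```python
mlp = nn.Sequential(
    nn.Flatten(),
    nn.Linear(28 * 28, 50),
    nn.ReLU(),
    nn.Linear(50, 10),
)

def count_params(m):
    """Total number of learnable parameters in a module."""
    return sum(p.numel() for p in m.parameters())

print(f"CNN parameters: {count_params(model):,}")  # ~20,490 for the sketch above
print(f"MLP parameters: {count_params(mlp):,}")    # ~39,760
```

Despite having roughly half the parameters, the CNN typically trains to better accuracy on image data, because convolution exploits spatial locality and shares weights across positions.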