In this assignment you will learn about nearest-neighbor classification and convolutional neural networks. You will draw nearest-neighbor decision boundaries, implement the forward and backward passes of basic CNN operations in NumPy, and train a small CNN on CIFAR-10 with TensorFlow, adding early stopping and a hyperparameter grid search.
Note: When you first load this Colab webpage, it will be in read-only viewing mode. To edit and run code, you can either (a) download the Jupyter notebook ("File" -> "Download .ipynb") to run on your local computer, or (b) copy it to your Google Drive ("File" -> "Save a copy in Drive...") to work in the browser and run on a Google Cloud GPU. If you run locally, you will need to install TensorFlow, and it is recommended that you use a GPU for problem 3.2. If you do not want to use Colab and do not have a local GPU, please let us know.
Given the following training set of labeled two-dimensional points for binary classification, draw a Voronoi diagram of the output of a 1-nearest neighbor classifier. Feel free to render the diagram using Python below (do not use scikit-learn or any machine learning libraries to do this) or submit a PDF along with your assignment.
Point (x,y) | Label
------------|------
(1,3)       | +
(-4,-2)     | +
(-3,-1.5)   | -
(3,3)       | -
(0,-2)      | +
(-2,0)      | +
(-2,4)      | -
import matplotlib.pyplot as plt
import numpy as np
## Can render diagram using Python here, if you would like.
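If you would like to render the diagram programmatically, one option is a minimal brute-force sketch (assuming the seven training points from the table above and an arbitrary plotting window): classify a dense grid of points and color each by the label of its nearest training example.
## Optional sketch: brute-force 1-NN decision regions on a dense grid.
## The grid range, resolution, and colors below are arbitrary choices.
train_points = np.array([[1, 3], [-4, -2], [-3, -1.5], [3, 3], [0, -2], [-2, 0], [-2, 4]])
train_labels = np.array([1, 1, 0, 0, 1, 1, 0])  # 1 encodes '+', 0 encodes '-'
xs, ys = np.meshgrid(np.linspace(-6, 6, 400), np.linspace(-6, 6, 400))
grid = np.stack([xs.ravel(), ys.ravel()], axis=1)                        # all query points
dists = np.linalg.norm(grid[:, None, :] - train_points[None, :, :], axis=2)
regions = train_labels[np.argmin(dists, axis=1)].reshape(xs.shape)       # nearest-neighbor label
plt.contourf(xs, ys, regions, alpha=0.3)
plt.scatter(train_points[:, 0], train_points[:, 1], c=train_labels, edgecolors='k')
plt.title('1-NN decision regions (brute force)')
plt.show()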
Now render the analogous diagram for a 3-nearest neighbor (3-NN) classifier.
In this section we provide a working example of a convolutional neural network written using basic numpy operations. Each neural network operation is represented by a Python class with methods forward() and backward(), which compute activations and gradients, respectively. Your task is to complete certain methods that are left blank.
- Conv2D: Forward (provided), Backward (10 points)
- ReLU: Forward (5 points), Backward (provided)
- AvgPool: Forward (provided), Backward (5 points)
- SoftmaxCrossEntropyLoss: Forward (10 points), Backward (provided)
When you complete an operation, you can check your work by executing its cell; we compare the outputs of your method to those of TensorFlow.
Finally, when you have all of the operations completed, you can run a small network for a few iterations of stochastic gradient descent and plot the loss.
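To make the forward()/backward() convention concrete before you fill in the blanks, here is a small illustrative op. It is hypothetical, not one of the graded operations, and is not used elsewhere: an elementwise scaling layer written in the same style, which stores its activation in `value` and propagates gradients through `inputs.dloss_dvalue`.
## Illustration only: a hypothetical OpScale in the same style as the graded ops.
## It computes value = c * inputs.value, so the chain rule gives dloss/dinputs = c * dloss/dvalue.
class OpScale:
    """Elementwise multiplication by a fixed constant (for illustration, not graded)."""
    def __init__(self, c, inputs):
        self.c = c
        self.inputs = inputs
        self.reset_values()
    def reset_values(self):
        self.value = np.zeros(self.inputs.value.shape)
        self.dloss_dvalue = np.zeros(self.inputs.value.shape)
    def forward(self):
        # Reset value and gradient at start of forward pass
        self.reset_values()
        self.value = self.c * self.inputs.value
    def backward(self):
        # Pass the upstream gradient through, scaled by c.
        self.inputs.dloss_dvalue += self.c * self.dloss_dvalue
    def gradient_step(self, step_size):
        pass  # No trainable weights.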
#@title (Hidden utility code: RUN ME FIRST) { display-mode: "form" }
import tensorflow as tf
import numpy as np
class Variable:
"""Placeholder for labels and input images"""
value = 0
def cmp_ops(your_op, tf_op, tf_inputs, tf_weights=None):
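"""Compare your op's forward output and gradients against a reference TensorFlow op."""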
your_op.forward()
your_op_f_out = your_op.value
with tf.Session().as_default():
tf_op_f_out = tf_op.eval()[0] # Remove the batch dimension
print("Forward pass:")
cmp_tensors(your_op_f_out, tf_op_f_out, verbose=False)
your_op.inputs.dloss_dvalue = np.zeros(your_op.inputs.value.shape)
your_op.dloss_dvalue = np.ones(your_op.value.shape)
your_op.backward()
your_op_g_inputs = your_op.inputs.dloss_dvalue
if tf_weights is not None:
your_op_g_weights = your_op.dloss_dweights
g_inputs, g_weights = tf.gradients(tf.reduce_sum(tf_op), [tf_inputs, tf_weights])
with tf.Session() as sess:
tf_g_inputs_out, tf_g_weights_out = sess.run([g_inputs, g_weights])
tf_g_weights_out = np.transpose(tf_g_weights_out, [3,0,1,2])
print("Gradient wrt inputs:")
cmp_tensors(your_op_g_inputs, tf_g_inputs_out[0])
print("Gradient wrt weights:")
cmp_tensors(your_op_g_weights, tf_g_weights_out)
else:
g_inputs = tf.gradients(tf.reduce_sum(tf_op), [tf_inputs])
with tf.Session() as sess:
tf_g_inputs_out = sess.run(g_inputs)
print("Gradient wrt inputs:")
cmp_tensors(your_op_g_inputs, tf_g_inputs_out[0], verbose=False)
def cmp_tensors(yours, tfs, verbose=False):
print(" Your Op shape: " + str(yours.shape))
print(" TensorFlow Op shape: " + str(tfs.shape))
print(" Values equal: " + str(np.allclose(tfs, yours, atol=1e-6)))
if verbose:
print(tfs)
print(yours)
inputs = Variable()
inputs.value = np.random.normal(size=(10, 10, 3)) # Input image is 10x10x3
tf_inputs = tf.constant(inputs.value[np.newaxis, ...], dtype=tf.float32)
import numpy as np
# Feature maps are stored as `rows x cols x filters` tensors.
class OpConv2D:
"""Two-dimensional convolutional layer"""
def __init__(self, filters, kernel_size, inputs):
# Shape of the input feature map
input_height = inputs.value.shape[0]
input_width = inputs.value.shape[1]
input_filters = inputs.value.shape[2]
# Shape of this layer's feature map
self.height = input_height - kernel_size + 1
self.width = input_width - kernel_size + 1
self.filters = filters
self.inputs = inputs
self.kernel_size = kernel_size
self.weights = np.random.normal(size=(filters, kernel_size, kernel_size, input_filters), scale=0.1)
self.reset_values()
def reset_values(self):
self.value = np.zeros((self.height, self.width, self.filters))
self.dloss_dvalue = np.zeros(self.value.shape)
self.dloss_dweights = np.zeros(self.weights.shape)
def forward(self):
# Reset value and gradient at start of forward pass
self.reset_values()
for y in range(self.height):
for x in range(self.width):
for f in range(self.filters):
z = 0.0
for ky in range(self.kernel_size):
for kx in range(self.kernel_size):
for kf in range(self.weights.shape[3]):
z += self.inputs.value[y+ky, x+kx, kf] * self.weights[f, ky, kx, kf]
self.value[y, x, f] = z
def backward(self):
## Complete this method, which sets:
## 1. Partial derivative of the loss with respect to the values of the inputs
## self.inputs.dloss_dvalue, which is a `height x width x input_filters` tensor
## 2. Partial derivative of the loss with respect to the weights
## self.dloss_dweights, which is a `filters x kernel_size x kernel_size x input_filters` tensor
##
## This will utilize tensors:
## 1. The partial with respect to the value of this layer
## self.dloss_dvalue, a `height x width x filter` tensor
## 2. The weights of this layer
## self.weights, a `filters x kernel_size x kernel_size x input_filters` tensor
## 3. The value of the input layer
## self.inputs.value, a `height x width x input_filters` tensor
pass
def gradient_step(self, step_size):
self.weights -= step_size * self.dloss_dweights
# Double check that op matches tensorflow
print("Testing Conv2D...")
op1 = OpConv2D(4, 3, inputs)
tf_weights = tf.constant(np.transpose(op1.weights, [1,2,3,0]), dtype=tf.float32)
tf_op1 = tf.nn.conv2d(tf_inputs,
tf_weights,
[1,1,1,1],
'VALID')
cmp_ops(op1, tf_op1, tf_inputs, tf_weights)
class OpRelu:
"""Elementwise relu operator"""
def __init__(self, inputs):
# Shape of the input feature map
self.input_shape = inputs.value.shape
self.inputs = inputs
self.reset_values()
def reset_values(self):
self.value = np.zeros(self.inputs.value.shape)
self.dloss_dvalue = np.zeros(self.inputs.value.shape)
def forward(self):
# Reset value and gradient at start of forward pass
self.reset_values()
## Complete this code by setting self.value using self.inputs.value
def backward(self):
self.inputs.dloss_dvalue = self.dloss_dvalue * np.greater(self.value, 0.0)
def gradient_step(self, step_size):
pass
# Double check that each op matches tensorflow
print("\nTesting Relu...")
op2 = OpRelu(inputs)
tf_op2 = tf.nn.relu(tf_inputs)
cmp_ops(op2, tf_op2, tf_inputs)
class OpAvgPool:
"""Average pooling layer. Non-overlapping cells."""
def __init__(self, cell_size, inputs):
# Shape of the input feature map
self.input_height = inputs.value.shape[0]
self.input_width = inputs.value.shape[1]
self.input_filters = inputs.value.shape[2]
# Shape of this layer's feature map
self.height = (self.input_height + cell_size - 1) // cell_size  # integer (ceiling) division
self.width = (self.input_width + cell_size - 1) // cell_size
self.filters = self.input_filters
self.inputs = inputs
self.cell_size = cell_size
self.reset_values()
def reset_values(self):
self.value = np.zeros((self.height, self.width, self.filters))
self.dloss_dvalue = np.zeros(self.value.shape)
def forward(self):
# Reset value and gradient at start of forward pass
self.reset_values()
for y in range(self.height):
for x in range(self.width):
for f in range(self.filters):
z = 0.0
for ky in range(min(self.cell_size, self.input_height - y*self.cell_size)):
for kx in range(min(self.cell_size, self.input_width - x*self.cell_size)):
z += self.inputs.value[self.cell_size*y+ky, self.cell_size*x+kx, f]
self.value[y, x, f] = z / (self.cell_size * self.cell_size)
def backward(self):
## Complete this method by setting the partial with repect to the values of the inputs
## self.inputs.dloss_dvalue, an `input_height x input_width x filters` tensor
## This will use the partial with respect to the value of this layer
## self.dloss_dvalue, a `height x width x filters` tensor
pass
def gradient_step(self, step_size):
pass
# Double check that each op matches tensorflow
print("\nTesting AvgPool...")
op3 = OpAvgPool(2, inputs)
tf_op3 = tf.nn.avg_pool(tf_inputs, [1, 2, 2, 1], [1,2,2,1], "VALID")
cmp_ops(op3, tf_op3, tf_inputs)
class OpSoftmaxCrossEntropyLoss:
"""Cross-entropy loss."""
def __init__(self, logits, true_label):
"""
inputs:
logits: shape [1,1,num_classes]
true_label: scalar in range [0, num_classes-1]
"""
# Shape of the input feature map
self.num_classes = logits.value.shape[2]
self.inputs = logits
self.true_label = true_label
def reset_values(self):
self.max_label = 0
self.value = np.zeros((1,))
self.softmax_prob = np.zeros((self.num_classes,))
def forward(self):
# Reset value and gradient at start of forward pass
self.reset_values()
## Complete this method by:
## (1) setting self.value to the scalar value of the
## negative log probability of the true class under a Softmax distribution.
## Loss = -ln(exp(y_true) / sum_j (exp(y_j))), where y_j is the logits
## value for class j.
## (2) setting self.softmax_prob to the vector representing the probability
## of each class according to the Softmax distribution
## softmax_prob[j] = exp(y_i) / sum_j (exp(y_j)), where y_j is the logits
## value for class j.
## This will use
## self.inputs.value, a `1 x 1 x num_classes` tensor containing the logits
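## Hint (optional): subtracting np.max(self.inputs.value) from the logits before
## exponentiating does not change the softmax or the loss, but avoids numerical overflow.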
def backward(self):
# Loss = -ln(exp(y_true) / sum_j (exp(y_j)))
# dLoss/dYk = exp(y_k) / sum_j (exp(y_j))
# dLoss/dYtrue = exp(y_true) / sum_j (exp(y_j)) - 1
self.inputs.dloss_dvalue[0, 0, :] += self.softmax_prob
self.inputs.dloss_dvalue[0, 0, self.true_label.value] += -1
def gradient_step(self, step_size):
pass
# Double check that each op matches tensorflow
print("\nTesting Cross Entropy Loss...")
pooled = OpAvgPool(10, inputs)
pooled.forward()
tf_pooled = tf.nn.avg_pool(tf_inputs, [1, 10, 10, 1], [1,10,10,1], "VALID")
true_label = Variable()
op4 = OpSoftmaxCrossEntropyLoss(pooled, true_label)
tf_op4 = tf.nn.softmax_cross_entropy_with_logits_v2(logits=tf_pooled,
labels=tf.one_hot(tf.constant(0), 3))
cmp_ops(op4, tf_op4, tf_pooled)
Here we assemble all of our operations into a full convolutional neural network. We then run stochastic gradient descent on a small collection of ten images to ensure that the loss is decreasing.
Run this cell to train the small network for `num_its` iterations of stochastic gradient descent and plot the training loss. (5 pts)
Why is this plot jagged? What is it about our architecture or training procedure that causes this, and how might adjusting these factors change the shape of this curve? (5 pts)
from tensorflow.examples.tutorials.mnist import input_data
# Construct a mini network for MNIST
inputs = Variable()
true_label = Variable()
inputs.value = np.random.normal(size=(28, 28, 1))
inputs.dloss_dvalue = np.random.normal(size=(28, 28, 1))
op1 = OpConv2D(16, 5, inputs) # Output is 28-5+1=24
op2 = OpAvgPool(2, op1) # Output is 24/2=12
op3 = OpRelu(op2)
op4 = OpConv2D(16, 5, op3) # Output is 12-5+1=8
op5 = OpAvgPool(2, op4) # Output is 8/2=4
op6 = OpRelu(op5)
op7 = OpConv2D(10, 3, op6) # Output is 4-3+1=2
op8 = OpAvgPool(2, op7) # Output is 2/2=1
op9 = OpSoftmaxCrossEntropyLoss(op8, true_label)
ops_list = [op1,op2,op3,op4,op5,op6,op7,op8,op9]
# Run for a few iterations, make sure loss is going down
learning_rate = 0.2
inputs.value = np.random.normal(size=(28, 28, 1))
mnist = input_data.read_data_sets('MNIST_data', one_hot=False)
num_its = 20
batch_size = 10
batch_x, batch_y = mnist.train.next_batch(batch_size)
loss_list = []
for it in range(num_its):
loss_of_batch = 0.0
for im in range(batch_size):
inputs.value = np.reshape(batch_x[im], (28,28,1))
true_label.value = batch_y[im]
for op in ops_list:
op.forward()
loss_of_batch += ops_list[-1].value
for op in reversed(ops_list):
op.backward()
op.gradient_step(learning_rate)
loss_list.append(loss_of_batch)
print("Iteration " + str(it) + " Loss: "+str(loss_of_batch))
plt.plot(range(num_its), loss_list)
Extend the functionality of one of these operations (e.g. add stride, dilation, or padding to the 2D Convolution) or implement a new one (e.g. fully-connected layer).
#@title (Hidden utility code: RUN ME FIRST) { display-mode: "form" }
!git clone https://github.com/tensorflow/models.git 2>/dev/null
import sys
import math
sys.path.append('/content/models/tutorials/image/cifar10/')
from datetime import datetime
import matplotlib.pyplot as plt
import numpy as np
plt.rcParams['axes.facecolor'] = 'white'
import tensorflow as tf
tf.reset_default_graph()
try:
tf.app.flags.FLAGS.f
except Exception:
tf.app.flags.DEFINE_string('f', '', """Placeholder.""")
import cifar10
tf.app.flags.FLAGS.batch_size = 100
def plot_filters(filters, xlabel=None, ylabel=None):
print(filters.shape)
# filters: height x width x channels x num_filters
num_filters = filters.shape[3]
filter_height = filters.shape[0]
filter_width = filters.shape[1]
filter_channels = filters.shape[2]
spacing = 1
rows = int(math.ceil(math.sqrt(num_filters)))
cols = int(math.ceil(math.sqrt(num_filters)))
plot = np.zeros((rows*(filter_height+spacing), cols*(filter_width+spacing), min(filter_channels, 3) ))
min_value = np.min(filters)
max_value = np.max(filters)
filters = (filters - min_value) / (max_value - min_value)
for f in range(num_filters):
r = int(f/cols)
c = f - r*cols
plot[r*(filter_height+spacing):r*(filter_height+spacing)+filter_height,
c*(filter_width+spacing):c*(filter_width+spacing)+filter_width,:] = filters[:,:,0:min(filter_channels, 3),f]
plt.grid(False)
plt.imshow(np.squeeze(plot))
if xlabel is not None:
plt.xlabel(xlabel)
if ylabel is not None:
plt.ylabel(ylabel)
plt.show()
cifar10.maybe_download_and_extract()
images, labels = cifar10.inputs(False)
test_images, test_labels = cifar10.inputs(True)
We have specified a very simple convolutional neural network to classify images from the CIFAR-10 dataset, along with a training loop that optimizes the weights of the network. Your task is to add Early Stopping (ES) to this training loop: validation accuracy should be measured periodically, and training should stop if the validation accuracy does not reach a new absolute maximum after some number of measurements (this number is called the "patience"). After training, we then measure the test accuracy. Before implementing ES, run the following cell to see a plot of the training loss and validation accuracy; then report the test accuracy you achieve with ES.
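For reference, here is a minimal, standalone sketch of the patience-based bookkeeping described above. The helper names (`run_one_training_step`, `evaluate_validation_accuracy`) and the constants are placeholders, not part of the provided code; you will need to adapt the idea to the training loop below.
## Standalone sketch of patience-based early stopping (placeholder helpers, not provided code).
def train_with_early_stopping(run_one_training_step, evaluate_validation_accuracy,
                              max_its=1000, eval_every=50, patience=5):
    best_acc = -1.0
    evals_since_best = 0
    for it in range(max_its):
        run_one_training_step()
        if (it + 1) % eval_every == 0:
            acc = evaluate_validation_accuracy()
            if acc > best_acc:        # new absolute maximum: reset the patience counter
                best_acc = acc
                evals_since_best = 0
            else:                     # no new maximum on this measurement
                evals_since_best += 1
            if evals_since_best >= patience:
                print('Early stopping at iteration %d (best validation accuracy %.3f)' % (it, best_acc))
                break
    return best_acc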
The hyperparameters we have chosen are not necessarily optimal. Pick two factors to search over (e.g. number of layers, filters per layer, learning rate, convolutional kernel size, etc.). Then write a procedure that uses grid search to find the combination of these hyperparameters that yields the highest validation accuracy. Finally, report the test accuracy achieved by this model.
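If it helps to see the structure, below is a minimal grid-search skeleton over two example factors. `build_and_train` is a hypothetical placeholder for whatever routine you write that constructs the network with the given hyperparameters, trains it, and returns validation accuracy; the grid values shown are arbitrary.
## Skeleton for a two-factor grid search; `build_and_train` is a hypothetical placeholder.
import itertools

learning_rates = [0.01, 0.05, 0.1]   # example grid values; choose your own
kernel_sizes = [3, 5, 7]

best_config, best_acc = None, -1.0
for lr, k in itertools.product(learning_rates, kernel_sizes):
    valid_acc = build_and_train(learning_rate=lr, kernel_size=k)
    print('lr=%g, kernel_size=%d -> validation accuracy %.3f' % (lr, k, valid_acc))
    if valid_acc > best_acc:
        best_config, best_acc = (lr, k), valid_acc
print('Best hyperparameters:', best_config, 'with validation accuracy', best_acc)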
sess = tf.Session()
with sess.as_default():
tf.train.start_queue_runners()
im_width = 24
# Define placeholders for image and label
y_ = tf.placeholder(tf.float32, [None, 10])
x = tf.placeholder(tf.float32, [None, im_width, im_width, 3])
# Define a convolutional neural network (CNN)
cnnL1 = tf.layers.conv2d(x, 16, 5, strides=(2,2), activation=tf.nn.relu)
cnnL2 = tf.layers.conv2d(cnnL1, 16, 5, activation=tf.nn.relu)
cnnL3 = tf.layers.conv2d(cnnL2, 32, 5, activation=tf.nn.relu)
cnn = tf.reduce_sum(tf.reduce_sum(cnnL3, axis=1), axis=1)
cnn = tf.contrib.layers.flatten(cnn)
y_cnn = tf.layers.dense(cnn, 10)
cross_entropy_cnn = tf.reduce_mean(
tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_, logits=y_cnn))
train_step_cnn = tf.train.GradientDescentOptimizer(0.05).minimize(cross_entropy_cnn)
correct_prediction_cnn = tf.equal(tf.argmax(y_cnn, 1), tf.argmax(y_, 1))
accuracy_cnn = tf.reduce_mean(tf.cast(correct_prediction_cnn, tf.float32))
tf.global_variables_initializer().run(session=sess)
# Train
print('Training... '+str(datetime.now()))
valid_batch_xs, valid_batch_ys = sess.run([test_images, tf.one_hot(test_labels, 10)])
train_losses = []
test_accuracies = []
valid_its = []
valid_accuracies = []
num_its = 1000
for it in range(num_its):
if (it+1) % 50 == 0:
print('Iteration %d/%d ...' % (it, num_its))
# Validation accuracy
valid_acc_cnn = sess.run(accuracy_cnn, feed_dict={x: valid_batch_xs, y_: valid_batch_ys})
valid_accuracies.append(valid_acc_cnn)
valid_its.append(it)
batch_xs, batch_ys = sess.run([images, tf.one_hot(labels, 10)])
loss_cnn_out, _ = sess.run([cross_entropy_cnn, train_step_cnn], feed_dict={x: batch_xs, y_: batch_ys})
train_losses.append(loss_cnn_out)
print('Testing... '+str(datetime.now()))
# Test trained model
test_batch_xs, test_batch_ys = sess.run([test_images, tf.one_hot(test_labels, 10)])
true_label = tf.argmax(y_, 1)
cnn_label = tf.argmax(y_cnn, 1)
acc_cnn_out, true_label_out, cnn_label_out = sess.run([accuracy_cnn, true_label, cnn_label], feed_dict={x: test_batch_xs,
y_: test_batch_ys})
# Plot train loss and validation accuracy
plt.plot(range(it+1), train_losses)
plt.ylabel('Training loss')
plt.xlabel('Iteration')
plt.show()
plt.plot(valid_its, valid_accuracies)
plt.ylabel('Validation accuracy')
plt.xlabel('Iteration')
plt.show()
print('Test accuracy: ' + str(acc_cnn_out*100) + '%')
If you are curious what the weights, activations, or misclassified test images (arranged in a confusion matrix) look like, we visualize them below. Feel free to modify this code to inspect other aspects of your trained model.
with sess.as_default():
# Show weights from the first layer
print('Weights from the first layer')
with tf.variable_scope("conv2d_1", reuse=True):
weights = tf.get_variable('kernel')
plot_filters(weights.eval())
# Show activations from the first feature map
print('Activations from the first feature map.')
fmap = cnnL1.eval(feed_dict={x: test_batch_xs, y_: test_batch_ys})
plot_filters(np.transpose(fmap[0:1,...], (1,2,0,3)))
# Show images in a confusion matrix
confusion = np.zeros((24,24,3,100))
for b in range(true_label_out.shape[0]):
confusion[:,:,:,true_label_out[b]*10 + cnn_label_out[b]] = test_batch_xs[b]
plot_filters(confusion, ylabel='True label', xlabel='Guessed label')