%%html
<iframe width="640" height="360" src="https://www.youtube-nocookie.com/embed/aircAruvnKk?start=163&end=331" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share" allowfullscreen></iframe>

!pip install -q keras tensorflow-cpu

!pip install -q keras tensorflow-cpu

from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier

import matplotlib.pyplot as plt

# Download MNIST from OpenML as a feature table X (one row per image, one column per pixel:
# pixel1..pixel784) and a label vector y
X, y = fetch_openml("mnist_784", version=1, return_X_y=True, parser="auto")
# Rescale pixel intensities by 1/255 (presumably from [0, 255] to [0, 1] — the same scaling is
# applied to the Keras copy of the dataset later in this notebook)
X = X / 255
# Hold out 20% of the samples for testing; no random_state is passed, so the split (and every
# score computed from it) differs from run to run
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
# Display the scaled feature table
X

# Render the first sample: its 784 pixel values folded back into a 28x28 image
first_digit = X.loc[0].to_numpy().reshape(28, 28)
plt.imshow(first_digit, cmap="gray")

<matplotlib.image.AxesImage at 0x79ef0f9b9290>

%%html
<iframe width="640" height="360" src="https://www.youtube-nocookie.com/embed/aircAruvnKk?start=332&end=806" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share" allowfullscreen></iframe>

# Small multilayer perceptron with two hidden layers of 16 neurons each.
# max_iter=5 caps training at 5 iterations to keep the demo short, which is why scikit-learn
# warns that the optimizer has not converged yet.
mlp_16x16 = MLPClassifier(hidden_layer_sizes=(16, 16), max_iter=5, verbose=True)
%time mlp_16x16.fit(X_train, y_train)
# Mean accuracy on the held-out test set
mlp_16x16.score(X_test, y_test)

Iteration 1, loss = 0.92632283
Iteration 2, loss = 0.33565057
Iteration 3, loss = 0.27750873
Iteration 4, loss = 0.24866184
Iteration 5, loss = 0.23040307
CPU times: user 9min 35s, sys: 1.63 s, total: 9min 37s
Wall time: 4min 52s

/opt/conda/lib/python3.11/site-packages/sklearn/neural_network/_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (5) reached and the optimization hasn't converged yet.
  warnings.warn(

0.9285714285714286

# Class probabilities for the first test sample only: one row, one column per class
probs = mlp_16x16.predict_proba(X_test.head(1))
probs

array([[9.98221411e-01, 9.49217021e-10, 1.04587116e-03, 3.73570513e-07,
        1.84343260e-07, 8.08504362e-05, 5.82896196e-04, 3.64064196e-06,
        6.45884662e-05, 1.82960739e-07]])

# Rank the class indices of the single predicted sample from most to least probable
ranked = sorted(enumerate(probs[0]), key=lambda pair: pair[1], reverse=True)
for label, prob in ranked:
    print(f"{label}: {prob:.4f}")

0: 0.9982
2: 0.0010
6: 0.0006
5: 0.0001
8: 0.0001
7: 0.0000
3: 0.0000
4: 0.0000
9: 0.0000
1: 0.0000

# Show the test digit whose probabilities were just predicted: X_test.head(1) is the row at
# position 0, so the image must come from iloc[0] (iloc[1] would display a different sample)
plt.imshow(X_test.iloc[0].to_numpy().reshape(28, 28), cmap="gray")

<matplotlib.image.AxesImage at 0x79ef0f118850>

# Wider network with a single hidden layer of 40 neurons. early_stopping=True makes scikit-learn
# set aside part of the training data and report a validation score after every iteration.
mlp_40 = MLPClassifier(hidden_layer_sizes=(40,), learning_rate_init=0.001, early_stopping=True, verbose=True)
%time mlp_40.fit(X_train, y_train)
# Mean accuracy on the held-out test set
mlp_40.score(X_test, y_test)

Iteration 1, loss = 0.60445835
Validation score: 0.906071
Iteration 2, loss = 0.27666377
Validation score: 0.925179
Iteration 3, loss = 0.22660441
Validation score: 0.933214
Iteration 4, loss = 0.19334185
Validation score: 0.942321
Iteration 5, loss = 0.16749608
Validation score: 0.945000
Iteration 6, loss = 0.14821941
Validation score: 0.947500
Iteration 7, loss = 0.13430439
Validation score: 0.953036
Iteration 8, loss = 0.12222141
Validation score: 0.954821
Iteration 9, loss = 0.11177124
Validation score: 0.958036
Iteration 10, loss = 0.10339358
Validation score: 0.959643
Iteration 11, loss = 0.09575611
Validation score: 0.959821
Iteration 12, loss = 0.08832224
Validation score: 0.959464
Iteration 13, loss = 0.08231870
Validation score: 0.961964
Iteration 14, loss = 0.07663748
Validation score: 0.961964
Iteration 15, loss = 0.07113307
Validation score: 0.963571
Iteration 16, loss = 0.06630588
Validation score: 0.965000
Iteration 17, loss = 0.06296239
Validation score: 0.965357
Iteration 18, loss = 0.05858014
Validation score: 0.964464
Iteration 19, loss = 0.05575241
Validation score: 0.963393
Iteration 20, loss = 0.05258834
Validation score: 0.966786
Iteration 21, loss = 0.04937947
Validation score: 0.966071
Iteration 22, loss = 0.04628569
Validation score: 0.966429
Iteration 23, loss = 0.04376740
Validation score: 0.965357

/opt/conda/lib/python3.11/site-packages/sklearn/neural_network/_multilayer_perceptron.py:698: UserWarning: Training interrupted by user.
  warnings.warn("Training interrupted by user.")

CPU times: user 1h 15min 16s, sys: 12.7 s, total: 1h 15min 29s
Wall time: 37min 57s

0.9656428571428571

# Weights connecting the 784 input pixels to the 40 hidden neurons
first_layer = mlp_40.coefs_[0]
# Constrain plots to the same scale (divided by 2 for better display)
vmin = first_layer.min() / 2
vmax = first_layer.max() / 2

# One subplot per hidden neuron (4 x 10 = 40): its incoming weights drawn as a 28x28 image
fig, axs = plt.subplots(nrows=4, ncols=10, figsize=(12.5, 5))
for ax, coef in zip(axs.ravel(), first_layer.T):
    ax.matshow(coef.reshape(28, 28), vmin=vmin, vmax=vmax)
    ax.set_axis_off()

import keras
from keras import layers, models
import matplotlib.pyplot as plt
import numpy as np

# Load the data as (N, 28, 28) images using the dataset's predefined split:
# 60,000 training images and 10,000 test images (not an 80%/20% split)
(X_train, y_train), (X_test, y_test) = keras.datasets.mnist.load_data()

# Scale image values from [0, 255] to [0, 1]
X_train = X_train.astype("float32") / 255
X_test = X_test.astype("float32") / 255

# Add an extra dimension to each image (28, 28, 1) as Keras requires at least 1 "color" channel
X_train = np.expand_dims(X_train, -1)
X_test = np.expand_dims(X_test, -1)
input_shape = (28, 28, 1)
# Sanity check: every image in both sets now has the shape the models will be built for
assert X_train.shape[1:] == input_shape and X_test.shape[1:] == input_shape

# Convert a class vector (integers) to binary class matrix
# (one-hot rows, as expected by the categorical cross-entropy loss used below)
num_classes = 10
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# Display an image without any need to reshape
plt.imshow(X_train[0], cmap="gray")

2025-03-03 18:19:22.469255: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.

<matplotlib.image.AxesImage at 0x7f16190df7d0>

# Build the model: in Keras, kernel_size is specified as (height, width)
kernel_size = (3, 3)
model = keras.Sequential([
    keras.Input(shape=input_shape),
    # Each unpadded 3x3 convolution trims one pixel from every border (28 -> 26, 13 -> 11),
    # and each 2x2 max pooling halves the width and height (26 -> 13, 11 -> 5)
    layers.Conv2D(32, kernel_size=kernel_size, activation="relu"),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(64, kernel_size=kernel_size, activation="relu"),
    layers.MaxPooling2D(pool_size=(2, 2)),
    # Flatten the 5x5x64 feature maps into a single 1600-value vector for the classifier
    layers.Flatten(),
    # Randomly zero 20% of those values during training to reduce overfitting
    layers.Dropout(0.2),
    # One output per digit class; softmax turns the outputs into probabilities
    layers.Dense(num_classes, activation="softmax"),
])
model.summary(line_length=80)

# Train and evaluate the model (same loss, gradient descent optimizer, and metric as MLPClassifier)
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
# validation_split=0.1 holds back 10% of the training data for the per-epoch val_* metrics,
# leaving 54,000 samples, i.e. 540 batches of 100 per epoch
%time model.fit(X_train, y_train, batch_size=100, epochs=10, validation_split=0.1)

# Show the accuracy score on the test set
# (evaluate returns [loss, accuracy] given the metrics compiled above, hence index 1)
model.evaluate(X_test, y_test, verbose=0)[1]

Model: "sequential"

┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type)                      ┃ Output Shape             ┃       Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ conv2d (Conv2D)                   │ (None, 26, 26, 32)       │           320 │
├───────────────────────────────────┼──────────────────────────┼───────────────┤
│ max_pooling2d (MaxPooling2D)      │ (None, 13, 13, 32)       │             0 │
├───────────────────────────────────┼──────────────────────────┼───────────────┤
│ conv2d_1 (Conv2D)                 │ (None, 11, 11, 64)       │        18,496 │
├───────────────────────────────────┼──────────────────────────┼───────────────┤
│ max_pooling2d_1 (MaxPooling2D)    │ (None, 5, 5, 64)         │             0 │
├───────────────────────────────────┼──────────────────────────┼───────────────┤
│ flatten (Flatten)                 │ (None, 1600)             │             0 │
├───────────────────────────────────┼──────────────────────────┼───────────────┤
│ dropout (Dropout)                 │ (None, 1600)             │             0 │
├───────────────────────────────────┼──────────────────────────┼───────────────┤
│ dense (Dense)                     │ (None, 10)               │        16,010 │
└───────────────────────────────────┴──────────────────────────┴───────────────┘

 Total params: 34,826 (136.04 KB)

 Trainable params: 34,826 (136.04 KB)

 Non-trainable params: 0 (0.00 B)

Epoch 1/10
540/540 ━━━━━━━━━━━━━━━━━━━━ 33s 54ms/step - accuracy: 0.8214 - loss: 0.6138 - val_accuracy: 0.9800 - val_loss: 0.0706
Epoch 2/10
540/540 ━━━━━━━━━━━━━━━━━━━━ 38s 51ms/step - accuracy: 0.9724 - loss: 0.0892 - val_accuracy: 0.9853 - val_loss: 0.0543
Epoch 3/10
540/540 ━━━━━━━━━━━━━━━━━━━━ 42s 53ms/step - accuracy: 0.9801 - loss: 0.0632 - val_accuracy: 0.9862 - val_loss: 0.0469
Epoch 4/10
540/540 ━━━━━━━━━━━━━━━━━━━━ 40s 51ms/step - accuracy: 0.9822 - loss: 0.0565 - val_accuracy: 0.9875 - val_loss: 0.0472
Epoch 5/10
540/540 ━━━━━━━━━━━━━━━━━━━━ 28s 51ms/step - accuracy: 0.9867 - loss: 0.0428 - val_accuracy: 0.9898 - val_loss: 0.0377
Epoch 6/10
540/540 ━━━━━━━━━━━━━━━━━━━━ 41s 52ms/step - accuracy: 0.9886 - loss: 0.0362 - val_accuracy: 0.9903 - val_loss: 0.0365
Epoch 7/10
540/540 ━━━━━━━━━━━━━━━━━━━━ 28s 52ms/step - accuracy: 0.9899 - loss: 0.0306 - val_accuracy: 0.9908 - val_loss: 0.0358
Epoch 8/10
540/540 ━━━━━━━━━━━━━━━━━━━━ 28s 51ms/step - accuracy: 0.9915 - loss: 0.0265 - val_accuracy: 0.9892 - val_loss: 0.0354
Epoch 9/10
540/540 ━━━━━━━━━━━━━━━━━━━━ 33s 62ms/step - accuracy: 0.9918 - loss: 0.0244 - val_accuracy: 0.9903 - val_loss: 0.0349
Epoch 10/10
540/540 ━━━━━━━━━━━━━━━━━━━━ 37s 54ms/step - accuracy: 0.9925 - loss: 0.0245 - val_accuracy: 0.9918 - val_loss: 0.0357
CPU times: user 8min 9s, sys: 49.2 s, total: 8min 58s
Wall time: 6min

0.991100013256073

# Build the model
mlp_keras = keras.Sequential([
    keras.Input(shape=input_shape), # (28, 28, 1)
    layers.Flatten(),
    # layers.Conv2D(32, kernel_size=kernel_size, activation="relu"),
    # layers.MaxPooling2D(pool_size=(2, 2)),
    # layers.Conv2D(64, kernel_size=kernel_size, activation="relu"),
    # layers.MaxPooling2D(pool_size=(2, 2)),
    # layers.Flatten(),
    # layers.Dropout(0.2),
    layers.Dense(16, activation="relu"),
    layers.Dense(16, activation="relu"),
    layers.Dense(num_classes, activation="softmax"),
])
mlp_keras.summary(line_length=80)

# Train and evaluate the model (same loss, gradient descent optimizer, and metric as MLPClassifier)
mlp_keras.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
%time mlp_keras.fit(X_train, y_train, batch_size=100, epochs=10, validation_split=0.1)

# Show the accuracy score on the test set
mlp_keras.evaluate(X_test, y_test, verbose=0)[1]

Model: "sequential_1"

┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type)                      ┃ Output Shape             ┃       Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ dense_1 (Dense)                   │ (None, 28, 28, 16)       │            32 │
├───────────────────────────────────┼──────────────────────────┼───────────────┤
│ dense_2 (Dense)                   │ (None, 28, 28, 16)       │           272 │
├───────────────────────────────────┼──────────────────────────┼───────────────┤
│ dense_3 (Dense)                   │ (None, 28, 28, 10)       │           170 │
└───────────────────────────────────┴──────────────────────────┴───────────────┘

 Total params: 474 (1.85 KB)

 Trainable params: 474 (1.85 KB)

 Non-trainable params: 0 (0.00 B)

# One subplot per learned filter of the first convolution layer (4 x 8 = 32 filters)
fig, axs = plt.subplots(nrows=4, ncols=8, figsize=(10, 5))
# Conv2D kernel weights are laid out as (height, width, input channels, filters)
conv2d = model.layers[0].weights[0].numpy()
# Share one color scale across all filters so they can be compared visually
vmin = conv2d.min()
vmax = conv2d.max()
for ax, k in zip(axs.ravel(), range(conv2d.shape[-1])):
    # The input image has a single channel, so each filter is one (height, width) kernel
    kernel = conv2d[:, :, 0, k]
    ax.matshow(kernel, vmin=vmin, vmax=vmax)
    # Use row/col rather than y/x: `y` already holds the dataset labels earlier in this notebook
    for row in range(kernel_size[0]):
        for col in range(kernel_size[1]):
            # Display the weight values rounded to 1 decimal place (text takes x=col, y=row)
            ax.text(col, row, round(kernel[row, col], 1), va="center", ha="center")
    ax.set_axis_off()

# Debugging model that shares the trained model's input but exposes the output of its first
# 4 layers (conv2d, max_pooling2d, conv2d_1, max_pooling2d_1)
activation_model = models.Model(
    inputs=model.inputs,
    outputs=[layer.output for layer in model.layers[:4]],
)
# Run it on a batch holding only the first training image: one activation array per layer
activations = activation_model.predict(X_train[:1])

# Show how the input image responds to a convolution using the very first filter (kernel) above
first_conv_output = activations[0]
plt.imshow(first_conv_output[0, ..., 0], cmap="gray")

1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 58ms/step

<matplotlib.image.AxesImage at 0x7917a1fa7a90>

# Same view for the second filter (channel index 1) of the first convolution layer
second_filter_response = activations[0][0, :, :, 1]
plt.imshow(second_filter_response, cmap="gray")

images_per_row = 8

# Draw one figure per extracted layer, tiling all of its channels into a single image
for i, activation in enumerate(activations):
    # Assume square images: image size is the same width or height
    assert activation.shape[1] == activation.shape[2]
    size = activation.shape[1]
    # Number of features (filters, learned kernels, etc) to display
    n_features = activation.shape[-1]
    # Ceiling division: a final, partially filled row is kept instead of being dropped
    n_rows = -(-n_features // images_per_row)

    # Tile all the images onto a single large grid; too many images to display individually
    # (cells without a channel stay at zero)
    grid = np.zeros((n_rows * size, images_per_row * size))
    for channel in range(n_features):
        # Fill the grid left to right, then top to bottom
        row, col = divmod(channel, images_per_row)
        grid[row * size:(row + 1) * size, col * size:(col + 1) * size] = activation[0, ..., channel]

    # Display each grid with the same width
    scale = 1.2 / size
    plt.figure(figsize=(scale * grid.shape[1], scale * grid.shape[0]))
    plt.imshow(grid, cmap="gray")
    plt.title(model.layers[i].name)
    plt.grid(False)

Neural Networks¶

Multilayer perceptrons¶

Convolutional neural networks¶

Practice: Multilayer perceptron in Keras¶

Visualizing a convolutional neural network¶

	pixel1	pixel2	pixel3	pixel4	pixel5	pixel6	pixel7	pixel8	pixel9	pixel10	...	pixel775	pixel776	pixel777	pixel778	pixel779	pixel780	pixel781	pixel782	pixel783	pixel784
0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
1	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
3	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
4	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
69995	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
69996	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
69997	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
69998	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
69999	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0