import imageio.v3 as iio
import matplotlib.pyplot as plt
import numpy as np


def compare(*images):
    """Display one or more images side by side."""
    _, axs = plt.subplots(1, len(images), figsize=(15, 6))
    for ax, image in zip(axs, images):
        ax.imshow(image, cmap="gray")
    plt.show()

# Read image as grayscale
dubs = iio.imread("dubs.jpg", mode="L")
plt.imshow(dubs, cmap="gray");

# f(0, 0)
dubs[0, 0]

238

%%html
<iframe width="640" height="360" src="https://www.youtube-nocookie.com/embed/KuXjwB4LzSA?start=539&end=588" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share" allowfullscreen></iframe>

np.ones((3, 3)) / 9

array([[0.11111111, 0.11111111, 0.11111111],
       [0.11111111, 0.11111111, 0.11111111],
       [0.11111111, 0.11111111, 0.11111111]])

box_blur_3x3 = 1 / 9 * np.ones((3, 3))
box_blur_3x3

array([[0.11111111, 0.11111111, 0.11111111],
       [0.11111111, 0.11111111, 0.11111111],
       [0.11111111, 0.11111111, 0.11111111]])

def get_subimages(image, kernel_shape):
    """Returns an array of subimages matching the given kernel shape."""
    image_h, image_w = image.shape
    kernel_h, kernel_w = kernel_shape # kernel.shape == subimage.shape
    # This version of the program discards the first + last row, and first + last col
    #   Number of rows (subimages_h): image_h - kernel_h + 1
    subimages_h = image_h - kernel_h + 1
    #   Number of cols (subimages_w): image_w - kernel_w + 1
    subimages_w = image_w - kernel_w + 1

    # Our resulting subimages array that will be returned
    # For each row + col, we'll have a subimage where kernel.shape == subimage.shape
    subimages = np.zeros((subimages_h, subimages_w, kernel_h, kernel_w))
    for y in range(subimages_h):
        for x in range(subimages_w):
            # Fix your code (but it's correct on PollEv): swap the _w with _h

            # subimages[y, x] = image[x:x + kernel_w,     y:y + kernel_h]
            # subimages[y, x] = image[x:x + kernel_w + 1, y:y + kernel_h + 1]
            subimages[y, x] = image[y:y + kernel_h,     x:x + kernel_w]
            # subimages[y, x] = image[y:y + kernel_h + 1, x:x + kernel_w + 1]
    return subimages


dubs_eye = dubs[50:100, 300:350]
dubs_eye_subimages = get_subimages(dubs_eye, (3, 3))
compare(dubs_eye_subimages[(30, 20)], dubs_eye[30:33, 20:23])
assert np.allclose(dubs_eye_subimages[(30, 20)], dubs_eye[30:33,20:23])

dubs_eye.shape

(50, 50)

dubs_eye_subimages.shape

(48, 48, 3, 3)

def convolve(image, kernel):
    """Returns the convolution of the image and kernel where the image shrinks by kernel shape."""
    subimages = get_subimages(image, kernel.shape)
    subimages_h, subimages_w = subimages.shape[:2]
    result = np.zeros((subimages_h, subimages_w))
    for y in range(subimages_h):
        for x in range(subimages_w):
            # np.sum(ndarray) or ndarray.sum() gives the sum of all values in an array
            # How to get the element-wise products
            #   kernel.shape is 3x3
            #   subimage.shape is 3x3
            result[y, x] = (kernel * subimages[y, x]).sum()
    return result


dubs_blurred = convolve(dubs, box_blur_3x3)
compare(dubs, dubs_blurred)

box_blur_10x10 = np.ones((10, 10))
box_blur_10x10 /= box_blur_10x10.size
box_blur_10x10

array([[0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01],
       [0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01],
       [0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01],
       [0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01],
       [0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01],
       [0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01],
       [0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01],
       [0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01],
       [0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01],
       [0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01]])

dubs_blurred_10x10 = convolve(dubs, box_blur_10x10)
compare(dubs, dubs_blurred_10x10)

dubs_blurred_10x10

array([[239.94, 239.97, 239.99, ..., 231.81, 231.78, 231.83],
       [239.98, 239.99, 239.99, ..., 231.87, 231.83, 231.86],
       [240.01, 240.  , 239.99, ..., 231.93, 231.89, 231.9 ],
       ...,
       [240.  , 240.  , 240.  , ..., 236.88, 236.86, 236.84],
       [240.  , 240.  , 240.  , ..., 236.82, 236.79, 236.76],
       [240.  , 240.  , 240.  , ..., 236.76, 236.72, 236.68]])

kernel = np.zeros((3, 3))
# Identity kernel
kernel[1, 1] = 1
kernel

array([[0., 0., 0.],
       [0., 1., 0.],
       [0., 0., 0.]])

compare(dubs, convolve(dubs, kernel))

# Setup the previous kernel (identity kernel)
kernel = np.zeros((3, 3))
kernel[1, 1] = 1
# Double the previous kernel minus the box blur kernel
# This is called the sharpen kernel
# We take 2 times the identity minus box blur since that will keep units in the same range
kernel = 2 * kernel - box_blur_3x3
kernel

array([[-0.11111111, -0.11111111, -0.11111111],
       [-0.11111111,  1.88888889, -0.11111111],
       [-0.11111111, -0.11111111, -0.11111111]])

compare(dubs, convolve(dubs, kernel))

values = [[1, 2, 4, 7, 11, 11, 7, 4, 2, 1]]
kernel = np.array(values) * np.array(values).T
# Gaussian blur kernel
kernel = kernel / kernel.sum()
# Show values rounded to two decimal places
kernel.round(2)

array([[0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.01, 0.01, 0.01, 0.01, 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.01, 0.01, 0.02, 0.02, 0.01, 0.01, 0.  , 0.  ],
       [0.  , 0.01, 0.01, 0.02, 0.03, 0.03, 0.02, 0.01, 0.01, 0.  ],
       [0.  , 0.01, 0.02, 0.03, 0.05, 0.05, 0.03, 0.02, 0.01, 0.  ],
       [0.  , 0.01, 0.02, 0.03, 0.05, 0.05, 0.03, 0.02, 0.01, 0.  ],
       [0.  , 0.01, 0.01, 0.02, 0.03, 0.03, 0.02, 0.01, 0.01, 0.  ],
       [0.  , 0.  , 0.01, 0.01, 0.02, 0.02, 0.01, 0.01, 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.01, 0.01, 0.01, 0.01, 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ]])

compare(dubs, convolve(dubs, kernel))

def template_match(image, template):
    """
    Given a grayscale image and template, returns a numpy array that stores the similarity of the
    template at each position in the image.
    """
    image_h, image_w = image.shape
    template_h, template_w = template.shape

    # De-mean the template
    demeaned_template = template - template.mean()

    # Construct result of the expected output size
    result = np.zeros((image_h - template_h + 1,  image_w - template_w + 1))
    result_h, result_w = result.shape

    for y in range(result_h):
        for x in range(result_w):
            # Select corresponding subimage
            subimage = image[y:y + template_h, x:x + template_w]

            # De-mean the subimage
            demeaned_subimage = subimage - subimage.mean()

            # Compute sum of element-wise products
            similarity = (demeaned_subimage * demeaned_template).sum()
            result[y, x] = similarity

    return result


match = template_match(dubs, dubs_eye)
compare(dubs, match)
np.unravel_index(match.argmax(), match.shape)

(50, 300)

Convolutions¶

Convolutions as mathematical operators¶

Practice: Get subimages¶

Practice: Sum of element-wise products¶

Understanding kernels¶

Practice: Template match¶