{ "cells": [ { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "L8TXuf_fwYBT" }, "source": [ "# Getting some data" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "autoexec": { "startup": false, "wait_interval": 0 } }, "colab_type": "code", "id": "rvly6T8f3QDV" }, "outputs": [], "source": [ "# We'll be downloading this middlebury OpticFlow Dataset, and storing in ./data\n", "dataurl = \"http://vision.middlebury.edu/flow/data/comp/zip/eval-gray-allframes.zip\"\n", "datadir = \"./data\"\n", "dataset = \"./data/eval-data-gray/Urban\"\n", "\n", "# Intialize the images array that we will populate\n", "images = []" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "autoexec": { "startup": false, "wait_interval": 0 } }, "colab_type": "code", "id": "TDoyXuRB9Aay" }, "outputs": [], "source": [ "# Download, unzip and read dataset into images\n", "\n", "import urllib\n", "import zipfile\n", "import os\n", "from skimage import io\n", "\n", "if not os.path.exists(datadir):\n", " os.makedirs(datadir)\n", "\n", "with open(datadir + '/data.zip', 'wb') as f: \n", " f.write(urllib.request.urlopen(dataurl).read())\n", " f.close()\n", " zip_ref = zipfile.ZipFile(datadir + '/data.zip', 'r')\n", " zip_ref.extractall(datadir)\n", " \n", " imagenames = sorted([dataset + \"/\" + s for s in os.listdir(dataset)])\n", " images = [io.imread(i).astype(float)/255.0 for i in imagenames]\n" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "maleybObwf-_" }, "source": [ "# Visualizing the Dataset" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "autoexec": { "startup": false, "wait_interval": 0 } }, "colab_type": "code", "id": "4Xb2yXM69cX2" }, "outputs": [], "source": [ "# Display Utilities\n", "\n", "import numpy as np\n", "from skimage import io\n", "from skimage import transform\n", "from matplotlib import pyplot\n", "from mpl_toolkits import mplot3d\n", "\n", "\"\"\"\n", "Display a list of images or image filenames in a single figure\n", "Use 'cols' and 'scale' to adjust the layout and view dimensions\n", "\"\"\"\n", "def show_images(images, cols = 1, titles = None, scale = 1):\n", " n_images = len(images)\n", " rows = int(np.ceil(n_images/float(cols)))\n", " if titles is None: titles = ['Image (%d)' % i for i in range(0,n_images)]\n", " if isinstance(titles, str): titles = [titles] * n_images\n", " \n", " fig = pyplot.figure()\n", " pyplot.axis('off')\n", " # Load any filenames\n", " images = [ (io.imread(i) if isinstance(i,str) else i) for i in images]\n", " vmin = np.amin([np.amin(i) for i in images])\n", " vmax = np.amax([np.amax(i) for i in images])\n", " \n", " for n, (image, title) in enumerate(zip(images, titles)):\n", " # Load any filenames\n", " a = fig.add_subplot(rows, cols, n+1)\n", " a.axes.get_xaxis().set_visible(False)\n", " a.axes.get_yaxis().set_visible(False)\n", " if image.ndim == 2:\n", " pyplot.gray()\n", " pyplot.imshow(image, vmin=vmin, vmax=vmax)\n", " a.set_title(title)\n", " fig.set_size_inches( scale * np.array(fig.get_size_inches()) * np.array([cols,rows]) )\n", " pyplot.show()\n", " \n", "\"\"\"\n", "Display a 3D cost-plot over 2D parameter space image\n", "\"\"\"\n", "def show_cost_plot(costimage):\n", " normcostimage = costimage / np.amax(costimage)\n", " cm = pyplot.cm.get_cmap(\"plasma\")\n", " costplotrgb = np.array([cm(i) for i in normcostimage])\n", "\n", " fig = pyplot.figure()\n", " ax = pyplot.axes(projection='3d')\n", " ax.view_init(40, 20)\n", " C, R = np.meshgrid(\n", " np.linspace(-costimage.shape[1]/2.0, +costimage.shape[1]/2.0, costimage.shape[1]),\n", " np.linspace(-costimage.shape[0]/2.0, +costimage.shape[0]/2.0, costimage.shape[0])\n", " )\n", " ax.plot_surface(C, R, costimage, rstride=1, cstride=1, facecolors=costplotrgb);\n", " ax.plot_surface(C, R, np.zeros(shape=costimage.shape), rstride=1, cstride=1, facecolors=costplotrgb);\n", "\n", " \n", "'''\n", "Extract a size=(rows,cols) patch from 'image' with top-left pixel at pos=(row,col)\n", "Support sub-pixel locations with bilinear interpolation\n", "'''\n", "def GetPatch(image, pos=(0,0), size=(50,50)):\n", " # We're inverting the ordinates here, since EuclideanTransform expects (x,y) order\n", " return transform.warp(image, transform.EuclideanTransform(translation=(pos[1],pos[0])), output_shape=size)\n", " \n", "'''\n", "Destructures lists of tuples into tuple of lists. The logical inverse of zip.\n", "'''\n", "def unzip(x):\n", " return tuple([list(tup) for tup in zip(*x)])" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "autoexec": { "startup": false, "wait_interval": 0 }, "base_uri": "https://localhost:8080/", "height": 377 }, "colab_type": "code", "executionInfo": { "elapsed": 4056, "status": "ok", "timestamp": 1524721551626, "user": { "displayName": "Richard Newcombe", "photoUrl": "//lh6.googleusercontent.com/-_m_cAJvBJpU/AAAAAAAAAAI/AAAAAAAALo0/UJnR0YgR-Tk/s50-c-k-no/photo.jpg", "userId": "112849999869325336517" }, "user_tz": 420 }, "id": "pwAcshjAKvCy", "outputId": "05dfdca3-8f3c-4f21-b333-de04264fbf92" }, "outputs": [], "source": [ "# Display dataset as an animation:\n", "from IPython.display import HTML\n", "%matplotlib inline\n", "import numpy as np\n", "import matplotlib\n", "import matplotlib.animation as animation\n", "\n", "fig = pyplot.figure()\n", "im = pyplot.imshow(images[0], animated=True);\n", "pyplot.axis('off')\n", "def updatefig(frame):\n", " global images\n", " im.set_array(images[frame])\n", " return im,\n", "pyplot.close()\n", "\n", "HTML(animation.FuncAnimation(fig, updatefig, interval=100, blit=True, frames=range(0,len(images))).to_jshtml())\n", "\n", "# Preview dataset images\n", "# show_images( images[0::2], cols=4, scale=0.75)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "5FZQC4TSwnkx" }, "source": [ "# Tracking Patches" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "autoexec": { "startup": false, "wait_interval": 0 }, "base_uri": "https://localhost:8080/", "height": 570 }, "colab_type": "code", "executionInfo": { "elapsed": 737, "status": "ok", "timestamp": 1524721553145, "user": { "displayName": "Richard Newcombe", "photoUrl": "//lh6.googleusercontent.com/-_m_cAJvBJpU/AAAAAAAAAAI/AAAAAAAALo0/UJnR0YgR-Tk/s50-c-k-no/photo.jpg", "userId": "112849999869325336517" }, "user_tz": 420 }, "id": "3TrNmWR0LAgI", "outputId": "c615e6e1-4b15-4332-aa7d-901a17be2ad2" }, "outputs": [], "source": [ "# Let's see how we could track a patch over time\n", "\n", "offset0 = (220.0, 220.0)\n", "#offset0 = (220.0, 460.0)\n", "patch0 = GetPatch(images[0], pos=offset0)\n", "\n", "import matplotlib.patches as patches\n", "fig,ax = pyplot.subplots(1)\n", "ax.axis('off')\n", "ax.imshow(images[0])\n", "ax.add_patch(patches.Rectangle( (offset0[1],offset0[0]), patch0.shape[0], patch0.shape[1], linewidth=1, edgecolor='r', facecolor='none'))\n", "pyplot.show()\n", "\n", "show_images([patch0],titles=['patch'])" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "_tVCJvmKgWPR" }, "source": [ "## Exhaustive Search / Parameter sweep\n", "$E(x,y) = \\sum_{i,j\\in\\Omega}(I_\\text{patch}(i,j) - I_\\text{live}(i+x,j+y))^2$" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "autoexec": { "startup": false, "wait_interval": 0 }, "base_uri": "https://localhost:8080/", "height": 266 }, "colab_type": "code", "executionInfo": { "elapsed": 1811, "status": "ok", "timestamp": 1524721555130, "user": { "displayName": "Richard Newcombe", "photoUrl": "//lh6.googleusercontent.com/-_m_cAJvBJpU/AAAAAAAAAAI/AAAAAAAALo0/UJnR0YgR-Tk/s50-c-k-no/photo.jpg", "userId": "112849999869325336517" }, "user_tz": 420 }, "id": "5ajmJ2r4REia", "outputId": "3b6a5ca1-bf8b-47be-8f13-72d8d5610b14" }, "outputs": [], "source": [ "# Exhaustive search for the patch in the next image\n", "\n", "# Generate Cost plot\n", "E = np.zeros(shape=(30,30))\n", "for r in range(0, E.shape[0]):\n", " for c in range(0, E.shape[1]):\n", " patch1 = GetPatch(images[1], (offset0[0] + r - E.shape[0]/2.0, offset0[1] + c - E.shape[1]/2.0))\n", " E[r,c] = np.sum(np.square(patch1 - patch0))\n", "\n", "# Search for minima (best_im is in patch image coords, best is in final coordinates)\n", "best_im = np.unravel_index(E.argmin(), E.shape)\n", "best = (offset0[0] + best_im[0] - E.shape[0]/2.0, offset0[1] + best_im[1] - E.shape[1]/2.0)\n", "print(\"Position of lowest cost: \", best)\n", " \n", "# Display as 3D surface\n", "show_cost_plot(E)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "t3_x-PsyxCe9" }, "source": [ "## Images are largely smooth" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "autoexec": { "startup": false, "wait_interval": 0 }, "base_uri": "https://localhost:8080/", "height": 645 }, "colab_type": "code", "executionInfo": { "elapsed": 1957, "status": "ok", "timestamp": 1524721557131, "user": { "displayName": "Richard Newcombe", "photoUrl": "//lh6.googleusercontent.com/-_m_cAJvBJpU/AAAAAAAAAAI/AAAAAAAALo0/UJnR0YgR-Tk/s50-c-k-no/photo.jpg", "userId": "112849999869325336517" }, "user_tz": 420 }, "id": "ZTCGapboK-Yn", "outputId": "8a6323a9-e14d-41bc-ddcd-ec067bad7e74" }, "outputs": [], "source": [ "# Compute central-difference gradients in x and y\n", "\n", "derivs = [np.gradient(i) for i in images]\n", "show_images( images[0::2], cols=4, scale=0.75)\n", "show_images( [ d[0] for d in derivs[0::2]], titles=\"dx\", cols=4, scale=0.75)\n", "show_images( [ d[1] for d in derivs[0::2]], titles=\"dy\", cols=4, scale=0.75)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "1qV1QHDUxKno" }, "source": [ "# Lucas kanade" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "autoexec": { "startup": false, "wait_interval": 0 }, "base_uri": "https://localhost:8080/", "height": 52 }, "colab_type": "code", "executionInfo": { "elapsed": 364, "status": "ok", "timestamp": 1524721557558, "user": { "displayName": "Richard Newcombe", "photoUrl": "//lh6.googleusercontent.com/-_m_cAJvBJpU/AAAAAAAAAAI/AAAAAAAALo0/UJnR0YgR-Tk/s50-c-k-no/photo.jpg", "userId": "112849999869325336517" }, "user_tz": 420 }, "id": "rTvBJxgYh4_1", "outputId": "4f9523ee-9c7f-4a9d-e9dd-a7274614e248" }, "outputs": [], "source": [ "import scipy.linalg as slin\n", "import scipy\n", "\n", "def LucasKanade2d(frame0, frame1, p0, guess_p1=None, its=20, blur_sigma=0.0):\n", " debug = []\n", " \n", " # 'reference' and 'live' images\n", " frame0 = scipy.ndimage.filters.gaussian_filter(frame0, blur_sigma)\n", " frame1 = scipy.ndimage.filters.gaussian_filter(frame1, blur_sigma)\n", " if guess_p1 is None: guess_p1 = p0\n", " p = guess_p1\n", "\n", " # reference patch\n", " patch0 = GetPatch(frame0, p0)\n", " dpatch0_drow, dpatch0_dcol = np.gradient(patch0)\n", "\n", " # Minimize Error function via non-linear least squares\n", " for i in range(0,its):\n", " patch1 = GetPatch(frame1, p)\n", "\n", " # A.x=b (A is actually constant, and can be taken out of this loop!)\n", " A = np.zeros((patch0.size,2))\n", " A[:,0] = dpatch0_drow.reshape(patch0.size)\n", " A[:,1] = dpatch0_dcol.reshape(patch0.size)\n", " diff = patch0 - patch1\n", " b = diff.reshape(diff.size)\n", " err = np.sum(np.square(b))\n", "\n", " # Form the normal equations, A'Ax = A'b\n", " AtA = np.matmul(A.transpose(), A)\n", " Atb = np.matmul(A.transpose(), b)\n", "\n", " # Solve using cholesky decomposition, since A'A is positive semi-definite\n", " L = np.linalg.cholesky(AtA)\n", " x = slin.solve_triangular(L, Atb, lower=True)\n", " debug.append( (p[0], p[1], err, patch1) )\n", " p += x\n", " return p, debug, frame0, frame1\n", "\n", "print(\"Started at: \",offset0)\n", "opt_p1, debug, frame0, frame1 = LucasKanade2d(images[0], images[1], offset0, blur_sigma=1.0)\n", "print(\"Converged to: \",opt_p1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "autoexec": { "startup": false, "wait_interval": 0 }, "base_uri": "https://localhost:8080/", "height": 734 }, "colab_type": "code", "executionInfo": { "elapsed": 3264, "status": "ok", "timestamp": 1524721561051, "user": { "displayName": "Richard Newcombe", "photoUrl": "//lh6.googleusercontent.com/-_m_cAJvBJpU/AAAAAAAAAAI/AAAAAAAALo0/UJnR0YgR-Tk/s50-c-k-no/photo.jpg", "userId": "112849999869325336517" }, "user_tz": 420 }, "id": "1xyrcyMYELZ4", "outputId": "b43c266d-28d6-423e-edc9-300184236bc8" }, "outputs": [], "source": [ "# Visualize iterations...\n", "patch0 = GetPatch(frame0, offset0)\n", "prs, pcs, errs, p1s = unzip(debug)\n", "its = len(debug)\n", "\n", "# Generate Cost plot for comparison\n", "E = np.zeros(shape=(30,30))\n", "for r in range(0, E.shape[0]):\n", " for c in range(0, E.shape[1]):\n", " patch1 = GetPatch(frame1, (offset0[0] + r - E.shape[0]/2.0, offset0[1] + c - E.shape[1]/2.0))\n", " E[r,c] = np.sum(np.square(patch1 - patch0))\n", "\n", "# Visualize optimization trajectory and error\n", "fig = pyplot.figure()\n", "ax = pyplot.axes(projection='3d')\n", "ax.view_init(60,20)\n", "# ax.view_init(90,0)\n", "C, R = np.meshgrid(\n", " np.linspace(offset0[1]-E.shape[1]/2.0, offset0[1]+E.shape[1]/2.0, E.shape[1]),\n", " np.linspace(offset0[0]-E.shape[0]/2.0, offset0[0]+E.shape[0]/2.0, E.shape[0])\n", ")\n", "normE = E / np.amax(E)\n", "cm = pyplot.cm.get_cmap(\"plasma\")\n", "Ergb = np.array([cm(i) for i in normE])\n", "ax.plot3D(xs=pcs, ys=prs, zs=np.array(errs));\n", "ax.plot_surface(C, R, E, rstride=1, cstride=1, facecolors=Ergb);\n", "ax.plot_surface(C, R, np.zeros(shape=E.shape), rstride=1, cstride=1, facecolors=Ergb);\n", "ax.scatter([pcs[0],pcs[-1]], [prs[0],prs[-1]], [errs[0],errs[-1]], color=['red','green']);\n", "\n", "# Show image patches at each iteration (every 'sub' subsamples), at 'scale' display size\n", "sub=2\n", "scale=0.5\n", "show_images([patch0]*int(its/sub), cols=its/sub, titles='patch0', scale=scale)\n", "show_images(p1s[0::sub], cols=its/sub, titles='patch1', scale=scale)\n", "show_images([ np.sqrt(np.square(patch0 - p1)) for p1 in p1s[0::sub]], cols=its/sub, titles='residuals', scale=scale)\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "autoexec": { "startup": false, "wait_interval": 0 }, "base_uri": "https://localhost:8080/", "height": 269 }, "colab_type": "code", "executionInfo": { "elapsed": 542, "status": "ok", "timestamp": 1524721561619, "user": { "displayName": "Richard Newcombe", "photoUrl": "//lh6.googleusercontent.com/-_m_cAJvBJpU/AAAAAAAAAAI/AAAAAAAALo0/UJnR0YgR-Tk/s50-c-k-no/photo.jpg", "userId": "112849999869325336517" }, "user_tz": 420 }, "id": "DGFKHbG9KT_5", "outputId": "9f829429-5f48-4707-8228-964d2ccecdf8" }, "outputs": [], "source": [ "# Visualize the patch we found!\n", "import matplotlib.patches as patches\n", "fig,ax = pyplot.subplots(1)\n", "ax.axis('off')\n", "ax.imshow(frame0)\n", "for p in zip(pcs,prs):\n", " ax.add_patch(patches.Rectangle(p, patch1.shape[0], patch1.shape[1], linewidth=1, edgecolor='r', facecolor='none'))\n", "pyplot.show()" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "q0XaLEfWxkG8" }, "source": [ "# Putting it together: Track the Sequence" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "autoexec": { "startup": false, "wait_interval": 0 } }, "colab_type": "code", "id": "j6kWY3cVhHBL" }, "outputs": [], "source": [ "# Run patch tracking on whole sequence\n", "\n", "# Starting location\n", "p = offset0\n", "track = [p]\n", "\n", "# Run Lukas Kanade tracking on each frame\n", "for i in range(1, len(images)):\n", " # Coarse to fine (for efficiency, you would normally downsample in a pyramid. Here we just blur)\n", " p,_,_,_ = LucasKanade2d(images[0], images[i], offset0, guess_p1 = p, its=20, blur_sigma=2.0)\n", " p,_,_,_ = LucasKanade2d(images[0], images[i], offset0, guess_p1 = p, its=10, blur_sigma=0.0)\n", " track.append(tuple(p))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "autoexec": { "startup": false, "wait_interval": 0 }, "base_uri": "https://localhost:8080/", "height": 377 }, "colab_type": "code", "executionInfo": { "elapsed": 2502, "status": "ok", "timestamp": 1524721565501, "user": { "displayName": "Richard Newcombe", "photoUrl": "//lh6.googleusercontent.com/-_m_cAJvBJpU/AAAAAAAAAAI/AAAAAAAALo0/UJnR0YgR-Tk/s50-c-k-no/photo.jpg", "userId": "112849999869325336517" }, "user_tz": 420 }, "id": "wOVK_5iLoU9_", "outputId": "95a197e2-de44-4e1a-a1d6-b437ab483651" }, "outputs": [], "source": [ "# Display tracking result as an animation:\n", "%matplotlib inline\n", "from IPython.display import HTML\n", "import matplotlib.patches as patches\n", "import matplotlib.animation as animation\n", "\n", "fig,ax = pyplot.subplots(1)\n", "ax.axis('off')\n", "im = ax.imshow(frame0)\n", "pyplot.close()\n", "\n", "def updatefig(frame):\n", " global images\n", " ax.patches = []\n", " ax.add_patch(patches.Rectangle( (track[frame][1],track[frame][0]), patch1.shape[0], patch1.shape[1], linewidth=1, edgecolor='r', facecolor='none'))\n", " im.set_array(images[frame])\n", " return im,\n", " \n", "HTML(animation.FuncAnimation(fig, updatefig, interval=100, blit=True, frames=range(0,len(images))).to_jshtml())" ] } ], "metadata": { "colab": { "collapsed_sections": [], "default_view": {}, "name": "LucasKanade", "provenance": [], "version": "0.3.2", "views": {} }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.6" } }, "nbformat": 4, "nbformat_minor": 1 }