%
% This is the LaTeX template file for lecture notes for CS294-8,
% Computational Biology for Computer Scientists. When preparing
% LaTeX notes for this class, please use this template.
%
% To familiarize yourself with this template, the body contains
% some examples of its use. Look them over. Then you can
% run LaTeX on this file. After you have LaTeXed this file then
% you can look over the result either by printing it out with
% dvips or using xdvi.
%
% This template is based on the template for Prof. Sinclair's CS 270.
\documentclass[twoside]{article}
\usepackage[usenames,dvipsnames,svgnames,table]{xcolor}
% Fixed: the original triple {0.0,0,0.9} is blue, contradicting the name.
\definecolor{darkgreen}{rgb}{0.0,0.5,0.0}
\usepackage[colorlinks=true,pdfpagemode=UseNone,citecolor=OliveGreen,linkcolor=BrickRed,urlcolor=BrickRed,
%pagebackref,
pdfstartview=FitW]{hyperref}
%\usepackage[backref=true, backend=biber, isbn=false, url=true, firstinits=true, maxnames=20, style=alphabetic,]{biblatex}
%\addbibresource{references.bib}
\usepackage{graphics,amsmath,amsthm,thmtools,amssymb,mathtools,enumerate}
\setlength{\oddsidemargin}{0 in}
\setlength{\evensidemargin}{0 in}
\setlength{\topmargin}{-0.6 in}
\setlength{\textwidth}{6.5 in}
\setlength{\textheight}{8.5 in}
\setlength{\headsep}{0.75 in}
\setlength{\parindent}{0 in}
\setlength{\parskip}{0.1 in}
% Shorthand notation macros. \newcommand* (instead of \def) makes TeX
% report a clash with an existing command rather than silently overwrite it.
\newcommand*{\X}{b}             % scalar/vector sample symbol
\newcommand*{\tX}{\tilde{b}}
\newcommand*{\mX}{B}            % matrix counterpart of \X
\newcommand*{\tmX}{\tilde{B}}
\newcommand*{\bone}{\mathbf{1}} % all-ones vector (was obsolete {\bf 1})
\newcommand*{\eps}{\epsilon}
\newcommand*{\R}{\mathbb{R}}    % the reals
\newcommand*{\mP}{\mathbb{P}}   % probability measure
\newcommand*{\mE}{\mathbb{E}}   % expectation
\newcommand*{\cT}{\mathcal{T}}  % was obsolete {\cal T}
\newcommand*{\reff}{\textup{Reff}} % effective resistance
%
% The following commands set up the lecnum (lecture number)
% counter and make various numbering schemes work relative
% to the lecture number.
%
\newcounter{lecnum}
% Number pages, sections, equations, figures and tables relative to the
% current lecture/problem-set number, e.g. page "4-2", equation "(4.1)".
\renewcommand{\thepage}{\thelecnum-\arabic{page}}
\renewcommand{\thesection}{\thelecnum.\arabic{section}}
\renewcommand{\theequation}{\thelecnum.\arabic{equation}}
\renewcommand{\thefigure}{\thelecnum.\arabic{figure}}
\renewcommand{\thetable}{\thelecnum.\arabic{table}}
% Probability/expectation with auto-sized brackets:
%   \PP{x}{A} -> P_x[A],  \P{A} -> P[A]  (overrides the kernel pilcrow \P),
%   \E{X}  -> E[X],       \EE{x}{X} -> E_x[X].
\newcommand{\PP}[2]{\mP_{#1}\left[#2\right]}
\newcommand{\weight}[1]{w(#1)}
\renewcommand{\P}[1]{\mP\left[#1\right]}
\newcommand{\E}[1]{\mE\left[#1\right]}
\newcommand{\EE}[2]{\mE_{#1}\left[#2\right]}
\newcommand{\norm}[1]{\|#1\|}
% Upright multi-letter operators with correct surrounding spacing.
\DeclareMathOperator{\trace}{trace}
\DeclareMathOperator{\vol}{vol}
\DeclareMathOperator{\Var}{Var}
\DeclareMathOperator{\nnz}{nnz}
%
% The following macro is used to generate the header.
%
% \lecture{NUMBER}{DEADLINE}: typesets the boxed problem-set header and sets
% the running head and counters.
% FIX: the template declared FOUR arguments but the body uses only #1 and #2,
% and the document calls \lecture with two braced arguments (L125 in the
% original). With [4], TeX would silently swallow the next two tokens of the
% document (here, `\begin' and `{enumerate}`) and break compilation, so the
% macro is declared with [2].
\newcommand{\lecture}[2]{
\pagestyle{myheadings}
\thispagestyle{plain}
\newpage
\setcounter{lecnum}{#1}
\setcounter{page}{1}
\noindent
\begin{center}
\framebox{
\vbox{\vspace{2mm}
\hbox to 6.28in { {\bf CSE 521: Design and Analysis of Algorithms
\hfill Winter 2017} }
\vspace{4mm}
\hbox to 6.28in { {\Large \hfill Problem Set #1 \hfill} }
\vspace{2mm}
\hbox to 6.28in { \hfill {\it Deadline: #2} \hfill}
\vspace{2mm}}
}
\end{center}
\markboth{Problem Set #1: #2}{Problem Set #1: #2}
% {\bf Disclaimer}: {\it These notes have not been subjected to the
% usual scrutiny reserved for formal publications. They may be distributed
% outside this class only with the permission of the Instructor.}
% \vspace*{4mm}
}
%
% Convention for citations is authors' initials followed by the year.
% For example, to cite a paper by Leighton and Maggs you would type
% \cite{LM89}, and to cite a paper by Strassen you would type \cite{S69}.
% (To avoid bibliography problems, for now we redefine the \cite command.)
% Also commands that create a suitable format for the reference list.
\renewcommand{\cite}[1]{[#1]}
% Hand-rolled reference-list environment: \beginrefs ... \endrefs, with each
% entry introduced by \bibentry{KEY}, which typesets its label as [KEY].
% The list co-opts the equation counter for numbering; the trailing %'s
% suppress spurious spaces from the line breaks inside the definition.
\def\beginrefs{\begin{list}%
{[\arabic{equation}]}{\usecounter{equation}
\setlength{\leftmargin}{2.0truecm}\setlength{\labelsep}{0.4truecm}%
\setlength{\labelwidth}{1.6truecm}}}
\def\endrefs{\end{list}}
\def\bibentry#1{\item[\hbox{[#1]}]}
%Use this command for a figure; it puts a figure in wherever you want it.
%usage: \fig{NUMBER}{SPACE-IN-INCHES}{CAPTION}
% \fig{NUMBER}{SPACE-IN-INCHES}{CAPTION}: reserves vertical space for a
% manually pasted figure and typesets a numbered caption line.
% FIX: trailing %'s added so the line breaks inside the definition do not
% inject spurious interword spaces into the output.
\newcommand{\fig}[3]{%
\vspace{#2}%
\begin{center}%
Figure \thelecnum.#1:~#3%
\end{center}%
}
% Use these for theorems, lemmas, proofs, etc.
% Theorem-like environments, all sharing one counter that is reset with the
% lecture number (so "Theorem 4.1", "Lemma 4.2", ... within lecture 4).
\newtheorem{theorem}{Theorem}[lecnum]
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{fact}[theorem]{Fact}
\newtheorem{proposition}[theorem]{Proposition}
\newtheorem{claim}[theorem]{Claim}
\newtheorem{corollary}[theorem]{Corollary}
\newtheorem{definition}[theorem]{Definition}
% "Proof of <X>." environment; closes with the amsthm \qed tombstone
% (the % after \hfill comments out the old hand-drawn \rule tombstone).
\newenvironment{proofof}[1]{{\em Proof of #1.}}{\hfill%\rule{2mm}{2mm}
\qed}
% **** IF YOU WANT TO DEFINE ADDITIONAL MACROS FOR YOURSELF, PUT THEM HERE:
\begin{document}
%FILL IN THE RIGHT INFO.
%\lecture{**LECTURE-NUMBER**}{**DATE**}{**LECTURER**}{**SCRIBE**}
\lecture{4}{Feb 26th (at 12:00PM) in \href{http://canvas.uw.edu}{Canvas}}
%\footnotetext{These notes are partially based on those of Nigel Mansell.}
\begin{enumerate}[1)]
%\item Show that all eigenvalues of any symmetric matrix are real.
\item Prove the following matrix identities:
\begin{enumerate}[a)]
\item Let $A\in\R^{n\times n}$ and let $U\in \R^{n\times k}$ be a matrix with orthonormal columns $U_1,\dots,U_k$. So, $UU^T =\sum_{i=1}^k U_i U_i^T$ is a projection matrix. Show that
$$ \|A-U U^T A\|_F^2 = \|A\|_F^2 - \|U^T A\|_F^2.$$
%\item For any two matrices, $A,B\in\R^{m\times n}$,
%$$ |\trace(AB)| \leq \|A\|_F \|B\|_F.$$
%{\bf Hint:} Recall the Cauchy--Schwarz inequality: For any pair of vectors $u,v$, $\langle u,v\rangle \leq \|u\|\|v\|$.
\item We say a matrix $U\in\R^{n\times n}$ is a unitary matrix if all singular values of $U$ are 1. Show that for any unitary matrix $U$,
$$ UU^T=I.$$
Use this to show that unitary matrices are rotation matrices, i.e., for any vector $v$,
$$ \|Uv\|=\|v\|.$$
\end{enumerate}
\item In this problem we discuss a fast algorithm for approximately estimating the low rank approximation with respect to the Frobenius norm.
\begin{enumerate}[a)]
\item Let $A\in \R^{m\times n}$ and suppose we want to estimate $Av$ for a vector $v\in \R^n$.
Here is a randomized algorithm for this task. Choose the $i$-th column of $A$, $A_i$, with probability
$$ p_i=\frac{\norm{A_i}^2}{\norm{A}_F^2}$$
and let $X=A_i v_i/p_i$. Show that $\E{X}=Av$. Calculate $\Var(X)=\E{\norm{X}^2} -\norm{\mE X}^2$.
\item Next, we use a similar idea to approximate $A$. For $1\leq i\leq s$ let $X_i=\frac{A_j}{\sqrt{s\, p_j}}$ with probability $p_j$ where $1\leq j\leq n$. Let $X\in\R^{m\times s}$ and let $X_i$ be the $i$-th column of $X$. Note that $XX^T =\sum_{i=1}^s X_iX_i^T$. Show that
$$ \mE XX^T = AA^T.$$
Show that $\mE \norm{XX^T - AA^T}_F^2 \leq \frac1s \norm{A}_F^4$.
\item {\bf Extra Credit: } Let $X=\sum_{i=1}^s \sigma_i u_i v_i^T$ be the SVD decomposition of $X$ where $\sigma_1\geq \dots\geq \sigma_s$. Let $U_k$ be the matrix with columns $u_1,\dots,u_k$. So, $U_k U_k^T=\sum_{i=1}^k u_i u_i^T$ is a projection matrix. We want to show that for any such matrix $X$ and $U_k$,
\begin{equation}\label{eq:AUgoal} \norm{A-U_kU_k^T A}_F^2 \leq \norm{A-A_k}_F^2 + 2\sqrt{k} \norm{AA^T-XX^T}_F,
\end{equation}
where $A_k$ is the best rank $k$ approximation of $A$.
Note that if this is true we can simply let $s=O(k/\eps^2)$ and then a random $X$ chosen from part (b) would give
$$ \norm{A-U_kU_k^T A}_F^2 \leq \norm{A-A_k}_F^2 + \eps \norm{A}_F^2.$$
Also, note that the algorithm runs in time $\nnz(A) + O(mk^2/\eps^4)$ as we need to compute the SVD of $X$.
It remains to prove \eqref{eq:AUgoal}. First, by part (a) of Problem 1, we have
$$ \norm{A-U_k U_k^T A}_F^2 \leq \norm{A}_F^2 - \norm{A^T U_k}_F^2.$$
Show that
$$ \left|\norm{A^TU_k}_F^2- \sum_{i=1}^k \sigma_i^2\right| \leq \sqrt{k} \norm{AA^T-XX^T}_F.$$
You can use without proof that
$$ \left| \sum_{i=1}^k \sigma_i^2 -\sum_{i=1}^k \sigma_i(A)^2\right|\leq \sqrt{k}\norm{AA^T-XX^T}_F,$$
where $\sigma_i(A)$ is the $i$-th largest singular value of $A$.
Use the above two equations to conclude \eqref{eq:AUgoal}.
\end{enumerate}
\item In this problem we see applications of expander graphs in optimization. In particular, we see that the maximum cut problem is easy in {\em strong} expander graphs. First, we explain the expander mixing lemma which asserts that expander graphs are very similar to complete graphs.
\begin{theorem}[Expander Mixing Lemma]
Let $G$ be a $d$-regular graph and $1=\lambda_1\geq \lambda_2\geq \dots \geq \lambda_n\geq -1$ be the eigenvalues of the normalized adjacency matrix of $G$, $A/d$. Let $\lambda^*=\max\{\lambda_2,|\lambda_n|\}$. Then, for any two disjoint sets $S,T\subseteq V$,
$$ \left| |E(S,T)| - \frac{d\cdot |S|\cdot|T|}{n}\right| \leq d\cdot\lambda^*\sqrt{|S||T|}. $$
\end{theorem}
Note that $d|S||T|/n$ is the expected number of edges between $S,T$ in a random graph where there is an edge between each pair of vertices $i,j$ with probability $d/n$. So, the above lemma says that in an expander graph, for any two large enough sets $S,T$, the number of edges between $S,T$ is very close to what you see in a random graph.
Use the above theorem to design an algorithm for the maximum cut problem that for any $d$ regular graph returns a set $T$ such that
$$ |E(T,\overline{T})| \geq (1-4\lambda^*)\max_S |E(S,\overline{S})|. $$
Note that the performance of such an algorithm may be terrible if $\lambda^*>1/4$, but in strong expander graphs, we have $\lambda^*\ll 1$; for example, in Ramanujan graphs we have $\lambda^*\leq 2/\sqrt{d}$. So the number of edges cut by the algorithm is very close to the optimal solution as $d\to\infty$.
It turns out that in a random graph $\lambda^*\leq 2/\sqrt{d}$ with high probability. So, it is easy to give a $1+O(1/\sqrt{d})$ approximation algorithm for max cut in most graphs.
\item In this problem you are supposed to implement the spectral partitioning algorithm that we discussed in class. You are given a giant network, ``com-DBLP'', available at \href{https://snap.stanford.edu/data/}{https://snap.stanford.edu/data/}, and you should find a sparse cut in this network. My code has found a cut of sparsity about $1\%$. Note that since the graph is huge you need to carefully store the edges of this graph. You should also use the power method to find the 2nd smallest eigenvalue of the normalized Laplacian matrix. In the output you should write the sparsity of the cut that you find and the ids of the vertices in the smaller side of the cut. Please submit your code and the output to Canvas.
\item {\bf Extra Credit.} In this problem we see applications of expander graphs in coding theory. Error correcting codes are used in all digital transmission and data storage schemes. Suppose we want to transfer $m$ bits over a noisy channel. The noise may flip some of the bits; so \textrm{0101} may become \textrm{1101}. Since the transmitter wants the receiver to correctly receive the message, it needs to send $n>m$ bits encoded such that the receiver can recover the message even in the presence of noise. For example, a naive way is to send every bit 3 times; so, $0101$ becomes $000111000111$. If only 1 bit is flipped in the transmission, the receiver can recover the message, but if 2 bits are flipped, e.g., $110111000111$, recovery is impossible. This is a very inefficient coding scheme.
An error correcting code is a mapping $C:\{0,1\}^m\to\{0,1\}^n$. Every string in the image of $C$ is called a codeword.
We say a coding scheme is linear, if there is a matrix $M\in\{0,1\}^{(n-m)\times n}$ such that for any $y\in\{0,1\}^n$, $y$ is a codeword if and only if
$$ My=0.$$
Note that we are doing addition and multiplication in the field $F_2$.
\begin{enumerate}[a)]
\item Suppose $C$ is a linear code. Construct a matrix $A\in\{0,1\}^{n\times m}$ such that for any $x\in\{0,1\}^m$, $Ax$ is a code word and that for any distinct $x,y\in\{0,1\}^m$, $Ax\neq Ay$.
\end{enumerate}
The rate of a code $C$ is defined as $r=m/n$. Codes of higher rate are more efficient; here we will be interested in designing codes with $r$ being an absolute constant bounded away from 0. The Hamming distance between two codewords $c^1,c^2$ is the number of bits in which they differ, $\norm{c^1-c^2}_1$. The minimum distance of a code is $\min_{c^1\neq c^2} \norm{c^1-c^2}_1$.
\begin{enumerate}[b)]
\item Show that the minimum distance of a linear code is the minimum Hamming weight of its codewords, i.e., $\min_c \norm{c}_1$.
\end{enumerate}
Note that if $C$ has distance $d$, then it is possible to decode a message if less than $d/2$ of the bits are flipped.
The minimum relative distance of $C$ is $\delta=\frac1n \min\norm{c^1-c^2}_1$. So, ideally, we would like to have codes with constant minimum relative distance; in other words, we would like to say even if a constant fraction of the bits are flipped still one can recover the original message.
Next, we describe an error correcting code scheme based on bipartite expander graphs with constant rate and constant minimum relative distance.
An $(n_L,n_R,D,\gamma,\alpha)$-expander is a bipartite graph $G(L\cup R,E)$ such that $|L|=n_L,|R|=n_R$ and every vertex of $L$ has degree $D$, such that for any set $S\subseteq L$ of size $|S|\leq \gamma n_L$,
$$N(S) \geq \alpha|S|.$$
In the above, $N(S)\subseteq R$ is the set of neighbors of vertices of $S$.
One can generate the above family of bipartite expanders using ideas similar to Problem 1. We use the following theorem without proving it.
\begin{theorem}
For any $\eps>0$ and $m\leq n$ there exists $\gamma>0$ and $D\geq 1$ such that a $(n,m,D,\gamma,D(1-\eps))$-expander exists. Additionally, $D=\Theta(\log(n_L/n_R)/\eps)$ and $\gamma n_L=\Theta(\eps n_R/D)$.
\end{theorem}
Now, we describe how to construct the matrix $M$. We start with a $(n_L,n_R,D,\gamma,D(1-\eps))$ expander for $n_L=n,n_R=n-m$. For our calculations it is enough to let $n=2m$.
We name the vertices of $L$, $\{1,2,\dots,n\}$; so each bit of a codeword corresponds to a vertex in $L$. We let $M\in\{0,1\}^{(n-m)\times n}$ be the biadjacency matrix corresponding to this graph, i.e., $M_{i,j}=1$ if and only if the $i$-th vertex in $R$ is connected to the $j$-th vertex in $L$.
Observe that by construction this code has rate $1/2$. Next, we see that $\delta$ is bounded away from $0$.
\begin{enumerate}[c)]
\item For a set $S\subseteq L$, let $U(S)$ be the set of unique neighbors of $S$, i.e., each vertex in $U(S)$ is connected to exactly one vertex of $S$. Show that for any $S\subseteq L$ such that $|S|\leq \gamma n$,
$$ |U(S)| \geq D(1-2\eps)|S|.$$
\end{enumerate}
\begin{enumerate}[d)]
\item Show that if $\eps<1/2$ the minimum distance of $C$ is at least $\gamma n$.
\end{enumerate}
The decoding algorithm is simple to describe but we will not describe it here.
\end{enumerate}
%\printbibliography
\end{document}