\begin{thebibliography}{1}

\bibitem{duchi2011adaptive}
John Duchi, Elad Hazan, and Yoram Singer.
\newblock Adaptive subgradient methods for online learning and stochastic optimization.
\newblock {\em Journal of Machine Learning Research}, 12:2121--2159, 2011.

\bibitem{golovin2013large}
Daniel Golovin, D.~Sculley, H.~Brendan McMahan, and Michael Young.
\newblock Large-scale learning with less {RAM} via randomization.
\newblock {\em arXiv preprint arXiv:1303.4664}, 2013.

\bibitem{mcmahan2011follow}
H.~Brendan McMahan.
\newblock Follow-the-regularized-leader and mirror descent: Equivalence theorems and {L1} regularization.
\newblock In {\em International Conference on Artificial Intelligence and Statistics}, pages 525--533, 2011.

\bibitem{mcmahan2010adaptive}
H.~Brendan McMahan and Matthew Streeter.
\newblock Adaptive bound optimization for online convex optimization.
\newblock {\em arXiv preprint arXiv:1002.4908}, 2010.

\bibitem{xiao2010dual}
Lin Xiao.
\newblock Dual averaging methods for regularized stochastic learning and online optimization.
\newblock {\em Journal of Machine Learning Research}, 11:2543--2596, 2010.

\end{thebibliography}