1. Projected Gradient Descent
\[\begin{align}
y_{t+1} &= x_t-\gamma \nabla f(x_t)\\
x_{t+1} &= \prod_X(y_{t+1}) = \arg\min_{x}||x-y_{t+1}||^2
\end{align}
\]
\({\large \textbf{Theorem 3.1}:} X \text{ be closed and convex}\)
\[\begin{align}
&(i) (x-\prod_X(y))^T(y-\prod_X(y))\leq 0\\
&(ii)||x-\prod_X(y)||^2+||y-\prod_X(y)||^2\leq ||x-y||^2
\end{align}
\]
2. Bounded gradients: \(O(1/\epsilon^2)\) steps
\({\large \textbf{Theorem 3.2}:} f:dom(f)\rightarrow\mathbb{R}\text{ be convex and differentiable, }X\in dom(f) \text{ be convex and closed. }x^*\text{ is the minimizer. Suppose }||x_0-x^*||^2\leq R, ||\nabla f(x)||\leq B \text{ for all }x\in X.\text{ Choosing the stepsize:}\)
\[\begin{align}
\gamma = \frac{R}{B\sqrt{T}}
\end{align}
\]
\(\text{Projected gradient descent yields:}\)
\[\frac{1}{T}\sum_{t=0}^{T-1}(f(x_t)-f(x^*))\leq \frac{RB}{\sqrt{T}}
\]
\(\large\textbf{Proof:}\)
\(\text{From }(1) \text{ and previous vanilla analysis:}\)
\[\begin{align}
g_t^T(x_t-x^*)&=\frac{1}{2\gamma}[\gamma^2||g_t||^2+||x_t-x^*||^2-||y_{t+1}-x^*||^2]
\end{align}
\]
\(\text{From (4): let }x=x^*,y=y_{t+1}\)
\[\begin{align}
||x^*-\prod_X(y_{t+1})||^2+||y_{t+1}-\prod_X(y_{t+1})||^2\leq ||x^*-y_{t+1}||^2
\end{align}
\]
\(\text{i.e.}\)
\[\begin{align}
||x^*-x_{t+1}||^2+||y_{t+1}-x_{t+1}||^2\leq ||x^*-y_{t+1}||^2
\end{align}
\]
\(\text{Then we drop the second term:}\)
\[\begin{align}
||x^*-x_{t+1}||^2\leq ||x^*-y_{t+1}||^2
\end{align}
\]
\(\text{Hence we get from (6):}\)
\[\begin{align}
g_t^T(x_t-x^*)&=\frac{1}{2\gamma}[\gamma^2||g_t||^2+||x_t-x^*||^2-||y_{t+1}-x^*||^2]\\
&\leq \frac{1}{2\gamma}[\gamma^2||g_t||^2+||y_t-x^*||^2-||y_{t+1}-x^*||^2]
\end{align}
\]
\(\text{From convexity: }f(x^*)\geq f(x_t)+g_t^T(x^*-x_t),\text{ hence:}\)
\[\begin{align}
f(x_t)-f(x^*)&\leq g_t^T(x_t-x^*)\\
&\leq \frac{1}{2\gamma}[\gamma^2||g_t||^2+||y_t-x^*||^2-||y_{t+1}-x^*||^2]
\end{align}
\]
\(\text{Finally:}\)
\[\begin{align}
\frac{1}{T}\sum_{t=0}^{T-1}[f(x_t)-f(x^*)]&\leq \frac{\gamma}{2}\sum_{t=0}^{T-1}||g_t||^2+\frac{1}{2\gamma}[||y_0-x^*||^2-||y_T-x^*||^2]\\
&\leq \frac{\gamma}{2}TB^2+\frac{1}{2\gamma}R^2
\end{align}
\]
3. Smooth convex functions: \(O(1/\epsilon)\) steps
\({\large \textbf{Lemma 3.3}: } f:dom(f)\rightarrow \mathbb{R}\text{ be differentiable and smooth with }L.\text{ Choosing stepsize: }\gamma = \frac{1}{L}\)
\(\text{Projected gradient descent satisfies:}\)
\[\begin{align}
f(x_{t+1})\leq f(x_t)-\frac{1}{2L}||g_t||^2+\frac{L}{2}||y_{t+1}-x_{t+1}||^2
\end{align}
\]
\(\large\textbf{Proof:}\)
\(\text{From smoothness:}\)
\[\begin{align}
f(x_{t+1})&\leq f(x_t)+g_t^T(x_{t+1}-x_{t})+\frac{L}{2}||x_t-x_{t+1}||^2\\
&= f(x_t)-\frac{1}{2L}||g_t||^2-\frac{L}{2}(||x_{t}-x_{t+1}||^2-||y_{t+1}-x_{t+1}||^2)+\frac{L}{2}||x_t-x_{t+1}||^2\\
&\leq f(x_t)-\frac{1}{2L}||g_t||^2+\frac{L}{2}||y_{t+1}-x_{t+1}||^2
\end{align}
\]
\({\large \textbf{Theorem 3.4}:}f:dom(f)\rightarrow \mathbb{R}\text{ be convex and differentiable. Suppose }f\text{ is smooth with parameter }L. \text{ Choosing stepsize: }\gamma = \frac{1}{L}.\text{ Projected gradient descent satifies:}\)
\[\begin{align}
f(x_T)-f(x^*)\leq \frac{L}{2T}||x_0-x^*||^2
\end{align}
\]
\(\large \textbf{Proof:}\)
\(\text{From (16)}:\)
\[\begin{align}
\frac{1}{2L}||g_t||^2\leq f(x_t)-f(x_{t+1})+\frac{L}{2}||y_{t+1}-x_{t+1}||^2
\end{align}
\]
\(\text{Go back to }\)
\[\begin{align}
g_t^T(x_t-x^*)=\frac{1}{2\gamma}[\gamma^2||g_t||^2+||x_t-x^*||^2-||y_{t+1}-x^*||^2]
\end{align}
\]
\(\text{and}\)
\[\begin{align}
||x^*-x_{t+1}||^2+||y_{t+1}-x_{t+1}||^2\leq ||x^*-y_{t+1}||^2
\end{align}
\]
\(\text{Previously, we just drop the second term, now we keep it:}\)
\[\begin{align}
g_t^T(x_t-x^*)&=\frac{1}{2\gamma}[\gamma^2||g_t||^2+||x_t-x^*||^2-||y_{t+1}-x^*||^2]\\
&\leq \frac{1}{2\gamma}[\gamma^2||g_t||^2+||x_t-x^*||^2-||x_{t+1}-x^*||^2-||y_{t+1}-x_{t+1}||^2]
\end{align}
\]
\(\text{From convexity: }f(x_t)-f(x^*)\leq g_t^T(x_t-x^*)\)
\[\begin{align}
\sum_{t=0}^{T-1}[f(x_t)-f(x^*)]&\leq \sum_{t=0}^{T-1}g_t^T(x_t-x^*)\\
&\leq \frac{1}{2L}\sum_{t=0}^{T-1}||g_t||^2+\frac{L}{2}||x_0-x^*||^2-\frac{L}{2}\sum_{t=0}^{T-1}||y_{t+1}-x_{t+1}||^2
\end{align}
\]
\(\text{Now take the upper bound:}\)
\[\begin{align}
\frac{1}{2L}\sum_{t=0}^{T-1}||g_t||^2&\leq \sum_{t=0}^{T-1}[f(x_t)-f(x_{t+1})+\frac{L}{2}||y_{t+1}-x_{t+1}||^2]\\
&=f(x_0)-f(x_T)+\frac{L}{2}\sum_{t=0}^{T-1}||y_{t+1}-x_{t+1}||^2
\end{align}
\]
\(\text{Finally, combine (29) and (27):}\)
\[\begin{align}
\sum_{t=0}^{T-1}[f(x_t)-f(x^*)]&\leq f(x_0)-f(x_T)+\frac{L}{2}||x_0-x^*||^2
\end{align}
\]
\(\text{Hence:}\)
\[\begin{align}
f(x_T)-f(x^*)\leq \frac{1}{T}\sum_{t=1}^{T}[f(x_t)-f(x^*)]\leq \frac{L}{2T}||x_0-x^*||^2
\end{align}
\]