Simple Linear Regression
\[y=a+b x+\epsilon
\]
\[\begin{aligned}
Q \equiv Q(a, b) &=\sum_{i=1}^{n} e_{i}^{2} \\
&=\sum_{i=1}^{n}\left(y_{i}-\hat{y}_{i}\right)^{2} \\
&=\sum_{i=1}^{n}\left(y_{i}-a-b x_{i}\right)^{2}
\end{aligned}
\]
\[\begin{aligned}
a &=\bar{y}-b \bar{x} \\
&=\sum_{i=1}^{n}\left[\frac{1}{n}-\frac{\bar{x}\left(x_{i}-\bar{x}\right)}{\sum_{j=1}^{n}\left(x_{j}-\bar{x}\right)^{2}}\right] y_{i}
\end{aligned}
\]
\[\begin{aligned}
\operatorname{Var}(a) &=\sum_{i=1}^{n}\left[\frac{1}{n}-\frac{\bar{x}\left(x_{i}-\bar{x}\right)}{\sum_{j=1}^{n}\left(x_{j}-\bar{x}\right)^{2}}\right]^{2} \operatorname{Var}\left(y_{i}\right) \\
&=\left[\frac{1}{n}+\frac{\bar{x}^{2}}{\sum_{i=1}^{n}\left(x_{i}-\bar{x}\right)^{2}}\right] \sigma^{2}
\end{aligned}
\]
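As a sanity check on the \(\operatorname{Var}(a)\) formula, the following minimal sketch simulates repeated samples at a fixed design and compares the empirical variance of \(a\) with \(\left(\frac{1}{n}+\frac{\bar{x}^{2}}{\sum\left(x_{i}-\bar{x}\right)^{2}}\right) \sigma^{2}\). The design points, true parameters, and noise level are illustrative assumptions, not from the notes:

```python
import numpy as np

rng = np.random.default_rng(0)
x = np.linspace(0, 10, 25)              # fixed design points (assumption)
a_true, b_true, sigma = 2.0, 0.5, 1.0   # true parameters (assumption)
s_xx = np.sum((x - x.mean()) ** 2)

a_hats = []
for _ in range(20000):
    y = a_true + b_true * x + rng.normal(0, sigma, x.size)
    b_hat = np.sum((x - x.mean()) * (y - y.mean())) / s_xx
    a_hats.append(y.mean() - b_hat * x.mean())

var_theory = (1 / x.size + x.mean() ** 2 / s_xx) * sigma ** 2
print(np.var(a_hats), var_theory)       # the two numbers should agree closely
```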
More generally, the multiple linear regression model:
\[y=\beta_{0}+\beta_{1} x_{1}+\beta_{2} x_{2}+\ldots+\beta_{p} x_{p}+\varepsilon
\]
Gauss-Markov conditions:
\[\begin{array}{l}
E\left(\varepsilon_{i}\right)=0, \operatorname{Var}\left(\varepsilon_{i}\right)=\sigma^{2}, i=1, \ldots, n \\
\operatorname{Cov}\left(\varepsilon_{i}, \varepsilon_{j}\right)=0, i \neq j, \text { and } i, j=1, \ldots, n
\end{array}\]
Normality conditions:
\[\left\{\begin{array}{l}
\varepsilon_{i} \sim N\left(0, \sigma^{2}\right) \\
\varepsilon_{1}, \varepsilon_{2}, \ldots, \varepsilon_{n} \quad \text { mutually independent }
\end{array}\right.\]
Matrix form of the simple linear model
\[y_{i}=\beta_{0}+\beta_{1} x_{i}+\varepsilon_{i}
\]
\[\begin{array}{l}
\boldsymbol{y}=\left(y_{1}, \ldots, y_{n}\right)^{\top}, \quad \mathbf{1}=(1, \ldots, 1)^{\top} \\
\boldsymbol{x}=\left(x_{1}, \ldots, x_{n}\right)^{\top}, \quad \boldsymbol{X}=(1, \boldsymbol{x})_{n \times 2} \\
\varepsilon=\left(\varepsilon_{1}, \ldots, \varepsilon_{n}\right)^{\top}, \quad \boldsymbol{\beta}=\left(\beta_{0}, \beta_{1}\right)^{\top}
\end{array}\]
\[\left\{\begin{array}{l}
y=X \beta+\varepsilon \\
E(\varepsilon)=0 \\
\operatorname{Var}(\varepsilon)=\sigma^{2} I_{n}
\end{array}\right.\]
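A minimal numpy sketch of this matrix formulation (the data-generating values are assumptions for illustration); it builds \(\boldsymbol{X}=(\mathbf{1}, \boldsymbol{x})\) and solves the normal equations \(\boldsymbol{X}^{\top} \boldsymbol{X} \boldsymbol{\beta}=\boldsymbol{X}^{\top} \boldsymbol{y}\) directly:

```python
import numpy as np

rng = np.random.default_rng(1)
n = 50
x = rng.uniform(0, 5, n)
y = 1.0 + 2.0 * x + rng.normal(0, 0.3, n)     # beta0=1, beta1=2 (assumed values)

X = np.column_stack([np.ones(n), x])          # X = (1, x), the n-by-2 design matrix
beta_hat = np.linalg.solve(X.T @ X, X.T @ y)  # normal equations X'X beta = X'y
print(beta_hat)                               # approximately [1.0, 2.0]
```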
\[Q\left(\beta_{0}, \beta_{1}\right)=\sum_{i=1}^{n}\left(y_{i}-\beta_{0}-\beta_{1} x_{i}\right)^{2}
\]
\[\hat{\boldsymbol{\beta}}=\left(\hat{\beta}_{0}, \hat{\beta}_{1}\right)^{\top}
\]
\[\hat{\boldsymbol{\beta}}=\arg \min \limits_{\boldsymbol{\beta} \in R^{2}} Q(\boldsymbol{\beta})
\]
\[\left\{\begin{array}{l}
\frac{\partial Q}{\partial \beta_{0}}=-2 \sum_{i=1}^{n}\left(y_{i}-\beta_{0}-\beta_{1} x_{i}\right)=0 \\
\frac{\partial Q}{\partial \beta_{1}}=-2 \sum_{i=1}^{n} x_{i}\left(y_{i}-\beta_{0}-\beta_{1} x_{i}\right)=0
\end{array}\right.\]
Simplifying:
\[\left\{\begin{array}{ll}
\hat{\beta_{0}}+\bar{x} \hat{\beta_{1}} & =\bar{y} \\
\bar{x} \hat{\beta_{0}}+\frac{\sum_{i=1}^{n} x_{i}^{2}}{n} \hat{\beta_{1}} & =\frac{\sum\limits_{i=1}^{n} x_{i} y_{i}}{n}
\end{array}\right.\]
\[\Rightarrow \sum x_{i} y_{i}-n \bar{x} \bar{y}=\hat{\beta}_{1}\left(\sum x_{i}^{2}-n \bar{x}^{2}\right)
\]
\[S_{xx}=\sum\limits_{i=1}^{n}\left(x_{i}-\bar{x}\right)^{2}=\sum\limits_{i=1}^{n}x_i^2-n\bar{x}^2, S_{x y}=\sum\limits_{i=1}^{n} x_{i} y_{i}-n \bar{x} \bar{y}
\]
\[\begin{array}{l}
\hat{\beta}_{0}=\bar{y}-\hat{\beta}_{1} \bar{x} \\
\hat{\beta}_{1}=\frac{S_{x y}}{S_{x x}}
\end{array}\]
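The closed forms above can be checked numerically; in this sketch the small dataset is made up for illustration, and the result is compared against np.polyfit:

```python
import numpy as np

x = np.array([1.0, 2.0, 3.0, 4.0, 5.0])     # toy data (assumption)
y = np.array([2.1, 3.9, 6.2, 7.8, 10.1])

n = len(x)
s_xx = np.sum(x ** 2) - n * x.mean() ** 2
s_xy = np.sum(x * y) - n * x.mean() * y.mean()
beta1 = s_xy / s_xx
beta0 = y.mean() - beta1 * x.mean()

print(beta0, beta1)
print(np.polyfit(x, y, 1))                  # returns [slope, intercept]
```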
Properties:
Linearity:
Definition of linearity: an estimator that is a linear function of the random variables \(\{y_{i}, i=1, \ldots, n\}\) is called a linear estimator.
Here lowercase letters denote deviations from the sample means, \(x_{i}=X_{i}-\bar{X}\) and \(y_{i}=Y_{i}-\bar{Y}\):
\[\begin{aligned}
\hat{\beta}_{1} &=\frac{\sum x_{i} y_{i}}{\sum x_{i}^{2}}=\frac{\sum x_{i}\left(Y_{i}-\bar{Y}\right)}{\sum x_{i}^{2}} \quad \text{(split the sum)} \\
&=\frac{\sum x_{i} Y_{i}}{\sum x_{i}^{2}}-\frac{\bar{Y} \sum x_{i}}{\sum x_{i}^{2}}=\frac{\sum x_{i} Y_{i}}{\sum x_{i}^{2}}=\sum k_{i} Y_{i}
\end{aligned}\]
where, in original-variable notation, \(k_{i}=\frac{x_{i}-\bar{x}}{\sum_{i=1}^{n}\left(x_{i}-\bar{x}\right)^{2}}\), and the cross term vanishes because
\[\sum x_{i}=\sum\left(X_{i}-\bar{X}\right)=\sum X_{i}-n \bar{X}=0
\]
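Numerically, the weights \(k_{i}\) sum to zero and reproduce \(\hat{\beta}_{1}\) as a linear combination of the responses (toy data assumed for illustration):

```python
import numpy as np

x = np.array([1.0, 2.0, 3.0, 4.0, 5.0])            # toy data (assumption)
y = np.array([2.1, 3.9, 6.2, 7.8, 10.1])

k = (x - x.mean()) / np.sum((x - x.mean()) ** 2)   # weights k_i
print(np.isclose(k.sum(), 0.0))                    # sum of k_i is zero
beta1_linear = k @ y                               # beta1_hat as sum of k_i * y_i
beta1_ratio = (np.sum(x * y) - len(x) * x.mean() * y.mean()) / \
              (np.sum(x ** 2) - len(x) * x.mean() ** 2)
print(beta1_linear, beta1_ratio)                   # identical values: S_xy / S_xx
```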
Unbiasedness:
\(\bar{x} =\frac{1}{n}\sum_{i=1}^{n}x_i\)
Caution: the sums cannot be cancelled term by term, so \(\hat{\beta}_{1}=\frac{S_{x y}}{S_{x x}} \neq \frac{y_{i}-\bar{y}}{x_{i}-\bar{x}}\), and the shortcut \(E(\hat{\beta}_{1})=E\left(\frac{y_{i}-\bar{y}}{x_{i}-\bar{x}}\right)=\beta_{1}\) is not a valid proof. Instead, use the linear form \(\hat{\beta}_{1}=\sum b_{i} y_{i}\) with \(b_{i}=k_{i}\) as above:
\[\begin{aligned}
E\left(\hat{\beta}_{1}\right) &=E\left(\sum b_{i} y_{i}\right)=\sum b_{i} E\left(y_{i}\right)=\sum b_{i}\left(\beta_{0}+\beta_{1} x_{i}\right) \\
&=\sum \frac{x_{i}-\bar{x}}{\sum\left(x_{i}-\bar{x}\right)^{2}}\left(\beta_{0}+\beta_{1} x_{i}\right) \\
&=\beta_{0} \sum \frac{x_{i}-\bar{x}}{\sum\left(x_{i}-\bar{x}\right)^{2}}+\beta_{1} \sum \frac{\left(x_{i}-\bar{x}\right) x_{i}}{\sum\left(x_{i}-\bar{x}\right)^{2}}=\beta_{1}
\end{aligned}
\]
\[\begin{array}{l}
\operatorname{Var}\left(\hat{\beta}_{1}\right)=\operatorname{Var}\left(\sum b_{i} y_{i}\right)=\sum \operatorname{Var}\left(b_{i} y_{i}\right)=\sum b_{i}^{2} \operatorname{Var}\left(y_{i}\right)=\sum\left(\frac{x_{i}-\bar{x}}{\sum\left(x_{i}-\bar{x}\right)^{2}}\right)^{2} \sigma^{2} \\
=\sigma^{2} \frac{\sum\left(x_{i}-\bar{x}\right)^{2}}{\left[\sum\left(x_{i}-\bar{x}\right)^{2}\right]^{2}}=\frac{\sigma^{2}}{\sum\left(x_{i}-\bar{x}\right)^{2}}=\frac{\sigma^{2}}{S_{x x}}
\end{array}
\]
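A Monte Carlo check of both unbiasedness and the variance formula \(\operatorname{Var}(\hat{\beta}_{1})=\sigma^{2} / S_{x x}\); the design and parameter values are illustrative assumptions:

```python
import numpy as np

rng = np.random.default_rng(2)
x = np.linspace(0, 10, 30)                  # fixed design (assumption)
beta0, beta1, sigma = 1.0, 0.8, 1.5         # true values (assumption)
s_xx = np.sum((x - x.mean()) ** 2)

b1_hats = np.empty(20000)
for i in range(b1_hats.size):
    y = beta0 + beta1 * x + rng.normal(0, sigma, x.size)
    b1_hats[i] = np.sum((x - x.mean()) * y) / s_xx

print(b1_hats.mean(), beta1)                # unbiasedness: mean close to beta1
print(b1_hats.var(), sigma ** 2 / s_xx)     # variance close to sigma^2 / S_xx
```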
\[\begin{aligned}
\hat{\beta}_{0} &=\bar{y}-\hat{\beta}_{1} \bar{x}=\frac{1}{n} \mathbf{1}^{\prime} \boldsymbol{y}-\bar{x} \boldsymbol{c}^{\prime} \boldsymbol{y}, \quad \boldsymbol{c}=\left(k_{1}, \ldots, k_{n}\right)^{\prime} \\
\operatorname{Var}\left(\hat{\beta}_{0}\right) &=\operatorname{Var}\left(\frac{1}{n} \mathbf{1}^{\prime} \boldsymbol{y}\right)+\operatorname{Var}\left(\bar{x} \boldsymbol{c}^{\prime} \boldsymbol{y}\right)-2 \operatorname{Cov}\left(\frac{1}{n} \mathbf{1}^{\prime} \boldsymbol{y}, \bar{x} \boldsymbol{c}^{\prime} \boldsymbol{y}\right) \\
&=\frac{\sigma^{2}}{n}+\frac{\bar{x}^{2} \sigma^{2}}{S_{x x}}-0=\left(\frac{1}{n}+\frac{\bar{x}^{2}}{S_{x x}}\right) \sigma^{2}
\end{aligned}
\]
The covariance term vanishes because \(\mathbf{1}^{\prime} \boldsymbol{c}=\sum k_{i}=0\); the result matches \(\operatorname{Var}(a)\) above.
Best linear unbiased estimator (BLUE)
An unbiased estimator \(\hat{\theta}\) of a parameter \(\theta\) is called the best linear unbiased estimator if it is linear and, for every linear unbiased estimator \(\tilde{\theta}\) of \(\theta\), \(\operatorname{Var}(\hat{\theta}) \leq \operatorname{Var}(\tilde{\theta})\).
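To illustrate BLUE, the sketch below compares OLS against another linear unbiased estimator of \(\beta_{1}\), the two-point estimator \(\left(y_{n}-y_{1}\right) /\left(x_{n}-x_{1}\right)\); all simulation settings are assumptions. OLS should show the smaller variance:

```python
import numpy as np

rng = np.random.default_rng(3)
x = np.linspace(0, 10, 30)                  # fixed design (assumption)
beta0, beta1, sigma = 1.0, 0.8, 1.5         # true values (assumption)
s_xx = np.sum((x - x.mean()) ** 2)

ols, two_point = [], []
for _ in range(20000):
    y = beta0 + beta1 * x + rng.normal(0, sigma, x.size)
    ols.append(np.sum((x - x.mean()) * y) / s_xx)
    two_point.append((y[-1] - y[0]) / (x[-1] - x[0]))  # also linear and unbiased

print(np.var(ols), np.var(two_point))       # OLS has the smaller variance
```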
Sum-of-squares decomposition
SST (total sum of squares): the total variation of the \(y_{i}\) about \(\bar{y}\)
\[SST=\sum_{i=1}^{n}\left(y_{i}-\bar{y}\right)^{2}
\]
Tip: \(\sum_{i=1}^{n}\left(y_{i}-\bar{y}\right)^{2}=\sum_{i=1}^{n} y_{i}^{2}-n \bar{y}^{2}\)
SSR (sum of squares for regression): the variation explained by the fitted line
\[SSR=\sum_{i=1}^{n}\left(\hat{y}_{i}-\bar{y}\right)^{2}=\sum\left(\hat{\beta}_{0}+\hat{\beta}_{1} x_{i}-\bar{y}\right)^{2}=\sum \hat{\beta}_{1}^{2}\left(x_{i}-\bar{x}\right)^{2}=\hat{\beta}_{1}^{2} S_{x x}
\]
SSE (sum of squares for error): the residual sum of squares
\[SSE=\sum_{i=1}^{n}\left(y_{i}-\hat{y}_{i}\right)^{2}=\sum_{i=1}^{n} e_{i}^{2}
\]
\[\begin{aligned}
y-\bar{y}=(y-\hat{y})+(\hat{y}-\bar{y}) & \\
\Rightarrow \sum(y-\bar{y})^{2}=\sum(y-\hat{y})^{2}+\Sigma(\hat{y}-\bar{y})^{2}+2 \Sigma(y-\hat{y})(\hat{y}-\bar{y}) \\
\because \Sigma(y-\hat{y})(\hat{y}-\bar{y}) &=\Sigma(y-\hat{y})(a+b x-\bar{y}) \\
&=\Sigma(y-\hat{y})[(a-\bar{y})+b x] \\
&=(a-\bar{y}) \Sigma(y-\hat{y})+b \Sigma(y-\hat{y}) x \\
&=(a-\bar{y}) \Sigma(y-a-b x)+b \Sigma(y-a-b x) x
\end{aligned}
\]
By the principle of least squares (the normal equations), we have:
\[\Sigma(y-a-b x)=0, \quad \Sigma(y-a-b x) x=0
\]
\(\therefore \Sigma(y-\hat{y})(\hat{y}-\bar{y})=0\)
\(\therefore \Sigma(y-\bar{y})^{2}=\Sigma(y-\hat{y})^{2}+\Sigma(\hat{y}-\bar{y})^{2}\)
\[SST=SSR+SSE
\]
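The decomposition can be verified on any dataset; here a toy dataset is assumed for illustration:

```python
import numpy as np

x = np.array([1.0, 2.0, 3.0, 4.0, 5.0])     # toy data (assumption)
y = np.array([2.1, 3.9, 6.2, 7.8, 10.1])

b1 = (np.sum(x * y) - len(x) * x.mean() * y.mean()) / \
     (np.sum(x ** 2) - len(x) * x.mean() ** 2)
b0 = y.mean() - b1 * x.mean()
y_hat = b0 + b1 * x

sst = np.sum((y - y.mean()) ** 2)
ssr = np.sum((y_hat - y.mean()) ** 2)
sse = np.sum((y - y_hat) ** 2)
print(sst, ssr + sse)                       # equal up to floating-point error
```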