[Big Data Skills Improvement 3] Linear Regression Notes

Notation:

\(m\): number of training samples

\(n\): number of features

\(x^{(i)}\): the \(i\)-th training sample

\(x^{(i)}_{j}\): the \(j\)-th feature of the \(i\)-th sample

Hypothesis: \(h_\theta(x)=\theta^Tx=\theta_0+\theta_1x_1+\theta_2x_2+\cdots+\theta_nx_n\), with the convention \(x_0=1\)

Parameters: \(\theta_0,\theta_1,\theta_2,\cdots,\theta_n\)

Cost function: \(J(\theta_0,\theta_1,\cdots,\theta_n)=\frac{1}{2m}\sum_{i=1}^{m}(h_\theta(x^{(i)})-y^{(i)})^2\)

Gradient descent:

repeat until convergence {

\(\theta_j:=\theta_j- \alpha\frac{\partial }{\partial \theta_j}J(\theta_0,\theta_1,\theta_2,\cdots,\theta_n)\)

}

updating all \(j\in\{0,\cdots,n\}\) simultaneously.
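
Carrying out the differentiation, with \(h_\theta(x)=\theta^Tx\) so that \(\partial h_\theta(x)/\partial\theta_j=x_j\), gives

\(\frac{\partial}{\partial\theta_j}J(\theta)=\frac{1}{m}\sum_{i=1}^{m}(h_\theta(x^{(i)})-y^{(i)})\,x^{(i)}_j\)

and with \(x^{(i)}_0=1\) this yields the concrete update rules below.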

When \(n=1\):

repeat until convergence {

\(\theta_0:=\theta_0- \alpha\frac{1}{m}\sum_{i=1}^{m}(h_\theta(x^{(i)})-y^{(i)})\)

\(\theta_1:=\theta_1- \alpha\frac{1}{m}\sum_{i=1}^{m}(h_\theta(x^{(i)})-y^{(i)})\cdot x^{(i)}\)

}

When \(n\ge 1\) (the general case):

repeat until convergence {

\(\theta_j:=\theta_j- \alpha\frac{1}{m}\sum_{i=1}^{m}(h_\theta(x^{(i)})-y^{(i)})\cdot x^{(i)}_j\)

}
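
Stacking the samples row-wise into \(X\in\mathbb{R}^{m\times(n+1)}\) and the labels into \(y\in\mathbb{R}^{m}\), the same update can be written in one line; this is exactly what the bgd implementation later in this post computes on each iteration:

\(\theta:=\theta-\frac{\alpha}{m}X^T(X\theta-y)\)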

Feature scaling

Purpose: speeds up gradient descent by reducing the number of iterations needed.

Scale every feature into roughly the range \(-1\le x_i \le 1\).

Mean normalization

Replace \(x_i\) with \(x_i-\mu_i\) so that each feature has a mean of approximately zero (\(x_0\) excepted).
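
A minimal sketch in NumPy, assuming the raw feature columns live in an array X (without the \(x_0=1\) column); the helper name normalize is hypothetical, not from the original post:

import numpy as np

def normalize(X):
    mu = X.mean(axis=0)      # per-feature mean
    sigma = X.std(axis=0)    # per-feature spread; (max - min) also works
    return (X - mu) / sigma, mu, sigma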

Learning rate

\(\theta_j:=\theta_j- \alpha\frac{\partial }{\partial \theta_j}J(\theta)\)

How can you confirm that gradient descent is working correctly?

When \(J(\theta)\) changes very little as the iterations go on, it can be considered to have reached (approximately) the optimum, i.e. \(\min J(\theta)\).

How to choose the learning rate \(\alpha\)?

If \(\alpha\) is sufficiently small, \(J(\theta)\) should decrease on every iteration; but if it is too small, convergence is slow. If it is too large, the updates can overshoot the minimum, and \(J(\theta)\) may grow larger and larger as the iterations proceed.

To choose \(\alpha\), try values such as \(\cdots,0.001,0.003,0.01,0.03,0.1,0.3,1,\cdots\) (roughly tripling each step).
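
The effect of this schedule can be seen on a toy problem. Below is a minimal, self-contained sketch (the data and names here are hypothetical, not from the original post): with a fixed step budget, a too-small \(\alpha\) leaves the cost high, while larger stable values converge quickly.

import numpy as np

rng = np.random.RandomState(0)
x = rng.uniform(0, 1, size=(20, 1))
y = 1 + 2 * x                                    # noiseless line y = 1 + 2x
Xb = np.hstack([np.ones((20, 1)), x])            # prepend the x_0 = 1 column

for alpha in (0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1.0):
    t = np.zeros((2, 1))
    for _ in range(200):                         # fixed iteration budget
        t -= alpha / 20 * (Xb.T @ (Xb @ t - y)) # batch gradient step
    cost = float(((Xb @ t - y) ** 2).sum() / 40) # J(t) with m = 20
    print(alpha, cost)                           # at a fixed budget, too-small alpha leaves the cost high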

Composite features

\(h_\theta(x)=\theta_0+\theta_1 \times width + \theta_2 \times height\) usually works less well than simply using the area \(width \times height\) as a single feature.
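
For instance (a hypothetical sketch; the values are made up), the composite feature is just an element-wise product:

import numpy as np

width = np.array([[3.0], [4.0], [5.0]])    # assumed example values
height = np.array([[2.0], [3.0], [4.0]])
area = width * height                      # element-wise product -> single area feature
X_area = np.concatenate((np.ones((3, 1)), area), axis=1)  # design matrix with x_0 = 1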

Normal equation

Gradient descent: solves for the parameters \(\theta\) iteratively.

Normal equation: solves for \(\theta\) analytically (by setting the derivative to zero).

Normal equation formula: \(\theta=(X^TX)^{-1}X^Ty\)
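
A minimal NumPy sketch (the function name is hypothetical), using pinv so that a singular \(X^TX\) does not crash the computation:

import numpy as np

def normal_equation(X, y):
    # Solve theta = (X^T X)^{-1} X^T y in one shot
    return np.linalg.pinv(X.T @ X) @ X.T @ y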

Gradient descent vs. the normal equation

Gradient descent: requires choosing a learning rate and iterating, but works well even when \(n\) is large.

Normal equation: no learning rate or iteration needed, but it must invert \(X^TX\), which becomes slow when \(n\) is large.

import numpy as np
import matplotlib
import matplotlib.pyplot as plt

# Default font: one that can render CJK labels
matplotlib.rcParams['font.sans-serif'] = ['SimHei']
matplotlib.rcParams['font.family'] = 'sans-serif'
# Keep the minus sign '-' from rendering as a box
matplotlib.rcParams['axes.unicode_minus'] = False
def loadDataSet(filename):
    X = []  # feature column
    E = []  # target column(s); originally a single Y column
    with open(filename, encoding='utf-8') as f:
        for idx, line in enumerate(f):  # enumerate yields (index, line)
            line = line.strip()  # str.strip() removes surrounding whitespace/newlines
            if idx == 0:
                title = line  # first line is the header
                continue
            if not line:
                continue  # skip blank lines
            eles = list(map(float, line.split()))  # split the row, parse as floats
            X.append(eles[:1])  # first column -> feature
            E.append(eles[1:])  # remaining columns -> targets
    return np.array(X), np.array(E), title
def h(theta, X):
    # Vectorized hypothesis: one prediction per row of X
    return np.dot(X, theta)

def J(theta, X, Y):
    # Mean squared error cost, J = sum((h - y)^2) / (2m)
    m = len(X)
    return np.sum(np.dot((h(theta, X) - Y).T, (h(theta, X) - Y))) / (2 * m)
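
As a sanity check on J and its gradient, one can compare the analytic gradient \(\frac{1}{m}X^T(h_\theta(X)-y)\) against centered finite differences. A minimal sketch; the helper name numeric_grad is hypothetical, not from the original post:

def numeric_grad(theta, X, Y, eps=1e-6):
    # Centered finite differences of J with respect to each theta_j
    grad = np.zeros_like(theta)
    for k in range(theta.shape[0]):
        d = np.zeros_like(theta)
        d[k] = eps
        grad[k] = (J(theta + d, X, Y) - J(theta - d, X, Y)) / (2 * eps)
    return grad

# Once X and E1 are prepared below, numeric_grad(theta, X, E1) should agree
# with np.dot(X.T, h(theta, X) - E1) / len(X) to several decimal places.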
def bgd(alpha, maxloop, epsilon, X, Y):
    m, n = X.shape  # m samples, n columns (including the x_0 column)

    theta = np.zeros((2, 1))  # assumes one feature plus the intercept

    count = 0  # iteration counter
    converged = False  # convergence flag
    error = np.inf  # current value of the cost function
    errors = []
    thetas = {0: [theta[0, 0]], 1: [theta[1, 0]]}

    while count < maxloop:
        if converged:
            break
        count = count + 1
        temp1 = theta[0, 0] - alpha / m * (h(theta, X) - Y).sum()
        # X[:, 1][:, np.newaxis] turns the feature column into an m*1 matrix
        temp2 = theta[1, 0] - alpha / m * (np.dot(X[:, 1][:, np.newaxis].T, (h(theta, X) - Y))).sum()

        # Simultaneous update
        theta[0, 0] = temp1
        theta[1, 0] = temp2
        thetas[0].append(temp1)
        thetas[1].append(temp2)

        error = J(theta, X, Y)
        errors.append(error)

        if error < epsilon:
            converged = True
    return theta, errors, thetas
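Before loading the real data, a quick hypothetical sanity check (not from the original post): on synthetic data generated as \(y=2+3x+\text{noise}\), bgd should recover \(\theta\approx(2,3)\).

rng = np.random.RandomState(0)
xs = rng.uniform(0, 1, size=(50, 1))
ys = 2 + 3 * xs + 0.05 * rng.randn(50, 1)
Xs = np.concatenate((np.ones((50, 1)), xs), axis=1)  # add the x_0 = 1 column
theta_chk, _, _ = bgd(0.5, 10000, 1e-6, Xs, ys)
print(theta_chk)  # expect values close to [[2.], [3.]]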
X, E, title = loadDataSet('test_1')
print(X.shape, X)  # .shape gives the array's dimensions, e.g. m*n
print(E)
m, n = X.shape
X = np.concatenate((np.ones((m, 1)), X), axis=1)  # axis=1 joins column-wise (prepends the x_0 = 1 column); axis=0 would stack rows
print(X.shape)
for i in range(E.shape[1]):  # one regression per target column of E
    alpha = 0.001  # learning rate
    maxloop = 20000  # maximum number of iterations
    epsilon = 0.1  # convergence threshold
    E1 = E[:, i:i+1]
    print(E1, X)
    theta, errors, thetas = bgd(alpha, maxloop, epsilon, X, E1)

    xCopy = X.copy()
    xCopy.sort(0)  # sort rows so the fitted line plots left to right
    yHat = h(theta, xCopy)

    # Plot the fitted regression line over the data
    plt.xlabel('Income')
    plt.ylabel('Month')
    plt.plot(xCopy[:, 1], yHat, color='r')
    plt.scatter(X[:, 1].flatten(), E1.T.flatten())
    plt.show()
plt.xlim(1, 1600)
plt.ylim(3, 100)
plt.xlabel('Iterations')
plt.ylabel('Cost J')
plt.plot(range(len(errors)), errors)
plt.show()


# Prepare grid data for visualizing the gradient-descent path
%matplotlib
from mpl_toolkits.mplot3d import axes3d
size = 100
theta0Vals = np.linspace(-10, 10, size)
theta1Vals = np.linspace(-2, 4, size)
JVals = np.zeros((size, size))
for i in range(size):
    for j in range(size):
        col = np.array([[theta0Vals[i]], [theta1Vals[j]]])
        JVals[i, j] = J(col, X, E1)  # cost for the last target column fitted above

theta0Vals, theta1Vals = np.meshgrid(theta0Vals, theta1Vals)
JVals = JVals.T  # transpose so the values line up with the meshgrid convention
# Plot the 3D surface of the cost function
contourSurf = plt.figure()
ax = contourSurf.add_subplot(projection='3d')

ax.plot_surface(theta0Vals, theta1Vals, JVals, rstride=2, cstride=2, alpha=0.3,
               cmap=matplotlib.cm.rainbow, linewidth=0, antialiased=False)
ax.plot(theta[0], theta[1], 'rx')  # mark the solution found by bgd
ax.set_xlabel(r'$\theta_0$')
ax.set_ylabel(r'$\theta_1$')
ax.set_zlabel(r'$J(\theta)$')
# Plot contour lines of the cost function
%matplotlib inline
plt.figure(figsize=(12, 6))
CS = plt.contour(theta0Vals, theta1Vals, JVals, np.logspace(-5, 6, 30))
plt.clabel(CS, inline=1, fontsize=10)
# Mark the optimum
plt.plot(theta[0, 0], theta[1, 0], 'rx', markersize=10, linewidth=3)
# Trace the gradient-descent path
plt.plot(thetas[0], thetas[1], 'rx', markersize=3, linewidth=1)
plt.plot(thetas[0], thetas[1], 'r-', markersize=3, linewidth=1)
plt.xlabel(r'$\theta_0$')
plt.ylabel(r'$\theta_1$')