
Andrew Ng Machine Learning Homework 01: ex1 (Single-Variable and Multivariate Linear Regression)

Programming Exercise 1: Linear Regression

Overview:
Suppose you are the CEO of a restaurant franchise and are considering opening a new outlet in various cities. The chain already operates food trucks in a number of cities, and you have data on each city's population and the corresponding profit; the task is to model the relationship between population and profit.
Fit a linear model to the data in ex1data1.txt (first column: city population; second column: the corresponding profit, where negative values indicate a loss) and ex1data2.txt (the data used for the multivariate part).

%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt

2 Linear regression with one variable

datafile = 'data/ex1data1.txt'
cols = np.loadtxt(datafile,delimiter=',',usecols=(0,1),unpack=True) # read the data; unpack=True returns one row vector per column
#Form the usual "X" matrix and "y" vector
X = np.transpose(np.array(cols[:-1])) # cols[:-1] is the population data, shape (1,97); transposed to (97,1)
y = np.transpose(np.array(cols[-1:])) # cols[-1:] is the profit data
m = y.size # number of training examples
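
A quick shape check (an added snippet, not in the original post): ex1data1.txt contains 97 training examples, so at this point X and y should both be 97-by-1.

print(X.shape, y.shape, m) # expected: (97, 1) (97, 1) 97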

2.1 Plotting the Data

# Plot the data
plt.plot(X[:,0],y[:,0],'ro',markersize=5) # red circles, marker size 5
plt.ylabel('Profit in $10,000s')
plt.xlabel('Population of City in 10,000s')
plt.title('POPULATION AND PROFIT')

[Figure ex1_6_1: scatter plot of city population vs. profit]

2.2 Gradient Descent


iterations = 1500 # maximum number of iterations
alpha = 0.01 # learning rate
X = np.insert(X,0,1,axis=1) # prepend a column of ones to X as the coefficient of theta_0 (the intercept term)

Linear regression seeks the line that is most likely to fit the data. The discrepancy between the data and the line is called the modeling error. To minimize it, we adjust the parameters $\theta_0$ and $\theta_1$ so that the cost function $J(\theta_0, \theta_1)$ takes its smallest value.

Among the possible cost functions, the most common is the squared error cost function.
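For the hypothesis $h_\theta(x) = \theta_0 + \theta_1 x$ it is

$$J(\theta_0,\theta_1) = \frac{1}{2m}\sum_{i=1}^{m}\bigl(h_\theta(x^{(i)}) - y^{(i)}\bigr)^2,$$

which computeCost below evaluates in vectorized form as $\frac{1}{2m}(X\theta-y)^T(X\theta-y)$.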

"""
theta是一个n维列向量
X是一个m行n列的矩阵(单变量情况下,是两列(包含一个变量和theta_0))
y是一个m行1列的矩阵(m维的列向量)
"""
def h(X,theta): # 计算线性假设函数h(x)
    return np.dot(X,theta) #得到h(x)结果

def computeCost(X,mytheta,y): # 定义代价函数
    return float((1./(2*m)) * np.dot((h(X,mytheta)-y).T,(h(X,mytheta)-y)))#利用转置与自身的乘积,实现差的平方累加

# Test: with theta initialized to zero (so h(x)=0) the cost should be 32.07; note: *.shape prints (rows, columns)
initial_theta = np.zeros((X.shape[1],1)) # initialize theta to zeros: an n-dimensional column vector (n is the number of columns of X, here 2)
print(computeCost(X,initial_theta,y)) # print the test cost; the correct value is 32.07
32.07273387745567
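
Each iteration of gradient descent applies the simultaneous update

$$\theta_j := \theta_j - \frac{\alpha}{m}\sum_{i=1}^{m}\bigl(h_\theta(x^{(i)}) - y^{(i)}\bigr)x_j^{(i)} \qquad \text{for all } j,$$

which is exactly the rule implemented inside the inner loop of descendGradient below.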
# Gradient descent: at each step, every parameter theta_j moves toward the value that minimizes the cost J(theta)
def descendGradient(X, theta_start):
    """
    theta_start: the initial parameters (zeros)
    X: m-by-n matrix (m is the number of samples, n the number of features including the ones column; here n = 2)
    """
    theta = theta_start
    costVector = [] # cost at each iteration
    thetahistory = [] # parameters at each iteration
    for meaninglessvariable in range(iterations): # run a fixed number of iterations
        tmptheta = theta.copy() # work on a copy so that all theta_j are updated simultaneously
        # record the cost of the current hypothesis
        costVector.append(computeCost(X,theta,y))
        # record the current parameters
        thetahistory.append(list(theta[:,0]))
        # update each parameter (simultaneously update theta_j for all j)
        for j in range(len(tmptheta)): # theta_0 and theta_1
            tmptheta[j] = theta[j] - (alpha/m)*np.sum((h(X,theta) - y)*np.array(X[:,j]).reshape(m,1))
        theta = tmptheta # commit the simultaneous update
    return theta, thetahistory, costVector
# Run gradient descent to find the parameters that minimize the cost of the hypothesis
initial_theta = np.zeros((X.shape[1],1)) # X is m-by-n, so theta is n-by-1
theta, thetahistory, costVector = descendGradient(X,initial_theta)

# Define a function that plots the cost at each iteration
def plotCostByStep(costVector):
    
    plt.plot(range(len(costVector)),costVector,'b-')
    plt.title("Cost of each step")
    plt.xlabel("Iteration number")
    plt.ylabel("Cost Value")
    plt.xlim([-0.05*iterations,1.05*iterations])
    plt.ylim([4,7]) # zoom in near the converged cost; the initial cost of ~32 lies off-scale
    plt.show()
    
# Define a function that plots the parameter values at each iteration
def plotThetaByStep(thetahistory):
    data = np.array(thetahistory)
    thetahistory_0 = data[:,0]
    thetahistory_1 = data[:,1]
    
    plt.plot(range(thetahistory_0.size),thetahistory_0,'g-',label="Theta_0")
    plt.plot(range(thetahistory_1.size),thetahistory_1,'r-',label="Theta_population")
    plt.legend()
    plt.title("Thetas of each step")
    plt.xlabel("Iteration number")
    plt.ylabel("Theta Value")

plotCostByStep(costVector)
plotThetaByStep(thetahistory)

[Figure ex1_13_0: cost at each iteration]

[Figure ex1_13_1: theta values at each iteration]

# Prediction from the fitted hypothesis function
def myfit(xval):
    return theta[0] + theta[1]*xval

plt.plot(X[:,1],y[:,0],'ro',markersize=5,label='Training Data') # scatter plot of the training data
plt.plot(X[:,1],myfit(X[:,1]),'b-',label = 'Hypothesis: h(x) = %0.2f + %0.2fx'%(theta[0,0],theta[1,0]))
plt.grid(True) # grid lines
plt.ylabel('Profit in $10,000s')
plt.xlabel('Population of City in 10,000s')
plt.legend() # show the legend

[Figure ex1_14_1: training data with the fitted hypothesis line]
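
As a small added check (not in the original post), the fitted line can predict the profit for a new city. Both axes are in units of 10,000, so a population of 35,000 corresponds to an input of 3.5:

# Added example: predict the profit for a city of 35,000 people
# (population and profit are both expressed in units of 10,000)
predicted = float(myfit(3.5))
print("Predicted profit for a population of 35,000: $%0.2f" % (predicted*10000))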

2.4 Visualizing J($\theta$)

# 3-D plotting support from matplotlib
from mpl_toolkits.mplot3d import axes3d, Axes3D

plt.figure(figsize=(10,10))
ax = plt.axes(projection='3d')

xvals = np.arange(-5,5,.3)
yvals = np.arange(-1,4,.1)
myxs, myys, myzs = [], [], []
for t0 in xvals: # theta_0 grid values
    for t1 in yvals: # theta_1 grid values
        myxs.append(t0)
        myys.append(t1)
        myzs.append(computeCost(X,np.array([[t0], [t1]]),y))

scat = ax.scatter(myxs,myys,myzs,c=np.abs(myzs),cmap=plt.get_cmap('YlOrRd'))

plt.xlabel(r'$\theta_0$',fontsize=20)
plt.ylabel(r'$\theta_1$',fontsize=20)
plt.title('Cost (Minimization Path Shown in Blue)',fontsize=30)
plt.plot([x[0] for x in thetahistory],[x[1] for x in thetahistory],costVector,'b-')

[Figure ex1_16_1: 3-D scatter of the cost surface, with the minimization path shown in blue]
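
A 2-D contour view of the same surface (a possible added sketch, reusing xvals, yvals, thetahistory, and computeCost from above) makes the bowl shape and the descent path easier to read:

# Added companion view: contours of J(theta) with the descent path overlaid
T0, T1 = np.meshgrid(xvals, yvals)
Z = np.array([[computeCost(X, np.array([[t0],[t1]]), y) for t0 in xvals] for t1 in yvals])
plt.figure(figsize=(8,8))
plt.contour(T0, T1, Z, levels=np.logspace(-2, 3, 20))
plt.plot([t[0] for t in thetahistory], [t[1] for t in thetahistory], 'b-')
plt.xlabel(r'$\theta_0$')
plt.ylabel(r'$\theta_1$')
plt.title('Contours of the cost function')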

3. Linear Regression with multiple variables

%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt

datafile = 'data/ex1data2.txt' # housing training set: column 1 is the size of the house, column 2 the number of bedrooms, column 3 the price
# Read the raw data
cols = np.loadtxt(datafile,delimiter=',',usecols=(0,1,2),unpack=True) # split the data into three column vectors
# Split into the feature matrix and the y vector
X = np.transpose(np.array(cols[:-1])) # the first two columns (house size, number of bedrooms) form the feature matrix
y = np.transpose(np.array(cols[-1:])) # the last column is y
m = y.size # number of training examples
# Prepend the column of ones for the intercept term theta_0
X = np.insert(X,0,1,axis=1)
# Visualize each column to decide whether feature normalization is needed
def plotFeatureCounts(x,label):
    plt.hist(x,bins=20)
    plt.title(label)
    plt.xlabel('Column Value')
    plt.ylabel('Counts')
    plt.show()
    
plotFeatureCounts(X[:,0],"Theta_0")
plotFeatureCounts(X[:,1],"The size of the house")
plotFeatureCounts(X[:,2],"The number of bedrooms")

[Figure ex1_19_0: histogram of the intercept column (all ones)]

[Figure ex1_19_1: histogram of house sizes]

[Figure ex1_19_2: histogram of bedroom counts]
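
Z-score normalization, used below, replaces each feature value by its distance from the feature's mean measured in standard deviations:

$$x_j := \frac{x_j - \mu_j}{\sigma_j}$$

where $\mu_j$ and $\sigma_j$ are the mean and standard deviation of feature $j$ over the training set; both are stored so the same transformation can be applied to new inputs later.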

# Feature scaling (z-score normalization)
stored_feature_means, stored_feature_stds = [], []
Xcopy = X.copy()
for icol in range(Xcopy.shape[1]):
    stored_feature_means.append(np.mean(Xcopy[:,icol])) # store the mean of each column
    stored_feature_stds.append(np.std(Xcopy[:,icol])) # store the standard deviation of each column
    # the intercept column (for theta_0) is not normalized
    if not icol: continue
    # normalize the feature column
    Xcopy[:,icol] = (Xcopy[:,icol] - stored_feature_means[-1])/stored_feature_stds[-1]
# Visualize the normalized data
plt.xlim([-5,5])
plt.hist(Xcopy[:,0],label = 'Theta_0')
plt.hist(Xcopy[:,1],label = 'Theta_size')
plt.hist(Xcopy[:,2],label = 'Theta_bedrooms')
plt.title('Feature Normalization Accomplished')
plt.xlabel('Column Value')
plt.ylabel('Counts')
plt.legend()

[Figure ex1_21_1: histograms of the features after normalization]
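
A quick numeric check (an added snippet) confirms the transformation: each normalized column should now have mean approximately 0 and standard deviation approximately 1.

# Added check: normalized feature columns should have mean ~0 and std ~1
print(Xcopy[:,1:].mean(axis=0)) # expected: approximately [0. 0.]
print(Xcopy[:,1:].std(axis=0))  # expected: approximately [1. 1.]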

# Run multivariate gradient descent with the parameters initialized to zero
# (descendGradient and computeCost read the globals y, m, alpha, and iterations; y and m now refer to this dataset)
initial_theta = np.zeros((Xcopy.shape[1],1))
theta, thetahistory, costVector = descendGradient(Xcopy,initial_theta)

def plotThetaMultiByStep(thetahistory):
    data = np.array(thetahistory)
    thetahistory_0 = data[:,0]
    thetahistory_1 = data[:,1]
    thetahistory_2 = data[:,2]
    plt.plot(range(thetahistory_0.size),thetahistory_0,'g-',label="Theta_0")
    plt.plot(range(thetahistory_1.size),thetahistory_1,'r-',label="Theta_size")
    plt.plot(range(thetahistory_2.size),thetahistory_2,'b-',label="Theta_bedrooms")
    plt.legend()
    plt.title("Thetas of each step")
    plt.xlabel("Iteration number")
    plt.ylabel("Theta Value")
    
def plotCostMultiByStep(costVector):
    plt.plot(range(len(costVector)),costVector,'b-')
    plt.title("Cost of each step")
    plt.xlabel("Iteration number")
    plt.ylabel("Cost Value")
    plt.show()
    
print(theta) # print the final parameters
# Plot the cost and parameter trajectories
plotCostMultiByStep(costVector)
plotThetaMultiByStep(thetahistory)
[[340412.56301439]
 [109371.67272252]
 [ -6502.3992545 ]]

[Figure ex1_22_1: cost at each iteration]

[Figure ex1_22_2: theta values at each iteration]

# Test the fitted model
print("Check of result: What is price of house with 1650 square feet and 3 bedrooms?")
ytest = np.array([1650.,3.])
# scale the test features with the means and standard deviations stored from the training set
ytestscaled = [(ytest[x]-stored_feature_means[x+1])/stored_feature_stds[x+1] for x in range(len(ytest))]
ytestscaled.insert(0,1) # prepend the constant 1 that multiplies theta_0
print("$%0.2f" % float(h(ytestscaled,theta))) # regression prediction
Check of result: What is price of house with 1650 square feet and 3 bedrooms?
$293098.15
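
Linear regression also has a closed-form solution, the normal equation

$$\theta = (X^T X)^{-1} X^T y,$$

which needs no learning rate, no iterations, and no feature scaling; normEqtn below implements it directly.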
# Normal equation method
from numpy.linalg import inv
# Closed-form solution for the parameters; note that no feature scaling is needed, so the raw X is used
def normEqtn(X,y):
    return np.dot(np.dot(inv(np.dot(X.T,X)),X.T),y)
print("Normal equation prediction for price of house with 1650 square feet and 3 bedrooms")
print("$%0.2f" % float(h([1,1650.,3],normEqtn(X,y))))
Normal equation prediction for price of house with 1650 square feet and 3 bedrooms
$293081.46
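
The two predictions differ by only about $17: the normal equation yields the exact least-squares optimum, while gradient descent has only approached it after 1500 iterations, so a small gap is expected.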