Hand-Rolling a Linear Regression Model in Python
## Linear Regression Model

```python
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


def getData():
    r"""
    Use numpy to build two clusters of random points via boolean indexing.
    :return: (G1, G2), each a 2 x N array of points
    """
    dataX = np.random.randint(1, 100, size=(1, 100))
    dataY = np.random.randint(1, 100, size=(1, 100))
    data = np.concatenate((dataX, dataY), axis=0)
    # G1: points whose coordinates are both > 50
    G1tfa = data > 50
    G1tfi = np.logical_and(G1tfa[0, :], G1tfa[1, :])
    G1index = np.nonzero(G1tfi)[0]
    G1 = data[:, G1index]
    # G2: points whose coordinates are both < 50
    G2tfa = data < 50
    G2tfi = np.logical_and(G2tfa[0, :], G2tfa[1, :])
    G2index = np.nonzero(G2tfi)[0]
    G2 = data[:, G2index]
    return (G1, G2)


def getData2():
    r"""
    Generate random numbers with numpy, then filter the ones that
    satisfy the conditions with pandas.
    :return: (G1, G2), each a 2 x N array of points
    """
    df = pd.DataFrame()
    df['X'] = np.random.randint(1, 100, size=(100))
    df['Y'] = np.random.randint(1, 100, size=(100))
    G1 = df[(df['X'] > 50) & (df['Y'] > 50)]
    G2 = df[(df['X'] < 50) & (df['Y'] < 50)]
    return (G1.values.T, G2.values.T)


def getData3():
    r"""
    Generate two normally distributed clusters.
    :return: (G1, G2), each a 2 x N array of points
    """
    # data = np.random.uniform(0, 1, size=1000)  # uniform random sampling
    # data3 = np.random.rand(1000)               # uniform distribution
    # data2 = np.random.normal(0, 1, size=1000)  # normal distribution, mean 0, variance 1
    # data4 = np.random.randn(1000)              # normal distribution, mean 0, variance 1
    point1 = (0.5, 0.5)
    point2 = (2.5, 2.5)
    a = np.random.normal(point1[0], 0.3, size=(1000, 1))
    b = np.random.normal(point1[1], 0.2, size=(1000, 1))
    c = np.random.normal(point2[0], 0.2, size=(1000, 1))
    d = np.random.normal(point2[1], 0.3, size=(1000, 1))
    G1 = pd.DataFrame(data=np.concatenate((a, b), axis=1), columns=['X', 'Y'])
    G2 = pd.DataFrame(data=np.concatenate((c, d), axis=1), columns=['X', 'Y'])
    return (G1.values.T, G2.values.T)


def getData4():
    r"""
    Generate data suitable for linear regression: y = w*x + b + noise.
    :return: (x, y)
    """
    w = 1.2
    b = 0.2
    x = np.linspace(1, 100, 100)
    noise = np.random.randn(100) * 10
    y = w * x + b + noise
    return (x, y)


def linefit(x, y):
    r"""
    Update w and b by gradient descent, estimating each partial
    derivative with a finite difference of the loss.
    :param x:
    :param y:
    :return: (w, b, loss_list, w_list, b_list)
    """
    # initialize the iteration
    w = 0
    b = 0
    w_last = 0.001
    b_last = 0.001
    loss_last = 6330
    stop = 10000
    i = 0
    eta = 0.00001  # learning rate; if it is too large the loss oscillates wildly and never converges
    count = 0      # stop after 100 iterations without progress
    loss_list = []       # stores the loss curve
    w_list = [0.001, 0]  # stores w after each iteration
    b_list = [0.001, 0]  # stores b after each iteration
    while i < stop:
        loss = getLoss(w, b, x, y)
        # secant-style estimates of dLoss/dw and dLoss/db; note that the same
        # loss difference is reused for both partials, so the estimates are coupled
        w_t = w - eta * (loss - loss_last) / (w - w_last)
        b_t = b - eta * (loss - loss_last) / (b - b_last)
        w_list.append(w_t)
        b_list.append(b_t)
        w_last = w
        w = w_t
        b_last = b
        b = b_t
        if abs(loss_last - loss) < 0.01:
            count += 1
            if count >= 100:
                print("w is {:.2f}, b is {:.2f}, error is {}, i is {}".format(w, b, loss, i))
                break
        print("w is {:.2f}, b is {:.2f}, loss is {:.2f}, i is {}".format(w, b, loss, i))
        loss_list.append(loss)
        loss_last = loss
        i += 1
    # xb = np.mean(x)
    # yb = np.mean(y)
    return w, b, loss_list, w_list, b_list


def getLoss(w, b, x, y, MAE=True):
    if MAE:
        # L1: sum of absolute errors (MAE up to the 1/n factor)
        totalLoss = np.sum(abs((w * x + b) - y))
    else:
        # L2: sum of squared errors (MSE up to the 1/n factor)
        totalLoss = np.sum(((w * x + b) - y) ** 2)
    return totalLoss


def getErrorSurface(x, y):
    r"""
    Compute the error surface and the (w, b) pair with the smallest loss.
    :param x:
    :param y:
    :return: (r, w_f, b_f)
    """
    minError = 999999
    w_f = 0
    b_f = 0
    size = 200
    r = np.zeros((size, size))
    for i, b in enumerate(np.linspace(-2.1, 2.2, size)):
        for j, w in enumerate(np.linspace(-2.1, 2.2, size)):
            r[i, j] = getLoss(w, b, x, y)
            if r[i, j] < minError:
                minError = r[i, j]
                w_f = w
                b_f = b
    return r, w_f, b_f


def getErrorSurface2(x, y):
    r"""
    Compute the error surface in the form expected by the contour functions.
    :param x:
    :param y:
    :return: (wa, ba, za)
    """
    minError = 999999
    w_f = 0
    b_f = 0
    size = 200
    r = np.zeros((size, size))
    wa = np.linspace(-2.1, 2.2, size)
    ba = np.linspace(-2.1, 2.2, size)
    z = []
    for i, b in enumerate(ba):
        for j, w in enumerate(wa):
            r[i, j] = getLoss(w, b, x, y)
            z.append(r[i, j])
            if r[i, j] < minError:
                minError = r[i, j]
                w_f = w
                b_f = b
    z = np.array(z)
    za = z.reshape((size, size))
    return wa, ba, za
```
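A note on `linefit`: it never computes an analytic derivative. Each partial derivative is approximated with a finite difference built from the previous iterate, a secant-style estimate:

$$w_{t+1} = w_t - \eta\,\frac{L_t - L_{t-1}}{w_t - w_{t-1}}, \qquad b_{t+1} = b_t - \eta\,\frac{L_t - L_{t-1}}{b_t - b_{t-1}}$$

For comparison, here is a minimal sketch (not part of the original script; `linefit_analytic` is a name made up for this illustration) of the same fit using the exact gradient of the squared-error loss:

```python
import numpy as np

def linefit_analytic(x, y, eta=1e-6, steps=100000):
    """Gradient descent with the exact gradient of L = sum((w*x + b - y)**2)."""
    w, b = 0.0, 0.0
    for _ in range(steps):
        e = w * x + b - y             # residuals
        w -= eta * np.sum(2 * e * x)  # dL/dw = sum(2*e*x)
        b -= eta * np.sum(2 * e)      # dL/db = sum(2*e)
    return w, b
```

Because x spans 1 to 100, the loss is far more sensitive to w than to b, so w converges quickly while b drifts slowly; the elongated valley in the error-surface plot produced below is the same effect made visible.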
```python
if __name__ == "__main__":
    print("linear regression model")
    np.random.seed(10)

    # scatter plot
    fig, ax = plt.subplots()
    # G1, G2 = getData3()
    # ax.scatter(G1[0, :], G1[1, :])
    # ax.scatter(G2[0, :], G2[1, :])
    x, y = getData4()
    w1, b1, lossList, w_list, b_list = linefit(x, y)
    r, w, b = getErrorSurface(x, y)
    w2, b2, z2 = getErrorSurface2(x, y)
    ax.scatter(x, y, color="C0")
    ax.plot(np.r_[0, 100], 1.2 * np.r_[0, 100] + 0.2, color="C1", linewidth=3.0, label='Ground Truth')
    ax.plot(np.r_[0, 100], w * np.r_[0, 100] + b, color="C2", linewidth=3.0, label='fit from error surface')
    ax.plot(np.r_[0, 100], w1 * np.r_[0, 100] + b1, color="C3", linewidth=3.0, label='Gradient')
    ax.legend()
    ax.set_title("GT and scatter")
    fig.show()

    # plot the error surface
    fig, ax = plt.subplots()
    ax.contourf(w2, b2, z2)
    ax.scatter(np.array(w_list), np.array(b_list))
    ax.scatter(np.array(1.2), np.array(0.2), color='r')
    ax.annotate('', xy=(1.2, 0.2), xycoords='data',
                xytext=(1.2 + 0.4, 0.2 + 0.4), textcoords='axes fraction',
                arrowprops=dict(facecolor='white', shrink=0.05),
                horizontalalignment='right', verticalalignment='top')
    # im = ax.imshow(r)
    # cb = plt.colorbar(im)
    ax.set_xlabel('w')
    # ax.set_xlim(0.2, 2.2)
    ax.set_ylabel('b')
    # ax.set_ylim(0, 0.4)
    ax.set_title("error surface")
    fig.show()

    # plot the loss curve
    fig, ax = plt.subplots()
    ax.plot(lossList)
    ax.set_xlabel('step')
    ax.set_ylabel('loss')
    ax.set_ylim(700, 1000)
    ax.set_title("loss")
    fig.show()

    # plot histograms
    # ax.hist(data, bins=20)   # uniform random sampling
    # ax.hist(data3, bins=20)  # uniform distribution
    # ax.hist(data2, bins=20)  # normal distribution, mean 0, variance 1
    # ax.hist(data4, bins=20)  # normal distribution, mean 0, variance 1
    # fix the X/Y axis ranges
    # ax.set_ylim(0, 130)
    # ax.set_xlim(-5, 5)
```
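As a quick sanity check (not part of the original script), the gradient-descent result can be compared against numpy's built-in least-squares line fit:

```python
import numpy as np

# Rebuild the same data that getData4() produces under seed 10,
# then fit a degree-1 polynomial: polyfit returns (slope, intercept).
np.random.seed(10)
x = np.linspace(1, 100, 100)
y = 1.2 * x + 0.2 + np.random.randn(100) * 10
w_ls, b_ls = np.polyfit(x, y, 1)
print("least squares: w = {:.3f}, b = {:.3f}".format(w_ls, b_ls))
```

Both estimates should land close to the ground-truth slope w = 1.2; the intercept is harder to pin down because the noise (std 10) is large relative to b = 0.2.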