# coding: utf-8
import numpy as np
import pandas as pd


class LR:
    """Linear regression fitted with full-batch gradient descent.

    The last column of ``data`` is treated as the target; every other
    column is a feature.  Features are standardised (zero mean, unit
    variance) and a bias column of ones is prepended before training.
    """

    def __init__(self, data, learning_rate=0.001, iter_max=10000, batch_size=2):
        self.data = data
        self.learning_rate = learning_rate
        self.iter_max = iter_max
        # NOTE(review): batch_size is currently unused -- train() always runs
        # on the full batch.  Kept so the constructor signature is unchanged.
        self.batch_size = batch_size
        self.process_data()

    def standard_scaler(self, data):
        """Standardise the feature columns; the target column is untouched.

        :param data: array of shape (m, k+1), last column is the target.
        :return: array of the same shape with standardised features.
        """
        features = data[:, :-1]
        mean = np.mean(features, axis=0)
        std = np.std(features, axis=0)
        # Guard against division by zero for constant-valued feature columns.
        std = np.where(std == 0, 1.0, std)
        features = (features - mean) / std
        return np.hstack((features, data[:, -1:]))

    def process_data(self):
        """Standardise features and prepend a bias column of ones."""
        data = np.array(self.data, dtype=float)
        data = self.standard_scaler(data)
        one = np.ones((data.shape[0], 1))
        self.data = np.hstack((one, data))
        self.m = self.data.shape[0]      # total number of samples
        self.n = self.data.shape[1] - 1  # number of columns fed to theta (bias + features)

    def model(self):
        """Return predictions X @ theta, shape (m, 1)."""
        return np.dot(self.data[:, :-1], self.theta)

    def mse(self, predict, y):
        """Mean squared error between predictions and targets."""
        return np.sum((predict - y) ** 2) / len(y)

    def cal_grad(self, predict, y):
        """Gradient of the MSE (up to the conventional factor 2) w.r.t. theta.

        BUG FIX: the original loop computed
        ``np.mean((predict - y) * self.data[:, i])`` where ``predict - y``
        has shape (m, 1) and ``self.data[:, i]`` has shape (m,); broadcasting
        turns the product into an (m, m) outer-product matrix, so the mean
        was taken over m*m cross terms instead of the m per-sample terms.
        The vectorised form below computes the correct per-column average
        and returns shape (n, 1), matching theta.
        """
        return np.dot(self.data[:, :-1].T, predict - y) / self.m

    @staticmethod
    def draw(list_data):
        """Plot the loss history.

        matplotlib is imported lazily so the class itself can be used in
        environments without a plotting backend.
        """
        import matplotlib.pyplot as plt
        plt.plot(range(len(list_data)), list_data)
        plt.show()

    def train(self):
        """Run full-batch gradient descent for ``iter_max`` iterations."""
        loss_list = []
        n = 1
        # 1. initialise theta
        self.theta = np.ones((self.n, 1))
        predict = self.model()
        # 2. record the initial loss
        loss = self.mse(predict, self.data[:, -1:])
        loss_list.append(loss)
        while True:
            # 3. compute the gradient
            grad = self.cal_grad(predict, self.data[:, -1:])
            # 4. update theta
            self.theta = self.theta - self.learning_rate * grad
            # 5. recompute predictions and loss
            predict = self.model()
            loss = self.mse(predict, self.data[:, -1:])
            loss_list.append(loss)
            # stop once the iteration budget is exhausted
            if n > self.iter_max:
                break
            n += 1
        self.draw(loss_list)


if __name__ == "__main__":
    data = pd.read_excel('C:/Users/jiedada/Desktop/python/回归/lr.xlsx')
    lr = LR(data)
    lr.train()