# 吴恩达人工智能-python实现逻辑回归
# 吴恩达人工智能
# 逻辑回归python代码实现
# 逐行注释
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
# 逻辑回归算法实现
# sigmoid函数和初始化数据
# 数组说第几列全是从0开始
def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))``, applied element-wise.

    Args:
        z: A real scalar or a NumPy array/matrix.

    Returns:
        The logistic value(s); array inputs keep their shape.
    """
    return 1 / (1 + np.exp(-z))
# 初始化数据,载入新数据
def init_data(path='data.csv', delimiter=None):
    """Load the data set and build the matrices used for training and plotting.

    The file is read with ``np.loadtxt``. Every column except the last is
    treated as a feature; the last column is the class label.

    Args:
        path: File to read. Defaults to ``'data.csv'``.
        delimiter: Column separator handed to ``np.loadtxt``. ``None`` (the
            default) splits on whitespace; pass ``','`` for comma-separated
            files.

    Returns:
        A tuple ``(dataY, dataMatIn, classLabels)`` where

        * ``dataY`` is the raw feature matrix with a column of ones inserted
          at index 0 (the constant term x0),
        * ``dataMatIn`` is ``dataY`` with the column at index 2 (the second
          feature) squared,
        * ``classLabels`` is the last column of the file.

    Raises:
        IndexError: If the file has fewer than two feature columns.
    """
    # ndmin=2 keeps a single-row file two-dimensional so the column slicing
    # below works for any number of rows.
    data = np.loadtxt(path, delimiter=delimiter, ndmin=2)
    features = data[:, :-1]
    classLabels = data[:, -1]

    # Prepend the constant column x0 = 1; np.insert returns a new array.
    dataY = np.insert(features, 0, 1, axis=1)

    # Square the second feature on a copy so the raw matrix (and the loaded
    # data it came from) is left untouched.
    dataMatIn = dataY.copy()
    dataMatIn[:, 2] = np.power(dataMatIn[:, 2], 2)

    return dataY, dataMatIn, classLabels
# 梯度下降
def grad_descent(dataMatIn, classLabels, alpha=0.01, maxCycle=5000):
    """Fit logistic-regression weights with batch gradient descent.

    Starting from all-ones weights, each iteration applies the vectorised
    update ``w <- w + alpha * X^T (y - sigmoid(X w)) / m``. No regularisation
    term is applied.

    Args:
        dataMatIn: Feature matrix of shape (m, n); a 1-D sequence is treated
            as a single row.
        classLabels: The m class labels, as a 1-D sequence or a column/row
            vector.
        alpha: Step size. Defaults to 0.01.
        maxCycle: Number of iterations to run. Defaults to 5000.

    Returns:
        The fitted weights as an ``np.matrix`` of shape (n, 1).

    Raises:
        ValueError: If there is no training data or the number of labels does
            not match the number of rows.
    """
    # Plain ndarrays are used internally: np.mat is no longer available in
    # NumPy 2.0, and `@` makes the matrix products explicit.
    features = np.atleast_2d(np.asarray(dataMatIn, dtype=float))
    # reshape(-1, 1) yields a column vector whether the labels arrive as a
    # flat sequence, a row or a column.
    labels = np.asarray(classLabels, dtype=float).reshape(-1, 1)

    m, n = features.shape
    if features.size == 0:
        raise ValueError("grad_descent requires at least one training sample")
    if labels.shape[0] != m:
        raise ValueError(
            "number of labels (%d) does not match number of samples (%d)"
            % (labels.shape[0], m)
        )

    weights = np.ones((n, 1))
    for _ in range(maxCycle):
        # Hypothesis: predicted probability for every sample, shape (m, 1).
        h = sigmoid(features @ weights)
        weights = weights + alpha * (features.T @ (labels - h)) / m

    # Returned as a matrix so existing callers keep receiving the same type.
    return np.asmatrix(weights)
# 正则化逻辑回归代码,非向量表示的
"""
def costReg(theta, X, y, learningRate):
theta = np.matrix(theta)
X = np.matrix(X)
y = np.matrix(y)
first = np.multiply(-y, np.log(sigmoid(X * theta.T)))
second = np.multiply((1 - y), np.log(1 - sigmoid(X * theta.T)))
reg = (learningRate / (2 * len(X))) * np.sum(np.power(theta[:, 1:theta.shape[1]], 2))
return np.sum(first -second) / (len(X)) + reg
"""
# 绘图
def plotBestFIt(weights):
    """Scatter the two classes and draw the fitted decision boundary.

    The data set is reloaded through ``init_data()``. Samples labelled 1 are
    drawn as red squares, all others as green dots. The boundary is the curve
    ``x2 = sqrt((-w0 - w1 * x1) / w2)`` for x1 in [-3, 3), which corresponds
    to a model whose second feature was squared before training.

    Args:
        weights: Fitted weights, indexable as ``weights[i, 0]`` for
            i = 0, 1, 2 (for example the (3, 1) result of ``grad_descent``).
    """
    dataY, _, classLabels = init_data()

    # Column 0 of dataY is the constant term; columns 1 and 2 are the raw
    # features used as plot coordinates.
    positive = classLabels == 1
    negative = ~positive

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(dataY[positive, 1], dataY[positive, 2], s=30, c='red', marker='s')
    ax.scatter(dataY[negative, 1], dataY[negative, 2], s=30, c='green')

    w0 = weights[0, 0]
    w1 = weights[1, 0]
    w2 = weights[2, 0]
    x = np.arange(-3, 3, 0.1)
    with np.errstate(divide='ignore', invalid='ignore'):
        radicand = (-w0 - w1 * x) / w2
    # Only draw where the square root is real; this avoids NaNs and the
    # RuntimeWarning np.sqrt raises on negative input.
    drawable = np.isfinite(radicand) & (radicand >= 0)
    ax.plot(x[drawable], np.sqrt(radicand[drawable]))

    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()
# 计算结果
if __name__ == '__main__':
    # Load the data, fit the weights, print them and show the boundary plot.
    _, dataMatIn, classLabels = init_data()
    r = grad_descent(dataMatIn, classLabels)
    print(r)
    plotBestFIt(r)