机器学习实战笔记 logistic回归

参考链接:http://blog.csdn.net/lu597203933/article/details/38468303

Logistic代码

# Logistic回归分类 使用梯度上升找最佳参数
import numpy as np


def loadDataSet(filename='testSet.txt'):
    """Load a tab-separated, two-feature data set for logistic regression.

    Every non-blank line must contain three tab-separated fields: the two
    feature values followed by an integer class label.

    Args:
        filename: Path of the data file. Defaults to ``'testSet.txt'``, the
            path that used to be hard-coded.

    Returns:
        A ``(datMat, labelMat)`` tuple. ``datMat`` is a list of
        ``[1.0, x1, x2]`` rows, where the leading 1.0 is the input paired
        with the constant (intercept) weight; ``labelMat`` is the list of
        integer labels, in file order.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field cannot be parsed as a number.
        IndexError: If a non-blank line has fewer than three fields.
    """
    datMat = []
    labelMat = []
    # The context manager closes the file even when parsing raises.
    with open(filename) as fr:
        for line in fr:
            stripped = line.strip()
            if not stripped:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            lineArr = stripped.split('\t')
            # Prepend the constant input 1.0 that pairs with the bias weight.
            datMat.append([1.0, float(lineArr[0]), float(lineArr[1])])
            labelMat.append(int(lineArr[2]))
    return datMat, labelMat


def sigmoid(inx):
    """Return the logistic function ``1 / (1 + exp(-inx))``, element-wise.

    The value is computed as ``exp(-log(1 + exp(-inx)))`` through
    ``np.logaddexp``. This is mathematically the same function, but it does
    not overflow (and emit a RuntimeWarning) when ``inx`` is a large
    negative number, which the direct ``np.exp(-inx)`` form does.

    Args:
        inx: A scalar, ndarray or np.matrix of real values.

    Returns:
        The sigmoid of ``inx``; array inputs keep their shape.
    """
    # logaddexp(0, -x) == log(exp(0) + exp(-x)) == log(1 + exp(-x)).
    return np.exp(-np.logaddexp(0, -inx))

# 直观理解(类比最小二乘的梯度下降;Logistic回归实际优化的并不是这个平方误差):
# f(x) = a*x1 + b*x2 + c*x3
# 目标:最小化 L(a,b,c) = (1/2) * Σ_{i=1..100} (f(xi) - yi)^2
# 记下降率 rate = 0.1,初始参数 T0 = [1, 1, 1]
# L 对 a 求偏导 = Σ_{i=1..100} (f(xi) - yi) * xi1 = ra,表示 a 的梯度方向
# 同理可得 rb、rc
# 所以更新规则为 a = a - ra*rate,b = b - rb*rate,c = c - rc*rate
# 梯度上升法
def gradAscent(dataMatIn, classLabels, alpha=0.001, maxCycles=500):
    """Fit logistic-regression weights with batch gradient ascent.

    Every cycle uses the whole data set: it computes the sigmoid prediction
    for all samples, takes the difference from the true labels, and moves
    the weights along ``X^T * (labels - prediction)``.

    Args:
        dataMatIn: 2-D array-like of shape (m, n), one sample per row.
        classLabels: Sequence of m class labels.
        alpha: Step size. Defaults to 0.001, the previously hard-coded value.
        maxCycles: Number of full-batch updates. Defaults to 500, the
            previously hard-coded value.

    Returns:
        The fitted weights as an ``np.matrix`` of shape (n, 1).
    """
    # np.asmatrix replaces the np.mat alias, which NumPy 2.0 removed.
    dataMatrix = np.asmatrix(dataMatIn)
    # Labels arrive as a flat sequence; turn them into an (m, 1) column.
    labelMat = np.asmatrix(classLabels).transpose()

    n = np.shape(dataMatrix)[1]
    # Start from all-ones weights, shape (n, 1). Using a matrix here keeps
    # the return type the same even when maxCycles is 0.
    weights = np.asmatrix(np.ones((n, 1)))
    # alpha * X^T does not change between cycles, so compute it once.
    scaledTranspose = alpha * dataMatrix.transpose()
    for _ in range(maxCycles):
        # Predicted probability for every sample, shape (m, 1).
        h = sigmoid(dataMatrix * weights)
        # Difference between the true labels and the predictions.
        error = labelMat - h
        # (n, 1) = (n, 1) + (n, m) * (m, 1)
        weights = weights + scaledTranspose * error
    return weights


# 随机梯度上升算法
# 可以进行增量式更新
def stoGradAscent0(dataMatrix, classLabels):
    """Run a single in-order pass of stochastic gradient ascent.

    The weights are updated once per sample, using only that sample, so the
    estimate can be refined incrementally as samples arrive.

    Args:
        dataMatrix: 2-D array of shape (m, n), one sample per row.
        classLabels: Sequence of m class labels.

    Returns:
        A 1-D ndarray of n weights.
    """
    sample_count, feature_count = np.shape(dataMatrix)
    step = 0.01
    # Initial weights are all ones, e.g. array([1., 1., 1.]) for n == 3.
    coeffs = np.ones(feature_count)
    for row in range(sample_count):
        sample = dataMatrix[row]
        # Prediction for this one sample only.
        predicted = sigmoid(sum(sample * coeffs))
        residual = classLabels[row] - predicted
        coeffs = coeffs + step * residual * sample
    return coeffs


# 改进的随机梯度上升算法
def stoGradAscent1(dataMatrix, classLabels, numIter=150):
    """Fit logistic-regression weights with improved stochastic gradient ascent.

    Each of the ``numIter`` passes visits every sample exactly once, in a
    fresh random order, and uses a step size that shrinks as the pass and
    position counters grow but never falls below 0.01.

    The earlier version drew a random position in a shrinking index list and
    then used that position directly as the sample index. Later draws in a
    pass could therefore only reach low-numbered rows, and samples could be
    repeated or skipped. Drawing a permutation fixes both problems.

    Args:
        dataMatrix: 2-D array of shape (m, n), one sample per row.
        classLabels: Sequence of m class labels.
        numIter: Number of passes over the data set.

    Returns:
        A 1-D ndarray of n weights.
    """
    m, n = np.shape(dataMatrix)
    weights = np.ones(n)
    for j in range(numIter):
        # A new random visiting order per pass: sampling without replacement.
        for i, sampleIndex in enumerate(np.random.permutation(m)):
            # Decay the step size with j and i; the constant term keeps it
            # strictly positive.
            alpha = 4 / (1.0 + j + i) + 0.01
            h = sigmoid(sum(dataMatrix[sampleIndex] * weights))
            error = classLabels[sampleIndex] - h
            weights = weights + alpha * error * dataMatrix[sampleIndex]
    return weights


# 画出数据集和最佳拟合直线的函数
def plotBestFit(wei):
    """Plot the data set together with the fitted decision boundary.

    The samples are read with ``loadDataSet()``. Label-1 samples are drawn
    as red squares and all others as green dots. The line drawn is the set
    of points where ``w0 + w1*x1 + w2*x2 == 0``, i.e. where the sigmoid
    output is exactly 0.5.

    Args:
        wei: The three weights ``(w0, w1, w2)``. Any array-like is accepted:
            an ``np.matrix`` (as before), or an ndarray/list of shape (3,),
            (3, 1) or (1, 3).

    Returns:
        None. Opens a matplotlib window via ``plt.show()``.
    """
    import matplotlib.pyplot as plt

    # Flatten so matrices, column vectors and flat arrays all work; the
    # previous wei.getA() call accepted np.matrix only.
    weights = np.asarray(wei).reshape(-1)
    dataMat, labelMat = loadDataSet()
    dataArr = np.array(dataMat)

    # Split the samples by class; columns 1 and 2 hold the two features
    # (column 0 is the constant 1.0).
    xcord1, ycord1 = [], []
    xcord2, ycord2 = [], []
    for row, label in zip(dataArr, labelMat):
        if int(label) == 1:
            xcord1.append(row[1])
            ycord1.append(row[2])
        else:
            xcord2.append(row[1])
            ycord2.append(row[2])

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(xcord1, ycord1, s=30, c='red', marker='s')
    ax.scatter(xcord2, ycord2, s=30, c='green')

    x = np.arange(-3.0, 3.0, 0.1)
    # With z = w0*1 + w1*x1 + w2*x2 and h = 1 / (1 + exp(-z)), z == 0 gives
    # h == 0.5, the boundary between the two classes. Solving z == 0 for x2:
    y = (-weights[0] - weights[1] * x) / weights[2]
    ax.plot(x, y)
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.show()


# 修改之后的随机梯度上升法
# dataArra, labelMat = loadDataSet()
# weights = stoGradAscent1(np.array(dataArra), labelMat, numIter=150)
# print(weights)
# weights = np.matrix(weights).transpose()
# plotBestFit(weights)

# 随机梯度上升法

# dataArra, labelMat = loadDataSet()
# weights=stoGradAscent0(np.array(dataArra),labelMat)
# print(weights)
# weights = np.matrix(weights).transpose()
# plotBestFit(weights)


# Batch gradient ascent demo: load the samples, fit the weights on the whole
# data set, print them, and plot the data with the fitted boundary.
dataArra, labelMat = loadDataSet()
weights = gradAscent(dataArra, labelMat)
print(weights)
# Make sure the weights are an np.matrix before handing them to the plotter.
weights = np.matrix(weights)
plotBestFit(weights)


# weights = gradAscent(dataArra, labelMat)
# weights = stoGradAscent1(np.array(dataArra), labelMat)
# print(weights)
# plotBestFit(np.mat(weights.transpose()))

# import matplotlib.pyplot as plt
# # plt.plot([1, 2, 3, 4], [1, 4, 9, 16],'r')
# plt.axis([0, 5, 0, 20])
# t = np.arange(0, 5, 0.2)
# plt.plot(t, t, 'r--', t, t ** 2, 'bs', t, t ** 3, 'g^')
# plt.ylabel('some numbers')
# plt.show()

 

posted @ 2017-12-16 10:09  张秀杰  阅读(211)  评论(0)  编辑  收藏  举报