逻辑回归以及Python代码实现

本文的逻辑回归使用 sigmoid 函数。关于逻辑回归以及 sigmoid 函数的推导，下面这位博主讲得很清楚：

https://www.cnblogs.com/hum0ro/p/9652674.html

下面给出具体代码。

数据集可以在我的下载页面获取：https://download.csdn.net/download/qq_41938259/12129611

import os
import pandas as pd
from sklearn import linear_model
import matplotlib.pyplot as plt
import numpy as np


def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` element-wise.

    The value is evaluated as ``exp(-log(1 + exp(-x)))`` using
    ``np.logaddexp`` so that large-magnitude negative inputs do not overflow
    ``np.exp`` (the direct formula emits a RuntimeWarning there).

    Args:
        x: A real scalar or array of real numbers.

    Returns:
        The logistic value(s), each in the closed interval [0, 1], with the
        same shape as ``x``.
    """
    # logaddexp(0, -x) == log(exp(0) + exp(-x)) == log(1 + exp(-x)),
    # computed without ever forming exp(-x) for huge -x.
    return np.exp(-np.logaddexp(0.0, np.negative(x)))


def gradAscent(data, label, alpha=0.001, num_cycles=500):
    """Fit logistic-regression weights with full-batch gradient ascent.

    Weights start as all ones. Every cycle computes the predictions
    ``sigmoid(X @ w)`` for the whole data set and moves the weights by
    ``alpha * X.T @ (y - predictions)``.

    Args:
        data: Feature table with one row per sample and one column per
            feature (a DataFrame or any 2-D array-like of numbers).
        label: Target values, one per sample (DataFrame, Series or
            array-like). Any layout holding ``m`` values is accepted and
            reshaped to a column.
        alpha: Step size applied to each update.
        num_cycles: Number of full-batch updates to perform.

    Returns:
        numpy.ndarray of shape ``(n_features, 1)`` holding the weights.

    Raises:
        ValueError: If the number of labels differs from the number of rows
            in ``data``.
    """
    features = np.asarray(data, dtype=float)
    # Force a column vector: a 1-D label would otherwise broadcast against
    # the (m, 1) predictions and produce an (m, m) error matrix.
    targets = np.asarray(label, dtype=float).reshape(-1, 1)
    n_samples, n_features = features.shape
    if targets.shape[0] != n_samples:
        raise ValueError(
            "label has %d values but data has %d rows"
            % (targets.shape[0], n_samples)
        )

    weights = np.ones((n_features, 1))
    for _ in range(num_cycles):
        predicted = sigmoid(features.dot(weights))
        error = targets - predicted
        weights = weights + alpha * features.T.dot(error)
    return weights


def train_OLS(x, y):
    """Fit an ordinary-least-squares line and return its predictions on ``x``.

    Prints the intercept and the first coefficient of the fitted
    ``LinearRegression`` model before returning.

    Args:
        x: Training features, as accepted by ``LinearRegression.fit``.
        y: Training targets; may be 1-D or a single-column 2-D structure.

    Returns:
        The model's predictions for ``x``.
    """
    model = linear_model.LinearRegression()
    model.fit(x, y)
    # intercept_ is a scalar for 1-D y and an array for 2-D y; coef_ is 1-D
    # or 2-D accordingly. Flattening first makes "[0]" valid in both cases.
    t0 = np.ravel(model.intercept_)[0]
    t1 = np.ravel(model.coef_)[0]
    print('打印b', t0)
    print('打印a', t1)
    return model.predict(x)


def visualize_model(x1, y1, x2, y2, label1='faster', label2='lower'):
    """Show a scatter plot of two point groups on distance/money axes.

    The first group is drawn in blue and the second in red, both
    semi-transparent, and a legend identifies them.

    Args:
        x1: X coordinates (distance) of the first group.
        y1: Y coordinates (money) of the first group.
        x2: X coordinates (distance) of the second group.
        y2: Y coordinates (money) of the second group.
        label1: Legend entry for the first group.
        label2: Legend entry for the second group.
    """
    fig = plt.figure(figsize=(6, 6), dpi=80)
    ax = fig.add_subplot(111)
    ax.set_xlabel("$distance$")
    ax.set_xticks(range(0, 3000, 500))
    ax.set_ylabel("$money$")
    ax.set_yticks(range(0, 4000, 100))
    # Each scatter needs a label; a legend with no labelled artists is
    # empty and makes matplotlib emit a warning.
    ax.scatter(x1, y1, color="b", alpha=0.4, label=label1)
    ax.scatter(x2, y2, color="r", alpha=0.4, label=label2)
    ax.legend(shadow=True)
    plt.show()


if __name__ == '__main__':
    # Read the workbook through an explicit path instead of changing the
    # process-wide working directory. read_excel has no `sep` option
    # (that belongs to the CSV readers), so it is not passed.
    data = pd.read_excel(os.path.join('D:\\', '附件1.xlsx'))

    # Keep only the columns used below, under descriptive names.
    test1 = pd.DataFrame({
        'result': data['III'],
        'distance': data['II'],
        'money': data['VI'],
        'mistake': data['V'],
    })

    # Keep rows that are not flagged as mistakes.
    test1 = test1[test1['mistake'] == 0]
    # Discard the known-bad record (row label 129). It is removed before the
    # split so that the plot and the training data both exclude it.
    test1 = test1.drop(index=129)

    faster = test1.loc[test1['result'] == 1, ['distance', 'money']]
    lower = test1.loc[test1['result'] == 0, ['distance', 'money']]

    visualize_model(faster['distance'], faster['money'], lower['distance'], lower['money'])

    # Design matrix: a constant column X0 for the bias term plus two features.
    m = test1.shape[0]
    datas = pd.DataFrame({'X0': np.ones(m), 'X1': test1['distance'], 'X2': test1['money']})
    labels = pd.DataFrame({'label': test1['result']})
    print(gradAscent(datas, labels))