logistic回归
#!/usr/bin/env python
# encoding:utf-8
"""Logistic regression trained with batch and stochastic gradient ascent.

The training file is whitespace separated with three columns per row:
``x1 x2 label`` where ``label`` is 0 or 1.  A constant 1.0 is prepended to
every sample so that ``weights[0]`` acts as the intercept.
"""
import math
import numpy
import time


def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)), element-wise."""
    return 1.0 / (1 + numpy.exp(-x))


def loadData(path="test.txt"):
    """Read samples from *path* and return ``(dataMat, laberMat)``.

    Args:
        path: text file with ``x1 x2 label`` on every non-blank line.

    Returns:
        A tuple of two ``numpy.matrix`` objects: the (m, 3) feature matrix
        whose first column is the constant 1.0, and the (m, 1) label column.

    Raises:
        IndexError / ValueError: if a non-blank line has fewer than three
        fields or a field is not a number.
    """
    dataMat = []
    laberMat = []
    with open(path, "r") as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            dataMat.append([1.0, float(fields[0]), float(fields[1])])
            laberMat.append(float(fields[2]))
    # numpy.mat was removed in NumPy 2.0; asmatrix is its exact equivalent.
    return numpy.asmatrix(dataMat), numpy.asmatrix(laberMat).transpose()


def _as_arrays(dataMat, laberMat):
    """Return features as a 2-D float ndarray and labels as an (m, 1) column."""
    features = numpy.asarray(dataMat, dtype=float)
    labels = numpy.asarray(laberMat, dtype=float).reshape(-1, 1)
    return features, labels


def _report_duration(start_time):
    """Print the wall-clock time elapsed since *start_time*."""
    duration = time.time() - start_time
    # %-formatting prints the same text on Python 2 and Python 3.
    print("duration of time: %s" % duration)


def _ascend_one_sample(weights, sample, label, alpha):
    """Update *weights* in place using the gradient of a single sample."""
    error = label - sigmoid(numpy.dot(sample, weights))
    weights += alpha * error * sample.reshape(-1, 1)


def gradAscent(dataMat, laberMat, alpha=0.001, maxCycle=500):
    """Batch gradient ascent over the whole data set.

    Args:
        dataMat: (m, n) samples (matrix, ndarray or nested list).
        laberMat: m labels, as a column, row or flat sequence.
        alpha: constant step size.
        maxCycle: number of full-batch updates.

    Returns:
        The learned weights as an (n, 1) ndarray.
    """
    start_time = time.time()
    features, labels = _as_arrays(dataMat, laberMat)
    weights = numpy.ones((features.shape[1], 1))
    for _ in range(maxCycle):
        error = labels - sigmoid(numpy.dot(features, weights))
        weights += alpha * numpy.dot(features.T, error)
    _report_duration(start_time)
    return weights


def stocGradAscent(dataMat, laberMat, alpha=0.01):
    """Stochastic gradient ascent: one pass, one update per sample.

    Args:
        dataMat: (m, n) samples (matrix, ndarray or nested list).
        laberMat: m labels, as a column, row or flat sequence.
        alpha: constant step size.

    Returns:
        The learned weights as an (n, 1) ndarray.
    """
    start_time = time.time()
    features, labels = _as_arrays(dataMat, laberMat)
    weights = numpy.ones((features.shape[1], 1))
    for sample, label in zip(features, labels):
        _ascend_one_sample(weights, sample, label, alpha)
    _report_duration(start_time)
    return weights


def betterStocGradAscent(dataMat, laberMat, alpha=0.01, numIter=150):
    """Stochastic gradient ascent with a decaying step size.

    The step used for pass ``j`` and sample ``i`` is
    ``4 / (1 + j + i) + 0.01``: large early on, never below 0.01.

    Args:
        dataMat: (m, n) samples (matrix, ndarray or nested list).
        laberMat: m labels, as a column, row or flat sequence.
        alpha: accepted for backward compatibility only; the step size is
            recomputed before every update, so this value is not used.
        numIter: number of passes over the data.

    Returns:
        The learned weights as an (n, 1) ndarray.
    """
    start_time = time.time()
    features, labels = _as_arrays(dataMat, laberMat)
    weights = numpy.ones((features.shape[1], 1))
    for j in range(numIter):
        for i, (sample, label) in enumerate(zip(features, labels)):
            # Float literals force true division on Python 2 as well;
            # integer division would collapse the step to 0.01 almost
            # immediately.
            step = 4.0 / (1.0 + j + i) + 0.01
            _ascend_one_sample(weights, sample, label, step)
    _report_duration(start_time)
    return weights


def show(dataMat, laberMat, weights):
    """Scatter-plot both classes and draw the learned decision boundary.

    Args:
        dataMat: (m, 3) samples; columns 1 and 2 are plotted as x1 / x2.
        laberMat: m labels; 0 is drawn as red squares, 1 as green dots.
        weights: three weights (any shape that flattens to length 3).
    """
    # Imported here so the training functions stay usable on machines
    # without matplotlib or without a display.
    import matplotlib.pyplot as plt

    features = numpy.asarray(dataMat, dtype=float)
    labels = numpy.asarray(laberMat, dtype=float).ravel().astype(int)
    w = numpy.asarray(weights, dtype=float).ravel()

    negatives = features[labels == 0]
    positives = features[labels == 1]

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(negatives[:, 1], negatives[:, 2], s=30, c="red", marker="s")
    ax.scatter(positives[:, 1], positives[:, 2], s=30, c="green")

    # Boundary: w0 + w1 * x1 + w2 * x2 = 0, solved for x2.
    x = numpy.arange(features[:, 1].min(), features[:, 1].max(), 0.1)
    y = (-w[0] - w[1] * x) / w[2]
    ax.plot(x, y)
    plt.xlabel("x1")
    plt.ylabel("x2")
    plt.show()


def main():
    """Train on ``test.txt`` and plot the result."""
    dataMat, laberMat = loadData()
    # gradAscent(dataMat, laberMat, maxCycle=500) and
    # stocGradAscent(dataMat, laberMat) are drop-in alternatives here.
    weights = betterStocGradAscent(dataMat, laberMat, numIter=80)
    show(dataMat, laberMat, weights)


if __name__ == "__main__":
    main()
参考:http://www.cnblogs.com/coder2012/p/4598913.html