常见machine learning模型实现

一、感知机模型

 

二、线性回归(Linear Regression)

from numpy import *

def loadData(filename):
    """Load two-column CSV training data for linear regression.

    Each line is "feature,target". A bias term 1.0 is prepended to every
    feature row so the returned design matrix already contains the intercept
    column.

    Args:
        filename: path to the comma-separated data file.

    Returns:
        (x, y): x is a list of [1.0, feature] rows, y a list of float targets.
    """
    x = []
    y = []
    # "with" closes the file deterministically — the original opened it and
    # never closed it (resource leak).
    with open(filename) as f:
        for line in f:
            lineData = line.strip().split(',')
            x.append([1.0, float(lineData[0])])
            y.append(float(lineData[1]))
    return x, y

# Hypothesis function. theta and x are 1-D arrays, so the dot product yields
# a scalar (for 2-D arrays dot() would instead perform matrix multiplication).
def h(theta, x):
    return dot(theta, x)

# Batch gradient descent (one step)
def batch_gradient_descent(alpha, theta, x, y):
    """Perform ONE batch-gradient-descent update for linear regression.

    Args:
        alpha: learning rate.
        theta: current parameter vector, shape (n,).
        x: design matrix, shape (m, n) (bias column included).
        y: target vector, shape (m,).

    Returns:
        Updated parameter vector theta - alpha/m * X^T (X theta - y).

    BUG FIX: the original stepped from ``newtheta[j]`` — a freshly zeroed
    array — instead of ``theta[j]``, so it returned ``-alpha * gradient``
    and silently discarded the incoming parameters. The update is also
    vectorized here instead of the double Python loop.
    """
    m = x.shape[0]
    # Gradient of the MSE cost: X^T (X theta - y) / m
    gradient = x.T.dot(x.dot(theta) - y) / m
    return theta - alpha * gradient

# Normal equation (closed-form least squares)
def normal_equation(x, y):
    """Closed-form least-squares solution theta minimizing ||X theta - y||^2.

    Mathematically theta = (X^T X)^-1 X^T y; this implementation uses
    ``linalg.lstsq`` instead of explicitly inverting X^T X, which is
    numerically more stable and remains defined when X^T X is singular
    (rank-deficient X), where the original ``linalg.inv`` call would raise.
    For full-rank X the result is identical.
    """
    return linalg.lstsq(x, y, rcond=None)[0]

# Cost function: mean squared error J(theta) = ||X theta - y||^2 / (2m)
def cost_function(theta, x, y):
    samples = x.shape[0]
    residual = x.dot(theta) - y
    # dot of the residual with itself is its squared Euclidean norm
    return residual.dot(residual) / (2 * samples)

def run():
    """Fit univariate linear regression on ex1data1.txt two ways:

    1. 1000 iterations of batch gradient descent (alpha = 0.01),
       recording the cost before each step.
    2. The closed-form normal equation.

    Prints the resulting parameter vectors and the cost history.
    """
    x, y = loadData('ex1data1.txt')
    x = array(x)
    y = array(y)  # target vector
    m, n = x.shape
    theta = array([0] * n, dtype=float)
    costs = []
    for iters in range(1000):
        costs.append(cost_function(theta, x, y))
        theta = batch_gradient_descent(0.01, theta, x, y)
    # print() calls — the original used Python-2 print statements,
    # which are a syntax error on Python 3.
    print("batch gradient descent:\n")
    print("theta:", theta)
    print('cost:\n', costs)

    print("normal equation:\n")
    theta = normal_equation(x, y)
    print("theta:", theta)


if __name__ == "__main__":
    run()

 

三、Logistic Regression

def sigmoid(x):
    """Logistic function 1 / (1 + e^-x); maps the reals into (0, 1)."""
    denom = exp(-x) + 1
    return 1.0 / denom

def trainLogRegres(x, y, opts):
    """Train logistic-regression weights by gradient descent.

    Args:
        x: numpy matrix of samples, one row each (bias column included).
        y: numpy column-vector matrix of 0/1 labels.
        opts: dict with 'alpha' (learning rate), 'maxIter' (epochs) and
              'optimizeType' ('batchGraDescent' or 'stocGraDescent').

    Returns:
        weight: (n, 1) matrix of learned weights.

    Raises:
        NameError: on an unrecognized 'optimizeType'.
    """
    m, n = x.shape
    alpha = opts["alpha"]
    maxIter = opts['maxIter']
    weight = ones((n, 1))
    method = opts['optimizeType']

    for _ in range(maxIter):
        if method == 'batchGraDescent':
            # whole-dataset gradient: X^T (sigmoid(X w) - y)
            error = sigmoid(x * weight) - y
            weight = weight - alpha * x.T * error
        elif method == 'stocGraDescent':
            # one update per sample, in row order
            for row in range(m):
                sample = x[row, :]
                err = sigmoid(sample * weight) - y[row, 0]
                weight = weight - alpha * sample.T * err
        else:
            raise NameError('Not support optimize method type!')

    return weight

def testLogRegres(weight, x, y):
    """Classification accuracy of logistic weights on (x, y).

    A sample is predicted positive when sigmoid(x_i . w) > 0.5; the
    prediction is compared against the 0/1 label in y.
    """
    m = x.shape[0]
    correct = 0
    for i in range(m):
        prob = sigmoid(x[i, :] * weight)[0, 0]
        if (prob > 0.5) == bool(y[i, 0]):
            correct += 1
    return float(correct) / m

# Each row of x is one sample; y is a column vector of labels.
def loadData():
    """Load testSet.txt: whitespace-separated "x1 x2 label" per line.

    A bias term 1.0 is prepended to every feature row.

    Returns:
        (x, y): x is an m x 3 numpy matrix, y an m x 1 numpy matrix.
    """
    x = []
    y = []
    # "with" closes the file deterministically — the original leaked the
    # handle.
    with open("testSet.txt") as f:
        for line in f:
            lineArr = line.strip().split()
            x.append([1.0, float(lineArr[0]), float(lineArr[1])])
            y.append(float(lineArr[2]))
    return mat(x), mat(y).T

if __name__ == '__main__':
    # Train logistic regression on testSet.txt with stochastic gradient
    # descent, then report accuracy on the same (training) data.
    x, y = loadData()
    opts = {'alpha': 0.01, 'maxIter': 50, 'optimizeType': 'stocGraDescent'}
    weight = trainLogRegres(x, y, opts)
    accuracy = testLogRegres(weight, x, y)
    # print() call — the original used a Python-2 print statement,
    # which is a syntax error on Python 3.
    print("accuracy:", accuracy)

 

四、SVM

五、kmeans

https://en.wikipedia.org/wiki/Latent_semantic_analysis

posted @ 2016-10-18 20:03  合唱团abc  阅读(476)  评论(0编辑  收藏  举报