传统最小二乘法缺乏稳定性

岭回归就是在最小二乘的损失函数上增加正则项(L2 惩罚)。

\( \arg\min_w \|Xw-y\|_2^2+\alpha\|w\|_2^2 \)

对应矩阵的求解方法为
\(w=(X^TX+\alpha I)^{-1}X^Ty\)

其实就是添加正则项

sklearn.linear_model.Ridge

#### 主要参数

  • alpha:正则项系数(对应上式中的 \(\alpha\))
  • fit_intercept:是否计算截距

车流量分析

# -*- coding: utf-8 -*-
"""
Created on Sun May 28 12:05:23 2017

@author: sfzyk
"""

import numpy as np
from sklearn.linear_model import Ridge
# ``sklearn.cross_validation`` was deprecated in scikit-learn 0.18 and removed
# in 0.20; ``train_test_split`` now lives in ``sklearn.model_selection``.
try:
    from sklearn.model_selection import train_test_split
except ImportError:  # very old scikit-learn releases only ship the legacy module
    from sklearn.cross_validation import train_test_split
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures


# Load the CSV, skipping its header row. Columns 1-4 are used as the input
# features and column 5 as the regression target.
data = np.genfromtxt(r"岭回归.csv", delimiter=',', skip_header=1)
X = data[:, 1:5]
y = data[:, 5]

# Expand the four raw features into polynomial terms up to degree 6.
ploy = PolynomialFeatures(6)
X = ploy.fit_transform(X)


# Hold out 20% of the samples for evaluation. No random_state is given, so the
# split (and therefore the score below) differs from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
R = Ridge()
R.fit(X_train, y_train)

# Score of the fitted model on the held-out samples.
a = R.score(X_test, y_test)

## 手写数字识别

利用全连接神经网络(fully connected NN)完成任务,
也叫做多层感知机(multilayer perceptron,MLP)
神经网络实现手写数字识别
(sklearn 实现)


# -*- coding: utf-8 -*-
"""
Created on Sun May 28 12:30:11 2017

@author: sfzyk
"""
import os
import numpy as np
import sklearn.neural_network as sklnn
def img2vector(fileName):
    """Read a 32x32 text image into a flat vector of 1024 integers.

    The first 32 characters of each of the first 32 lines of the file are
    converted to integers and stored row by row.

    Args:
        fileName: Path of the text file to read.

    Returns:
        A 1-D NumPy integer array of length 1024, where element
        ``i * 32 + j`` holds the character at line ``i``, column ``j``.

    Raises:
        IndexError: If the file has fewer than 32 lines or a line has fewer
            than 32 characters.
        ValueError: If one of the characters read is not a valid integer.
    """
    retMat = np.zeros([1024], int)
    # Use a context manager so the file handle is closed even if parsing fails.
    with open(fileName) as fr:
        lines = fr.readlines()
    for i in range(32):
        row = lines[i]
        for j in range(32):
            retMat[i * 32 + j] = int(row[j])
    return retMat
    
def readDataSet(path):
    """Load every digit file in a directory into features and one-hot labels.

    Each file name must start with its digit label followed by an underscore;
    the label is taken as ``int(name.split("_")[0])``. The file contents are
    converted with ``img2vector``.

    Args:
        path: Directory containing the digit text files.

    Returns:
        A tuple ``(dataSet, hwLabels)`` where ``dataSet`` is an integer array
        of shape ``(numFiles, 1024)`` and ``hwLabels`` is a float array of
        shape ``(numFiles, 10)`` with a 1.0 in the column of each file's digit.

    Raises:
        ValueError: If a file name does not start with an integer prefix.
        IndexError: If the parsed label does not fit the 10 label columns.
    """
    # os.listdir returns entries in arbitrary order; sorting makes the row
    # order (and thus training runs) reproducible across machines.
    fileList = sorted(os.listdir(path))
    numFiles = len(fileList)
    # One row of 1024 pixel values per file.
    dataSet = np.zeros([numFiles, 1024], int)
    # One one-hot row of 10 label columns per file.
    hwLabels = np.zeros([numFiles, 10])
    for i, fileName in enumerate(fileList):
        digit = int(fileName.split("_")[0])
        hwLabels[i][digit] = 1.0
        # os.path.join picks the correct separator for the platform.
        dataSet[i] = img2vector(os.path.join(path, fileName))
    return dataSet, hwLabels
# Load the training and test digit sets from their local folders.
train_dataSet, train_hwLabels = readDataSet(r"D:\mechine_learning\mooc_data\trainingDigits")
test_dataSet, test_hwLabels = readDataSet(r"D:\mechine_learning\mooc_data\testDigits")

# hidden_layer_sizes is a tuple whose i-th entry is the number of neurons in
# the i-th hidden layer; here there is a single hidden layer of 100 neurons.
clf = sklnn.MLPClassifier(
    hidden_layer_sizes=(100,),
    activation='logistic',
    learning_rate_init=0.0001,
    max_iter=2000,
    solver='adam',
    verbose=True,
)
clf.fit(train_dataSet, train_hwLabels)

res = clf.predict(test_dataSet)
# A test sample counts as an error when any of its 10 predicted label columns
# differs from the expected one-hot row.
mismatched_rows = np.any(res != test_hwLabels, axis=1)
error_num = int(np.count_nonzero(mismatched_rows))

error_rate = error_num / len(test_hwLabels)
print("%f" % error_rate)