scikit-learn使用方法
1.支持向量机
# -*- coding: utf-8 -*-
# Example: train a support vector classifier on the bundled digits dataset
# and predict the label of the single sample that was held out of training.
from sklearn import datasets
from sklearn import svm

# Load the built-in test dataset.
digits = datasets.load_digits()

# Set the classifier's hyperparameters.
clf = svm.SVC(gamma=0.001, C=100.)

# Train on every sample except the last one.
clf.fit(digits.data[:-1], digits.target[:-1])

# Predict the held-out last sample; the [-1:] slice keeps the input 2-D.
# print(...) with one argument behaves the same on Python 2 and Python 3.
print(clf.predict(digits.data[-1:]))
想在scikit-learn中保存模型的话,可以使用Python标准库中的pickle模块
# -*- coding: utf-8 -*-
# Example: persist a trained model in memory with pickle and restore it.
import pickle

from sklearn import datasets
from sklearn import svm

# Load the built-in test dataset.
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Initialise the model.
clf = svm.SVC()

# Train on every sample except the last one.
clf.fit(X[:-1], y[:-1])

# Save the model: serialise the fitted estimator to an in-memory byte string.
s = pickle.dumps(clf)

# Load the model back from the serialised bytes.
# NOTE: only unpickle data you produced yourself; pickle.loads can execute
# arbitrary code when given untrusted input.
clf2 = pickle.loads(s)

# Predict the held-out last sample with the restored model.
# print(...) with one argument behaves the same on Python 2 and Python 3.
print(clf2.predict(X[-1:]))
※在数据量非常大的时候,我们需要把模型保存在硬盘上,而不是保存在内存中的字符串里,这时可以使用joblib
# -*- coding: utf-8 -*-
# Example: persist a trained model on disk with joblib and restore it.
from sklearn import datasets
from sklearn import svm

# sklearn.externals.joblib is no longer shipped with recent scikit-learn
# releases; prefer the standalone joblib package and fall back to the
# legacy bundled copy only when the standalone one is unavailable.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# Load the built-in test dataset.
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Initialise the model.
clf = svm.SVC()

# Train on every sample except the last one.
clf.fit(X[:-1], y[:-1])

# Save the model to a file in the current working directory.
joblib.dump(clf, 'filename.pkl')

# Load the model back from that file.
clf2 = joblib.load('filename.pkl')

# Predict the held-out last sample with the restored model.
# print(...) with one argument behaves the same on Python 2 and Python 3.
print(clf2.predict(X[-1:]))
2.如无特殊说明,输入数据都会被转换成float64类型。在下面的例子中,float32类型的X经过fit_transform(X)之后被转换成float64(较新版本的scikit-learn会在可能的情况下保留float32):
# -*- coding: utf-8 -*-
# Example: inspect the dtype of the data before and after a transformer.
import numpy as np
from sklearn import random_projection

# Seeded generator so that the random data is reproducible.
rng = np.random.RandomState(0)
X = rng.rand(10, 2000)

# Y keeps a copy with the original dtype; X is re-created as float32.
Y = np.array(X)
X = np.array(X, dtype='float32')

# Show both dtypes separated by a space. %-formatting is used so that the
# output is identical on Python 2 and Python 3.
print("%s %s" % (Y.dtype, X.dtype))

transformer = random_projection.GaussianRandomProjection()
X_new = transformer.fit_transform(X)

# Show the dtype of the transformed data.
# NOTE(review): the accompanying text expects float64 here; newer
# scikit-learn releases may keep float32 instead - confirm against the
# installed version.
print(X_new.dtype)
3.重新训练并更新参数
# -*- coding: utf-8 -*-
# Example: change an estimator's hyperparameters with set_params() and refit
# the same estimator object on the same data.
import numpy as np
from sklearn.svm import SVC

# Seeded generator so that the random data is reproducible.
rng = np.random.RandomState(0)
X = rng.rand(100, 10)
y = rng.binomial(1, 0.5, 100)
X_test = rng.rand(5, 10)

clf = SVC()

# First fit: switch the kernel to 'linear', train, then predict.
# print(...) with one argument behaves the same on Python 2 and Python 3.
clf.set_params(kernel='linear').fit(X, y)
print(clf.predict(X_test))

# Second fit: switch the kernel to 'rbf' and train the same object again,
# then predict on the same test data.
clf.set_params(kernel='rbf').fit(X, y)
print(clf.predict(X_test))