# -*- coding: utf-8 -*-
"""
Created on Tue Aug 09 22:55:06 2016

@author: Administrator

Two small scikit-learn ``Pipeline`` examples:

1. ANOVA feature selection followed by a linear SVM.
2. PCA followed by logistic regression, tuned with ``GridSearchCV``.
"""
import numpy as np
from sklearn import datasets, decomposition, linear_model, svm
from sklearn.datasets import make_classification
from sklearn.feature_selection import SelectKBest, f_classif
# GridSearchCV lives in sklearn.model_selection; the old sklearn.grid_search
# module no longer exists in current scikit-learn releases.
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline

# ---------------------------------------------------------------------------
# Method 1: feature selection + SVM in one pipeline
# ---------------------------------------------------------------------------

# Generate a synthetic classification data set.
X, y = make_classification(n_informative=5, n_redundant=0, random_state=42)

# Define the pipeline: ANOVA feature selection first, then a linear SVM.
# f_classif is the ANOVA F-test meant for class labels (f_regression is for
# continuous targets).
anova_filter = SelectKBest(f_classif, k=5)
clf = svm.SVC(kernel='linear')
pipe = Pipeline([('anova', anova_filter), ('svc', clf)])

# Step parameters are addressed as "<step name>__<parameter>" (double
# underscore). This overrides the k=5 given above with k=10 and sets the
# SVC's C to 0.1.
pipe.set_params(anova__k=10, svc__C=.1)
pipe.fit(X, y)

# Pipeline.predict applies the transforms of the earlier steps and then calls
# predict on the final estimator.
prediction = pipe.predict(X)
# Score on the training data; printed so the value is visible when this file
# is run as a script rather than typed into an interactive session.
print(pipe.score(X, y))

# Boolean mask of the features kept by the 'anova' step.
s = pipe.named_steps['anova'].get_support()
print(s)

# ---------------------------------------------------------------------------
# Method 2: use a pipeline as the estimator of a grid search
# ---------------------------------------------------------------------------

digits = datasets.load_digits()
X_digits = digits.data
y_digits = digits.target

# Define the pipeline: dimensionality reduction (PCA) first, then logistic
# regression.
pca = decomposition.PCA()
logistic = linear_model.LogisticRegression()
pipe = Pipeline(steps=[('pca', pca), ('logistic', logistic)])

# Hand the whole pipeline to GridSearchCV as its estimator; the grid keys use
# the same "<step name>__<parameter>" addressing as set_params.
n_components = [20, 40, 64]
Cs = np.logspace(-4, 4, 3)
estimator = GridSearchCV(pipe,
                         dict(pca__n_components=n_components,
                              logistic__C=Cs))
estimator.fit(X_digits, y_digits)
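# Pipeline has no prediction logic of its own; it uses the predict method of
# the last estimator in the pipeline. From the Pipeline.predict documentation:
# "Applies transforms to the data, and the predict method of the final
# estimator. Valid only if the final estimator implements predict."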