sklearn Ensemble Classifiers
Ensemble classifiers:
There are four general ways to build an ensemble classifier:
(1) By manipulating the training set, e.g. bagging and boosting.
(2) By manipulating the input features, e.g. random forest (see the sketch after this list).
(3) By manipulating the class labels, e.g. error-correcting output coding (also sketched below).
(4) By manipulating the learning algorithm, e.g. voting.
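Sections 1-3 below give snippets for approaches (1) and (4). For (2) and (3), here is a minimal sketch along the same lines; the iris data and the parameter values are illustrative assumptions, not part of the original examples:

from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.multiclass import OutputCodeClassifier
from sklearn.linear_model import LogisticRegression

X, y = load_iris(return_X_y=True)

# (2) manipulate the input features: each tree is grown on a random subset of features
rf = RandomForestClassifier(n_estimators=100, max_features='sqrt', random_state=1)
rf.fit(X, y)

# (3) manipulate the class labels: error-correcting output codes wrapped around a base classifier
ecoc = OutputCodeClassifier(LogisticRegression(max_iter=1000), code_size=2, random_state=1)
ecoc.fit(X, y)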
1 bagging
from sklearn.ensemble import BaggingClassifier
from sklearn.neighbors import KNeighborsClassifier

# base estimator: k-nearest neighbours
meta_clf = KNeighborsClassifier()
# each bagged estimator is trained on a random 50% of the samples and 50% of the features
bg_clf = BaggingClassifier(meta_clf, max_samples=0.5, max_features=0.5)
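The snippet above only builds the ensemble. A minimal usage sketch, assuming the iris data (not part of the original post):

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

bg_clf.fit(X_train, y_train)            # train the bagged KNN ensemble
print(bg_clf.score(X_test, y_test))     # mean accuracy on the held-out split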
2 AdaBoost
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

# 200 decision stumps (max_depth=1) combined with the discrete SAMME algorithm
bdt = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1),
                         algorithm="SAMME",
                         n_estimators=200)

bdt.fit(X, y)   # X, y: the training data
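X and y are not defined in the snippet above; a quick way to try it out on synthetic data (make_classification is just an illustrative choice here):

from sklearn.datasets import make_classification

# two-class toy data, purely for illustration
X, y = make_classification(n_samples=1000, n_features=20, random_state=1)

bdt.fit(X, y)            # fit the 200 boosted stumps
print(bdt.score(X, y))   # accuracy on the training data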
3 voting
from sklearn import datasets
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier

iris = datasets.load_iris()
X, y = iris.data[:, 1:3], iris.target

clf1 = LogisticRegression(random_state=1)
clf2 = RandomForestClassifier(random_state=1)
clf3 = GaussianNB()

# hard voting: weighted majority vote over the three classifiers' predicted labels
eclf = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2), ('gnb', clf3)],
                        voting='hard', weights=[2, 1, 2])

for clf, label in zip([clf1, clf2, clf3, eclf],
                      ['Logistic Regression', 'Random Forest', 'naive Bayes', 'Ensemble']):
    scores = cross_val_score(clf, X, y, cv=5, scoring='accuracy')
    print("Accuracy: %0.2f (+/- %0.2f) [%s]" % (scores.mean(), scores.std(), label))
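With voting='hard' the ensemble takes a weighted majority vote over the predicted class labels. Since all three estimators support predict_proba, a soft-voting variant that averages the predicted probabilities is also possible; this variant is an addition here, not part of the original code:

# soft voting: pick the class with the largest weighted average probability
eclf_soft = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2), ('gnb', clf3)],
                             voting='soft', weights=[2, 1, 2])
scores = cross_val_score(eclf_soft, X, y, cv=5, scoring='accuracy')
print("Accuracy: %0.2f (+/- %0.2f) [Soft Voting]" % (scores.mean(), scores.std()))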