1、抽取全部图像的SURF特征(每个图像的特征行数不固定,但列数固定为70)
2、将图像分为两组,一组训练,一组测试
3、将训练图像的全部SURF特征按行合并为一个大矩阵,并将该矩阵聚类为30个簇(视觉词)。
4、将每一个图像的特征代入聚类模型,预测每个特征所属的簇,并统计各簇出现的次数(若统计结果不足30个簇,后面补0)
5、每个图像就表示为一个30维的特征向量
6、送入逻辑回归(Logistic Regression)分类器进行分类学习
7、得到训练结果
# -*- coding: utf-8 -*-
"""Classify cat vs. dog images with a SURF bag-of-visual-words model.

Created on Thu Aug 11 20:51:19 2016

@author: Administrator

Pipeline:
    1. Extract SURF descriptors from every image (a variable number of rows
       per image, a fixed number of columns).
    2. Split the images into a training part and a test part.
    3. Stack all training descriptors and cluster them with MiniBatchKMeans.
    4. Describe each image by how many of its descriptors fall in each
       cluster (one histogram of length ``N_CLUSTERS`` per image).
    5. Train a logistic regression on the histograms and report precision,
       recall and accuracy on the test part.
"""
import glob
import os

import numpy as np

# mahotas and scikit-learn are imported inside the functions that use them so
# that the pure-NumPy helpers below stay importable without the image/ML stack.

IMAGE_GLOB = 'train2/*.jpg'   # images to load; label is taken from the name
TRAIN_FRACTION = .60          # leading share of the images used for training
N_CLUSTERS = 30               # size of the visual vocabulary
RANDOM_SEED = 0               # fixed seed so the split is reproducible


def label_for(filename):
    """Return 1 if *filename* names a cat image, otherwise 0 (dog).

    Only the base name is inspected, so a directory whose name happens to
    contain ``cat`` cannot turn every image into a cat.
    """
    return 1 if 'cat' in os.path.basename(filename) else 0


def list_instances(pattern=IMAGE_GLOB, seed=RANDOM_SEED):
    """Return ``(filenames, targets)`` for the images matching *pattern*.

    ``glob`` gives no ordering guarantee and commonly yields the files
    alphabetically (all cats, then all dogs), which would make a
    "first 60 %" split badly class-skewed.  The names are therefore sorted
    (for determinism) and then shuffled with a fixed seed.
    """
    filenames = sorted(glob.glob(pattern))
    np.random.RandomState(seed).shuffle(filenames)
    targets = [label_for(name) for name in filenames]
    return filenames, targets


def split_index(n_items, train_fraction=TRAIN_FRACTION):
    """Return how many of *n_items* leading items belong to the training set.

    Raises:
        ValueError: if the split would leave the training or the test set
            empty.
    """
    train_len = int(n_items * train_fraction)
    if train_len == 0 or train_len == n_items:
        raise ValueError(
            'Need at least one training and one test image, got %d image(s)'
            % n_items)
    return train_len


def extract_surf_features(filenames):
    """Load every image as greyscale and return its SURF descriptor array.

    Prints the file name with the fraction of files processed so far, and
    the shape of each descriptor array.
    """
    import mahotas as mh
    from mahotas.features import surf

    total = len(filenames)
    surf_features = []
    for counter, name in enumerate(filenames, 1):
        print('Reading image: %s %s' % (name, counter / float(total)))
        image = mh.imread(name, as_grey=True)
        descriptors = surf.surf(image)
        print(descriptors.shape)
        surf_features.append(descriptors)
    return surf_features


def cluster_histogram(cluster_ids, n_clusters):
    """Count how often each of *n_clusters* ids occurs in *cluster_ids*.

    The result always has length *n_clusters*; clusters that never occur
    get a count of 0.
    """
    if len(cluster_ids) == 0:
        return np.zeros(n_clusters, dtype=np.intp)
    return np.bincount(cluster_ids, minlength=n_clusters)


def encode_images(estimator, per_image_descriptors, n_clusters):
    """Turn each image's descriptors into a cluster-count histogram.

    *estimator* must already be fitted and provide ``predict``.  Returns an
    array with one row per image and *n_clusters* columns.
    """
    rows = []
    for descriptors in per_image_descriptors:
        if len(descriptors):
            cluster_ids = estimator.predict(descriptors)
        else:
            # An image without any descriptor has nothing to predict on;
            # it is represented by an all-zero histogram.
            cluster_ids = []
        rows.append(cluster_histogram(cluster_ids, n_clusters))
    return np.array(rows).reshape(len(rows), n_clusters)


def main():
    """Run the whole pipeline and print the evaluation on the test images."""
    from sklearn.cluster import MiniBatchKMeans
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import (accuracy_score, classification_report,
                                 precision_score, recall_score)

    # Collect the file list (cat 1, dog 0).
    all_instance_filenames, all_instance_targets = list_instances()
    train_len = split_index(len(all_instance_filenames))

    surf_features = extract_surf_features(all_instance_filenames)

    # Separate training and test images.
    train_features = surf_features[:train_len]
    test_features = surf_features[train_len:]
    y_train = all_instance_targets[:train_len]
    y_test = all_instance_targets[train_len:]

    # Stack the descriptors of all training images row-wise; the two numbers
    # printed below must agree (sanity check of the concatenation).
    X_train_surf_features = np.concatenate(train_features)
    print(sum(descriptors.shape[0] for descriptors in train_features))
    print(len(X_train_surf_features))

    print('Clustering %s features' % len(X_train_surf_features))
    estimator = MiniBatchKMeans(n_clusters=N_CLUSTERS,
                                random_state=RANDOM_SEED)
    # Only the fitted model is needed, not the transformed distances.
    estimator.fit(X_train_surf_features)
    # In the recorded run estimator.cluster_centers_.shape was (30, 70).

    X_train = encode_images(estimator, train_features, N_CLUSTERS)
    X_test = encode_images(estimator, test_features, N_CLUSTERS)

    clf = LogisticRegression(C=0.001, penalty='l2')
    # A classifier is trained with fit(); it is not a transformer.
    clf.fit(X_train, y_train)
    predictions = clf.predict(X_test)

    print(classification_report(y_test, predictions))
    print('Precision:  %s' % precision_score(y_test, predictions))
    print('Recall:  %s' % recall_score(y_test, predictions))
    print('Accuracy:  %s' % accuracy_score(y_test, predictions))


if __name__ == '__main__':
    main()