// 手写识别——KNN (Handwriting recognition — KNN)
#include <iostream>
#include <map>
#include <vector>
#include <utility>
#include <stdio.h>
#include <cmath>
#include <cstdlib>
#include <algorithm>
#include <fstream>

using namespace std;

typedef char tLabel;              // class label attached to each training sample
typedef double tData;             // type of a single feature value
typedef pair<int,double> PAIR;    // <sample index, distance to the test sample>

// When importing a new data set only these two dimensions need to change.
const int colLen = 2;   // number of features per sample
const int rowLen = 6;   // number of training samples

// Training file; when importing a new data set only this name needs to change.
// Each record is expected to hold colLen numeric values followed by one label character.
const char* const dataFileName = "movie_data.txt";

ifstream fin;    // stream used by the KNN constructor to load the training set
ofstream fout;   // not used by the code in this file; kept so the file-level names stay unchanged

/**
 * Minimal k-nearest-neighbour classifier over a fixed-size training set.
 *
 * Usage: construct (loads the training file and reads the test sample from
 * standard input), call get_all_distance(), then get_max_freq_label().
 */
class KNN
{
private:
    tData dataSet[rowLen][colLen];      // training samples, one row per sample
    tLabel labels[rowLen];              // label of each training sample
    tData testData[colLen];             // the sample to classify
    int k;                              // number of neighbours that vote
    map<int,double> map_index_dis;      // sample index -> distance to testData
    map<tLabel,int> map_label_freq;     // label -> votes among the k nearest samples

    // Euclidean distance between two samples of colLen features each.
    double get_distance(tData *d1,tData *d2);

public:
    // Loads the training set and reads the test sample; exits with status 1 on bad input.
    KNN(int k);

    // Computes and prints the distance from the test sample to every training sample.
    void get_all_distance();

    // Prints the k nearest samples and the most frequent label among them.
    void get_max_freq_label();

    // Orders <index, distance> pairs by ascending distance.
    struct CmpByValue
    {
        bool operator() (const PAIR& lhs,const PAIR& rhs) const
        {
            return lhs.second < rhs.second;
        }
    };
};

/**
 * Reads rowLen records (colLen values plus a label) from dataFileName, then
 * reads colLen values for the test sample from standard input.
 *
 * Any failure (file cannot be opened, malformed or truncated record, invalid
 * test value) is reported on standard error and terminates the program with
 * exit status 1, so the object is never left holding uninitialised data.
 *
 * @param k number of nearest neighbours used for voting
 */
KNN::KNN(int k)
{
    this->k = k;

    fin.open(dataFileName);
    if(!fin)
    {
        cerr<<"can not open the file "<<dataFileName<<endl;
        exit(1);
    }

    for(int i = 0; i < rowLen; i++)
    {
        for(int j = 0; j < colLen; j++)
        {
            if(!(fin>>dataSet[i][j]))
            {
                cerr<<"invalid or missing value in "<<dataFileName
                    <<" (sample "<<i<<", column "<<j<<")"<<endl;
                exit(1);
            }
        }
        if(!(fin>>labels[i]))
        {
            cerr<<"missing label in "<<dataFileName<<" (sample "<<i<<")"<<endl;
            exit(1);
        }
    }

    // Release the file and reset the stream state so the shared global
    // stream can be reopened if another KNN object is constructed.
    fin.close();
    fin.clear();

    // Read the sample to classify.
    cout<<"please input the test data :"<<endl;
    for(int i = 0; i < colLen; i++)
    {
        if(!(cin>>testData[i]))
        {
            cerr<<"invalid test data: expected "<<colLen<<" numeric values"<<endl;
            exit(1);
        }
    }
}

/**
 * Euclidean distance between two feature vectors.
 *
 * @param d1 first vector, at least colLen elements
 * @param d2 second vector, at least colLen elements
 * @return square root of the summed squared per-feature differences
 */
double KNN::get_distance(tData *d1,tData *d2)
{
    double sum = 0;
    for(int i = 0; i < colLen; i++)
    {
        const double diff = d1[i] - d2[i];
        sum += diff * diff;
    }
    return sqrt(sum);
}

/**
 * Computes the distance between the test sample and every training sample,
 * stores it in map_index_dis keyed by sample index, and prints each entry.
 */
void KNN::get_all_distance()
{
    for(int i = 0; i < rowLen; i++)
    {
        // <key,value> => <i,distance>
        map_index_dis[i] = get_distance(dataSet[i],testData);
    }

    // Walk the map and print every index with its distance.
    for(map<int,double>::const_iterator it = map_index_dis.begin();
        it != map_index_dis.end(); ++it)
    {
        cout<<"index = "<<it->first<<" distance = "<<it->second<<endl;
    }
}

/**
 * Prints the k nearest training samples and the label that occurs most often
 * among them. When several labels are equally frequent, the one that sorts
 * first wins.
 *
 * Requires get_all_distance() to have been called. If k is larger than the
 * number of available distances it is reduced to that number (with a warning
 * on standard error); if no neighbour is available (k < 1 or no distances
 * computed) an error is printed and nothing is classified.
 */
void KNN::get_max_freq_label()
{
    // Copy the <index, distance> pairs into a vector so they can be sorted by distance.
    vector<PAIR> vec_index_dis(map_index_dis.begin(),map_index_dis.end());

    // Ascending by distance; the stable sort keeps equal distances in index
    // order so the selected neighbours are deterministic.
    stable_sort(vec_index_dis.begin(),vec_index_dis.end(),CmpByValue());

    // Never read past the distances that actually exist.
    const int available = static_cast<int>(vec_index_dis.size());
    int neighbours = k;
    if(neighbours > available)
    {
        cerr<<"k = "<<k<<" exceeds the number of available samples ("
            <<available<<"); using "<<available<<endl;
        neighbours = available;
    }
    if(neighbours < 1)
    {
        cerr<<"can not classify: k must be at least 1 and get_all_distance() must be called first"<<endl;
        return;
    }

    // Start from a clean tally so repeated calls do not accumulate votes.
    map_label_freq.clear();

    for(int i = 0; i < neighbours; i++)
    {
        const int index = vec_index_dis[i].first;

        cout<<"the index = "<<index
            <<" the distance = "<<vec_index_dis[i].second
            <<" the label = "<<labels[index]
            <<" the coordinate ( ";
        for(int col = 0; col < colLen; col++)
        {
            if(col > 0)
            {
                cout<<",";
            }
            cout<<dataSet[index][col];
        }
        cout<<" )"<<endl;

        // Count one vote for this neighbour's label.
        map_label_freq[ labels[index] ]++;
    }

    // Find the most frequent label; strict '>' keeps the first label on ties.
    tLabel label = tLabel();
    int max_freq = 0;
    for(map<tLabel,int>::const_iterator map_it = map_label_freq.begin();
        map_it != map_label_freq.end(); ++map_it)
    {
        if(map_it->second > max_freq)
        {
            max_freq = map_it->second;
            label = map_it->first;
        }
    }

    cout<<"The test data belongs to the "<<label<<" label"<<endl;
}

/**
 * Reads k from standard input, builds the classifier and prints the result.
 *
 * @return 0 on success, 1 when k is not a positive integer
 */
int main()
{
    int k;
    cout<<"please input the k value : "<<endl;
    if(!(cin>>k))
    {
        cerr<<"invalid k value: expected an integer"<<endl;
        return 1;
    }
    if(k < 1)
    {
        cerr<<"invalid k value: k must be at least 1"<<endl;
        return 1;
    }

    KNN knn(k);
    knn.get_all_distance();
    knn.get_max_freq_label();
    return 0;
}