K-means算法
K-means算法
#include<iostream>
#include<fstream>
#include<stdlib.h>
#include<math.h>
using namespace std;

#define K 4                     /* data dimensionality */
#define C 8                     /* number of clusters */
#define N 150                   /* number of samples */
#define IterMax 5               /* maximum number of iterations */
#define IterTherhold 0.0000001  /* termination threshold on the change in fitness */

/* One sample. */
typedef struct{
    double p[K];    /* coordinates of the sample */
    int Lable;      /* index of the cluster the sample is currently assigned to */
    double dis[C];  /* distance to each of the C centroids; must hold C entries
                       because it is indexed by cluster, not by dimension */
}Data;

Data dat[N];                     /* all samples */
double cluster[C][K] = {0.0};    /* current centroids, one row per cluster */
double oldfitness = 0.0;         /* fitness of the previous iteration */
double fitness = 0.0;            /* fitness of the current iteration */

/*
 * Returns true when `index` occurs among the first `n` entries of `rand_num`.
 */
bool is_equal(int rand_num[], int n, int index)
{
    for(int i = 0; i < n; i++) {
        if(rand_num[i] == index) {
            return true;
        }
    }
    return false;
}

/*
 * Reads N samples of K whitespace-separated values each from "test.data"
 * into `dat`.
 *
 * Returns true when every value was read, false when the file could not be
 * opened or a value was missing or malformed. On failure `dat` may be only
 * partially filled.
 */
bool input_data()
{
    ifstream in("test.data", ios::in);
    if(!in) {
        return false;
    }
    for(int i = 0; i < N; i++) {
        for(int k = 0; k < K; k++) {
            if(!(in >> dat[i].p[k])) {
                return false;
            }
        }
    }
    return true;
}

/*
 * Initializes the centroids: picks C distinct sample indices with rand() and
 * copies those samples into `cluster`.
 *
 * rand() is not seeded anywhere in this file, so the same indices are chosen
 * on every run.
 */
void Init_center()
{
    int rand_num[C] = {0};
    int i = 0;
    while(i < C) {
        int index = rand() % N;
        /* Only accept an index that has not been picked yet. */
        if(!is_equal(rand_num, i, index)) {
            rand_num[i++] = index;
        }
    }
    for(int j = 0; j < C; j++) {
        for(int k = 0; k < K; k++) {
            cluster[j][k] = dat[rand_num[j]].p[k];
        }
    }
}

/*
 * Returns the Euclidean distance between sample `x` and centroid `y`.
 */
double Eulid_dis(int x, int y)
{
    double distance = 0.0;
    for(int i = 0; i < K; i++) {
        double diff = dat[x].p[i] - cluster[y][i];
        distance += diff * diff;
    }
    return sqrt(distance);
}

/*
 * Assigns every sample to the centroid with the smallest entry in its `dis`
 * array (the lowest cluster index wins ties), then stores the sum of those
 * smallest distances in `fitness` after moving the previous value to
 * `oldfitness`.
 *
 * Reads `dis`, so calculate_distance() must have filled it beforehand.
 */
void Make_new_cluster()
{
    double bias = 0.0;
    for(int i = 0; i < N; i++) {
        double mindis = dat[i].dis[0];
        dat[i].Lable = 0;
        for(int j = 1; j < C; j++) {
            if(mindis > dat[i].dis[j]) {
                mindis = dat[i].dis[j];
                dat[i].Lable = j;
            }
        }
    }
    for(int i = 0; i < N; i++) {
        bias += dat[i].dis[dat[i].Lable];
    }
    oldfitness = fitness;
    fitness = bias;
}

/*
 * Fills dat[i].dis[j] with the distance from sample i to centroid j for every
 * sample and every cluster.
 */
void calculate_distance()
{
    for(int i = 0; i < N; i++) {
        for(int j = 0; j < C; j++) {
            dat[i].dis[j] = Eulid_dis(i, j);
        }
    }
}

/*
 * Moves each centroid to the per-dimension mean of the samples assigned to it.
 * A cluster with no assigned samples keeps its previous centroid.
 */
void Make_new_center()
{
    for(int i = 0; i < C; i++) {
        for(int k = 0; k < K; k++) {
            double tmp = 0.0;
            int total = 0;
            for(int j = 0; j < N; j++) {
                if(dat[j].Lable == i) {
                    tmp += dat[j].p[k];
                    total++;
                }
            }
            if(total > 0) {
                cluster[i][k] = tmp / total;
            }
        }
    }
}

/************************************
 *           Entry point            *
 ************************************/
/*
 * Loads the samples, initializes the centroids and iterates until either
 * IterMax iterations have run or the fitness changes by no more than
 * IterTherhold. Prints the fitness after every iteration and finally one
 * tab-separated line per centroid.
 *
 * Returns 0 on success, 1 when the input could not be read.
 */
int main()
{
    if(!input_data()) {
        cerr << "failed to read " << N << " samples of " << K
             << " values from test.data" << endl;
        return 1;
    }
    Init_center();

    int i = 0;
    double differ = 1.0;
    while(i < IterMax && differ > IterTherhold) {
        calculate_distance();
        Make_new_cluster();
        Make_new_center();
        /* fabs, not abs: the operands are doubles and must not be truncated. */
        differ = fabs(oldfitness - fitness);
        cout << fitness << endl;
        i++;
    }

    for(int c = 0; c < C; ++c) {
        for(int j = 0; j < K; j++) {
            cout << cluster[c][j] << "\t";
        }
        cout << endl;
    }
    return 0;
}