K-means算法

K-means算法

#include<iostream>
#include<fstream>
#include<stdlib.h>
#include<math.h>

using namespace std;

/*
 * Compile-time configuration. These are typed constants rather than
 * preprocessor macros so that the short names obey normal scoping rules;
 * being integral constant expressions they remain usable as array bounds.
 */
const int K = 4;                        /* number of coordinates per sample */
const int C = 8;                        /* number of clusters */
const int N = 150;                      /* number of samples */
const int IterMax = 5;                  /* maximum number of iterations */
const double IterTherhold = 0.0000001;  /* iterate only while the objective changes by more than this */

/* One sample together with its per-iteration clustering state. */
struct Data {
	double p[K];    /* coordinates of the sample */
	int Lable;      /* index of the centroid the sample is assigned to */
	/*
	 * Distance from the sample to every centroid. It is indexed by cluster
	 * number (0..C-1), so it must hold C entries; sizing it by K overflowed
	 * into the neighbouring sample whenever C > K.
	 */
	double dis[C];
};

/* The N samples; filled by input_data(), labelled by Make_new_cluster(). */
Data dat[N];
/* Current centroids: cluster[c][k] is coordinate k of centroid c. */
double cluster[C][K] = {0.0};
/* Value `fitness` had before the most recent Make_new_cluster() call. */
double oldfitness = 0.0;
/* Sum over all samples of the distance to their assigned centroid. */
double fitness = 0.0;

/*
 * Tells whether `index` already appears among the first `n` entries of
 * `rand_num`. Nothing is inspected (and false is returned) when n <= 0.
 */
bool is_equal(int rand_num[], int n, int index) 
{
	const int *cursor = rand_num;
	int remaining = n;
	while(remaining-- > 0) {
		if(*cursor++ == index) {
			return true;
		}
	}
	return false;
}

void input_data()
{
	ifstream in("test.data", ios::in);

	int i = 0;
	while(i < N) {
		for(int k = 0; k < K; k++){
			in >> dat[i].p[k];
		}	
		i++;
	}
}
//初始化质心
void Init_center()
{
	int rand_num[C] ={0} ;
	int i = 0;
	while(i < C) {
		int index = rand()%N;
		if(!is_equal(rand_num, i, index)) {
			rand_num[i++] = index;
		}
	}
	for(int i = 0; i < K ; i++) {
		for(int j = 0; j < C; j++) {
			cluster[j][i] = dat[rand_num[j]].p[i];
		}
	}
}
/*
 * Euclidean distance between sample dat[x] and centroid cluster[y],
 * taken over all K coordinates.
 */
double Eulid_dis(int x, int y) {
	const double *sample = dat[x].p;
	const double *center = cluster[y];
	double squared_sum = 0.0;
	for(int k = 0; k < K; ++k) {
		squared_sum += pow(sample[k] - center[k], 2);
	}
	return sqrt(squared_sum);
}
void Make_new_cluster()
{
	double bias = 0.0;
	for(int i = 0; i < N; i++) {
		double mindis = dat[i].dis[0];
		dat[i].Lable = 0;
		for(int j = 1; j < C; j++) {
			if(mindis > dat[i].dis[j]) {
				mindis = dat[i].dis[j];
				dat[i].Lable = j;
			}
		}
	}
	for(int i = 0; i < N; i++) {
		bias += dat[i].dis[dat[i].Lable];
	}
	
	oldfitness = fitness;
	fitness = bias;
}
/*
 * Fills dat[s].dis[c] with the Euclidean distance from sample s to
 * centroid c, for every sample and every centroid.
 */
void calculate_distance() 
{
	for(int s = 0; s < N; ++s) {
		double *row = dat[s].dis;
		for(int c = 0; c < C; ++c) {
			row[c] = Eulid_dis(s, c);
		}
	}
}
void Make_new_center()
{
	for(int i = 0; i < C; i++) {
		for(int k = 0; k < K; k++) {
			double tmp = 0.0;
			int total = 0;
			for(int j = 0; j < N; j++) {
				if(dat[j].Lable == i) {
					tmp += dat[j].p[k];
					total++;
				}
			}
			if(total > 0) {
				cluster[i][k] = tmp/total;
			}
		}
	}
}
/*
 * Entry point.
 *
 * Loads the samples, picks the initial centroids, then repeats
 * distance computation -> assignment -> centroid update until either
 * IterMax iterations have run or the objective (`fitness`) changed by no
 * more than IterTherhold. The objective is printed after every iteration,
 * followed at the end by one tab-separated line per centroid.
 */
int main()
{
	input_data();
	Init_center();

	double differ = 1.0;
	for(int iter = 0; iter < IterMax && differ > IterTherhold; iter++) {
		calculate_distance(); 
		Make_new_cluster();
		Make_new_center();
		/* fabs, not abs: with <stdlib.h> an unqualified abs can bind to
		 * int abs(int), which truncates the delta and can end the loop
		 * before the objective has actually converged. */
		differ = fabs(oldfitness - fitness);
		cout << fitness << endl;
	}

	for(int c = 0; c < C; ++c) {
		for(int k = 0; k < K; k++) {
			cout << cluster[c][k] << "\t";
		}
		cout << endl;
	}
	return 0;
}

  

posted @ 2015-02-26 13:10  bitgirl_coder  阅读(194)  评论(0编辑  收藏  举报