python-k中心聚类代码

# -*- coding: utf-8 -*-
"""
Created on Mon Feb 18 14:59:53 2019

@author: Administrator
"""

#from pyclust import KMedoids #保留,用于切换函数
import numpy as np
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import random

def im_txt(file):
    """Load a numeric text file into a float32 NumPy array.

    Args:
        file: Path or file-like object passed straight to ``np.loadtxt``.

    Returns:
        The parsed values as an array of dtype ``float32``.
    """
    return np.loadtxt(file, dtype=np.float32)

def out_txt(outfile, line):
    """Write every item of ``line`` to ``outfile``, one item per text line.

    Saving is best-effort: when the file cannot be opened or written, a
    notice is printed and no exception reaches the caller.

    Args:
        outfile: Path of the file to create or overwrite.
        line: Iterable of items; each one is converted with ``str``.
    """
    try:
        # The context manager closes the handle on every exit path, and
        # opening inside the ``try`` reports an unwritable path the same
        # way as a failed write.
        with open(outfile, "w") as f:
            f.writelines(str(item) + "\n" for item in line)
    except OSError:
        # Only I/O failures are reported; KeyboardInterrupt and programming
        # errors are no longer swallowed.
        print("分类数据未保存!!!!")

def initianlize_centers(n_clusters):
    """Draw ``n_clusters`` distinct random sample indices as initial centers.

    Reads the module-level ``n_data`` (the number of samples), which must
    be assigned before this function is called.

    Args:
        n_clusters: Number of cluster centers to draw.

    Returns:
        A list of distinct integer indices in ``[0, n_data - 1]``, for
        example ``[101, 205, 5, 3, 7]``. Empty when ``n_clusters <= 0``.

    Raises:
        ValueError: If ``n_clusters`` is larger than ``n_data``. Drawing
            that many distinct indices is impossible; a retry loop would
            never terminate.
    """
    global n_data
    if n_clusters <= 0:
        return []
    if n_clusters > n_data:
        raise ValueError(
            "n_clusters (%s) exceeds the number of samples (%s)"
            % (n_clusters, n_data)
        )
    # Sampling without replacement guarantees distinct indices in one call.
    return random.sample(range(n_data), n_clusters)

def clus_process(centers, data):
    """Assign every sample to its nearest cluster center.

    Args:
        centers: Sequence of indices into ``data``; each index marks the
            sample acting as a cluster center.
        data: Indexable collection of samples that support subtraction.

    Returns:
        A list with one entry per sample: the position within ``centers``
        of the center with the smallest Euclidean distance. On a tie the
        earliest center wins.
    """
    medoid_ids = np.array(centers)
    assignments = []
    for sample in data:
        # Euclidean distance from this sample to each center.
        gaps = [
            np.sqrt(np.sum(np.square(sample - data[medoid])))
            for medoid in medoid_ids
        ]
        # The position of the smallest distance is the cluster label.
        assignments.append(gaps.index(min(gaps)))
    return assignments

def chose_centers(result_clusters, n_clusters):
    """Propose a new center for every cluster by random selection.

    For each cluster label ``0 .. n_clusters - 1`` one member is drawn at
    random. A cluster with no members cannot supply a center, so it gets a
    random sample index that no other cluster has taken. This keeps the
    returned centers distinct instead of reusing a stale value.

    Args:
        result_clusters: Sequence whose entry ``j`` is the cluster label of
            sample ``j``.
        n_clusters: Number of clusters.

    Returns:
        A list of ``n_clusters`` sample indices; entry ``i`` is the
        proposed center of cluster ``i``.

    Raises:
        ValueError: If there are not enough unused samples to give every
            empty cluster its own center.
    """
    # Group sample positions by label in a single pass.
    members = {}
    for position, label in enumerate(result_clusters):
        members.setdefault(label, []).append(position)

    centers = [None] * n_clusters
    taken = set()
    empty_clusters = []
    for i in range(n_clusters):
        pool = members.get(i, [])
        if pool:
            pick = random.sample(pool, 1)[0]
            centers[i] = pick
            taken.add(pick)
        else:
            # Same diagnostic output as before for an empty cluster.
            print("sample bug")
            print(pool)
            empty_clusters.append(i)

    if empty_clusters:
        # Filled after the normal picks so a fallback index never collides
        # with a center chosen from a populated cluster.
        spare = [p for p in range(len(result_clusters)) if p not in taken]
        if len(spare) < len(empty_clusters):
            raise ValueError(
                "not enough samples to choose %s distinct centers" % n_clusters
            )
        fallback = random.sample(spare, len(empty_clusters))
        for i, pick in zip(empty_clusters, fallback):
            centers[i] = pick

    return centers

def count_E(centers_new, data, result_clusters_new):
    """Compute the clustering cost: total sample-to-center distance.

    Args:
        centers_new: Sequence of indices into ``data``; entry ``i`` is the
            center of cluster ``i``.
        data: Indexable collection of samples that support subtraction.
        result_clusters_new: Sequence whose entry ``j`` is the cluster
            label of sample ``j``.

    Returns:
        The sum, over all clusters, of the Euclidean distance between each
        member and its cluster center. ``0`` when nothing matches.
    """
    total = 0
    sample_count = len(data)
    # Accumulate cluster by cluster to keep the floating-point sum order.
    for cluster_id, medoid_index in enumerate(centers_new):
        for sample_index in range(sample_count):
            if result_clusters_new[sample_index] != cluster_id:
                continue
            offset = data[sample_index] - data[medoid_index]
            total += np.sqrt(np.sum(np.square(offset)))
    return total
            
def KMedoids(n_clusters, data, max_iter):
    """Cluster ``data`` with a randomized K-medoids search.

    Starting from random centers, each iteration proposes new centers (one
    random member per current cluster) and re-assigns every sample to the
    nearest proposed center. The proposal is kept only when it lowers the
    total sample-to-center distance.

    Args:
        n_clusters: Number of clusters.
        data: Indexable collection of samples.
        max_iter: Patience threshold. The loop runs while the counter of
            non-improving proposals is ``<= max_iter``; the counter is
            reset whenever a proposal is accepted.

    Returns:
        A tuple ``(centers, result_clusters)``: the sample indices of the
        accepted centers, and the cluster label of every sample.
    """
    # Random initial centers and the clustering they induce.
    centers = initianlize_centers(n_clusters)
    result_clusters = clus_process(centers, data)

    # Start from the real cost of the initial solution. A fixed guess can
    # lie below every reachable cost, and then no proposal is ever accepted.
    E = count_E(centers, data, result_clusters)

    xie = 0  # proposals since the last accepted improvement
    while xie <= max_iter:
        centers_new = chose_centers(result_clusters, n_clusters)
        # Cluster against the proposed centers so the labels and the cost
        # describe the same candidate solution.
        result_clusters_new = clus_process(centers_new, data)
        E_new = count_E(centers_new, data, result_clusters_new)

        # Accept the proposal only when it lowers the cost.
        if E_new < E:
            centers = centers_new
            result_clusters = result_clusters_new
            E = E_new
            print("价值函数为:%s" % E)
            print("聚类中心:%s" % centers)
            xie = 0

        xie = xie + 1
        # Progress output every ten proposals without improvement.
        if xie % 10 == 0:
            print(xie)

    return centers, result_clusters


def randomcolor(x):
    """Generate ``x`` distinct random colors as ``#RRGGBB`` strings.

    Each of the six digits is drawn from ``1-9`` and ``A-F``; the digit
    ``0`` is never used. A color already in the result is discarded and
    drawn again.

    Args:
        x: Number of colors to produce.

    Returns:
        A list of ``x`` unique color strings.
    """
    hex_digits = "123456789ABCDEF"
    palette = []
    while len(palette) < x:
        code = "#" + "".join(
            hex_digits[random.randint(0, 14)] for _ in range(6)
        )
        if code not in palette:
            palette.append(code)
    return palette
                    
def main():
    """Run the demo: load data, cluster it, plot it and save the labels.

    Reads ``text.txt``, stores the sample count in the module-level
    ``n_data``, clusters the samples with ``KMedoids``, draws a scatter
    plot of a 2-D t-SNE embedding colored by cluster, and writes the
    labels to a text file.
    """
    global n_data
    file = "text.txt"
    data = im_txt(file)
    n_data = len(data)

    # The embedding is used only for plotting; clustering runs on ``data``.
    data_TSNE = TSNE(learning_rate=100, n_iter=5000).fit_transform(data)

    plt.figure(figsize=(12, 8))

    k = 18  # number of clusters
    centers, result_clusters = KMedoids(k, data, 10)

    # One random color per cluster, then one color per sample.
    color = randomcolor(k)
    colors = [color[label] for label in result_clusters]

    plt.subplot(222)
    # NOTE(review): the figure already exists when this is set, so it
    # probably does not affect this figure - confirm the intended DPI.
    plt.rcParams['figure.dpi'] = 300
    plt.scatter(data_TSNE[:, 0], data_TSNE[:, 1], s=10, c=colors)
    # The old format string had no placeholder, so k never reached the title.
    plt.title('K-medoids Result of k={}'.format(k))
    out_txt("分类数数(ture).txt", result_clusters)

# Run the demo. There is no __main__ guard, so this also runs on import.
main()

  

posted @ 2019-02-23 13:39  晚餐吃不起麻辣烫  阅读(1505)  评论(0)  编辑  收藏  举报