K-Means聚类算法
K-Means聚类算法实现
代码:
import pandas as pd
import random as rd
import matplotlib.pyplot as plt
from matplotlib.pylab import style
# Load CSV columns 0 and 2 (the two clustering features) and column 4
# (presumably the class label -- confirm against iris.csv).
iris_data = pd.read_csv("iris.csv", header=None, usecols=[0, 2, 4])
# Column 5: cluster each sample is currently assigned to (-1 = not assigned yet).
iris_data[5] = -1
# Column 6: distance to the assigned centre, seeded with a large sentinel value.
iris_data[6] = 10000
data = iris_data.values
k = 3
# Row layout of `data`:
#     [column 0, column 2, label, assigned cluster, distance to centre]
# Layout of `cent_point`:
#     cent_point[cluster] -> [column 0, column 2]
#
# Seed one centre from each block of 50 rows (assumes 150 rows grouped by
# class in blocks of 50 -- confirm against iris.csv).  random.randint includes
# BOTH endpoints, so randint(100, 150) could return 150 and index past the last
# row, and randint(0, 50) could pick a row from the next block.  randrange
# excludes the upper bound, which is what the indexing needs.
cent_point = [
    data[rd.randrange(0, 50), 0:2],
    data[rd.randrange(50, 100), 0:2],
    data[rd.randrange(100, 150), 0:2],
]
def get_new_label(point, centers=None):
    """Assign *point* to its nearest cluster centre, updating it in place.

    Args:
        point: Mutable row whose first two entries are the coordinates, whose
            second-to-last entry holds the assigned cluster index and whose
            last entry holds the distance to that cluster's centre.
        centers: Sequence of ``[x, y]`` centres to compare against.  Defaults
            to the module-level ``cent_point``.

    Returns:
        The same ``point`` object, with its last two entries refreshed.
    """
    if centers is None:
        centers = cent_point
    # Start from "infinitely far" on every call.  Re-using the distance cached
    # by an earlier pass would compare against centres that have since moved,
    # so a point could stay stuck in a cluster that is no longer the nearest.
    best_index = point[-2]
    best_dis = float("inf")
    for i, center in enumerate(centers):
        dis = ((center[0] - point[0]) ** 2 + (center[1] - point[1]) ** 2) ** 0.5
        # Strict comparison: on a tie the lowest cluster index wins.
        if dis < best_dis:
            best_index = i
            best_dis = dis
    point[-2] = best_index
    point[-1] = best_dis
    return point
def get_cent_point(points=None, prev_centers=None):
    """Recompute every cluster centre as the mean of its assigned points.

    Args:
        points: Iterable of rows laid out as
            ``[x, y, label, assigned cluster, distance]``.  Defaults to the
            module-level ``data``.
        prev_centers: Current centres, one ``[x, y]`` per cluster.  Its length
            fixes the number of clusters and its entries are reused for
            clusters that received no points.  Defaults to the module-level
            ``cent_point``.

    Returns:
        A new list with one ``[x, y]`` centre per cluster.
    """
    if points is None:
        points = data
    if prev_centers is None:
        prev_centers = cent_point
    n_clusters = len(prev_centers)
    sums = [[0.0, 0.0] for _ in range(n_clusters)]
    count = [0] * n_clusters
    for row in points:
        label = int(row[3])
        # A negative label means "not assigned yet"; without this guard it
        # would silently wrap around and be counted in the last cluster.
        if label < 0:
            continue
        sums[label][0] += row[0]
        sums[label][1] += row[1]
        count[label] += 1
    res = []
    for label in range(n_clusters):
        if count[label]:
            res.append([sums[label][0] / count[label], sums[label][1] / count[label]])
        else:
            # Empty cluster: keep its previous centre instead of dividing by zero.
            res.append([prev_centers[label][0], prev_centers[label][1]])
    return res
# Alternate the assignment step and the centre-update step until one full
# pass over the data changes no sample's cluster.
go_on = True
cnt = 0
while go_on:
    go_on = False
    # Walk the rows that were actually loaded rather than a hard-coded 150,
    # so a dataset of a different size neither overruns nor gets truncated.
    for row in data:
        previous = row[3]
        get_new_label(row)
        if previous != row[3]:
            go_on = True
    # Centres are refreshed once per pass, after every sample has been assigned.
    cent_point = get_cent_point()
    cnt += 1
    print("第{}次迭代中...".format(cnt))
print("迭代完成!")
# Keep the first four columns only: the two features, the label and the
# assigned cluster (the distance column is dropped).
res = pd.DataFrame(data).iloc[:, :4]
# Show every column and every row, and widen the per-cell display width
# to 100 characters (pandas' default is 50).
for option_name, option_value in (
    ('display.max_columns', None),
    ('display.max_rows', None),
    ('max_colwidth', 100),
):
    pd.set_option(option_name, option_value)
print(res)
print("迭代次数为:{}".format(cnt))
# Visualisation: one colour per cluster, final centres drawn in red.
# The two rcParams settings below are there so the Chinese axis labels and
# the minus sign render correctly.
style.use('ggplot')
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
# Bucket the x / y coordinates of every sample by its assigned cluster,
# which is the last column of `res`.
show_dateX = [[], [], []]
show_dateY = [[], [], []]
for sample in res.values:
    cluster = int(sample[-1])
    show_dateX[cluster].append(sample[0])
    show_dateY[cluster].append(sample[1])
plt.xlabel("第一列")
plt.ylabel("第三列")
for xs, ys, colour in zip(show_dateX, show_dateY, ("green", "purple", "blue")):
    plt.scatter(xs, ys, marker="o", c=colour)
# Overlay the three final cluster centres.
for idx in range(3):
    plt.scatter(cent_point[idx][0], cent_point[idx][1], marker="o", c="red")
plt.show()
数据集:
输出:
第1次迭代中...
第2次迭代中...
第3次迭代中...
第4次迭代中...
第5次迭代中...
迭代完成!
0 1 2 3
0 5.1 1.4 0.0 0.0
1 4.9 1.4 0.0 0.0
2 4.7 1.3 0.0 0.0
3 4.6 1.5 0.0 0.0
4 5.0 1.4 0.0 0.0
5 5.4 1.7 0.0 0.0
6 4.6 1.4 0.0 0.0
7 5.0 1.5 0.0 0.0
8 4.4 1.4 0.0 0.0
9 4.9 1.5 0.0 0.0
10 5.4 1.5 0.0 0.0
11 4.8 1.6 0.0 0.0
12 4.8 1.4 0.0 0.0
13 4.3 1.1 0.0 0.0
14 5.8 1.2 0.0 0.0
15 5.7 1.5 0.0 0.0
16 5.4 1.3 0.0 0.0
17 5.1 1.4 0.0 0.0
18 5.7 1.7 0.0 0.0
19 5.1 1.5 0.0 0.0
20 5.4 1.7 0.0 0.0
21 5.1 1.5 0.0 0.0
22 4.6 1.0 0.0 0.0
23 5.1 1.7 0.0 0.0
24 4.8 1.9 0.0 0.0
25 5.0 1.6 0.0 0.0
26 5.0 1.6 0.0 0.0
27 5.2 1.5 0.0 0.0
28 5.2 1.4 0.0 0.0
29 4.7 1.6 0.0 0.0
30 4.8 1.6 0.0 0.0
31 5.4 1.5 0.0 0.0
32 5.2 1.5 0.0 0.0
33 5.5 1.4 0.0 0.0
34 4.9 1.5 0.0 0.0
35 5.0 1.2 0.0 0.0
36 5.5 1.3 0.0 0.0
37 4.9 1.5 0.0 0.0
38 4.4 1.3 0.0 0.0
39 5.1 1.5 0.0 0.0
40 5.0 1.3 0.0 0.0
41 4.5 1.3 0.0 0.0
42 4.4 1.3 0.0 0.0
43 5.0 1.6 0.0 0.0
44 5.1 1.9 0.0 0.0
45 4.8 1.4 0.0 0.0
46 5.1 1.6 0.0 0.0
47 4.6 1.4 0.0 0.0
48 5.3 1.5 0.0 0.0
49 5.0 1.4 0.0 0.0
50 7.0 4.7 1.0 2.0
51 6.4 4.5 1.0 1.0
52 6.9 4.9 1.0 2.0
53 5.5 4.0 1.0 1.0
54 6.5 4.6 1.0 2.0
55 5.7 4.5 1.0 1.0
56 6.3 4.7 1.0 2.0
57 4.9 3.3 1.0 1.0
58 6.6 4.6 1.0 2.0
59 5.2 3.9 1.0 1.0
60 5.0 3.5 1.0 1.0
61 5.9 4.2 1.0 1.0
62 6.0 4.0 1.0 1.0
63 6.1 4.7 1.0 1.0
64 5.6 3.6 1.0 1.0
65 6.7 4.4 1.0 2.0
66 5.6 4.5 1.0 1.0
67 5.8 4.1 1.0 1.0
68 6.2 4.5 1.0 1.0
69 5.6 3.9 1.0 1.0
70 5.9 4.8 1.0 1.0
71 6.1 4.0 1.0 1.0
72 6.3 4.9 1.0 2.0
73 6.1 4.7 1.0 1.0
74 6.4 4.3 1.0 1.0
75 6.6 4.4 1.0 2.0
76 6.8 4.8 1.0 2.0
77 6.7 5.0 1.0 2.0
78 6.0 4.5 1.0 1.0
79 5.7 3.5 1.0 1.0
80 5.5 3.8 1.0 1.0
81 5.5 3.7 1.0 1.0
82 5.8 3.9 1.0 1.0
83 6.0 5.1 1.0 2.0
84 5.4 4.5 1.0 1.0
85 6.0 4.5 1.0 1.0
86 6.7 4.7 1.0 2.0
87 6.3 4.4 1.0 1.0
88 5.6 4.1 1.0 1.0
89 5.5 4.0 1.0 1.0
90 5.5 4.4 1.0 1.0
91 6.1 4.6 1.0 1.0
92 5.8 4.0 1.0 1.0
93 5.0 3.3 1.0 1.0
94 5.6 4.2 1.0 1.0
95 5.7 4.2 1.0 1.0
96 5.7 4.2 1.0 1.0
97 6.2 4.3 1.0 1.0
98 5.1 3.0 1.0 1.0
99 5.7 4.1 1.0 1.0
100 6.3 6.0 2.0 2.0
101 5.8 5.1 2.0 2.0
102 7.1 5.9 2.0 2.0
103 6.3 5.6 2.0 2.0
104 6.5 5.8 2.0 2.0
105 7.6 6.6 2.0 2.0
106 4.9 4.5 2.0 1.0
107 7.3 6.3 2.0 2.0
108 6.7 5.8 2.0 2.0
109 7.2 6.1 2.0 2.0
110 6.5 5.1 2.0 2.0
111 6.4 5.3 2.0 2.0
112 6.8 5.5 2.0 2.0
113 5.7 5.0 2.0 1.0
114 5.8 5.1 2.0 2.0
115 6.4 5.3 2.0 2.0
116 6.5 5.5 2.0 2.0
117 7.7 6.7 2.0 2.0
118 7.7 6.9 2.0 2.0
119 6.0 5.0 2.0 2.0
120 6.9 5.7 2.0 2.0
121 5.6 4.9 2.0 1.0
122 7.7 6.7 2.0 2.0
123 6.3 4.9 2.0 2.0
124 6.7 5.7 2.0 2.0
125 7.2 6.0 2.0 2.0
126 6.2 4.8 2.0 2.0
127 6.1 4.9 2.0 2.0
128 6.4 5.6 2.0 2.0
129 7.2 5.8 2.0 2.0
130 7.4 6.1 2.0 2.0
131 7.9 6.4 2.0 2.0
132 6.4 5.6 2.0 2.0
133 6.3 5.1 2.0 2.0
134 6.1 5.6 2.0 2.0
135 7.7 6.1 2.0 2.0
136 6.3 5.6 2.0 2.0
137 6.4 5.5 2.0 2.0
138 6.0 4.8 2.0 1.0
139 6.9 5.4 2.0 2.0
140 6.7 5.6 2.0 2.0
141 6.9 5.1 2.0 2.0
142 5.8 5.1 2.0 2.0
143 6.8 5.9 2.0 2.0
144 6.7 5.7 2.0 2.0
145 6.7 5.2 2.0 2.0
146 6.3 5.0 2.0 2.0
147 6.5 5.2 2.0 2.0
148 6.2 5.4 2.0 2.0
149 5.9 5.1 2.0 2.0
迭代次数为:5