Loading

K-Means聚类算法

K-Means聚类算法实现

代码:

import pandas as pd
import random as rd
import matplotlib.pyplot as plt
from matplotlib.pylab import style

# Load three columns of the CSV: columns 0 and 2 are the two features used
# for clustering, column 4 is the reference label.
iris_data = pd.read_csv("iris.csv", header=None, usecols=[0, 2, 4])
# Extra column: cluster index assigned to the row; -1 means "not assigned yet".
iris_data[5] = -1
# Extra column: distance from the row to its assigned centroid, seeded with a
# large initial value.
iris_data[6] = 10000

data = iris_data.values
k = 3
# Row layout of `data`:
#   [feature 1 (CSV col 0), feature 2 (CSV col 2), label, cluster index, distance to centroid]
# Layout of `cent_point`:
#   cent_point[cluster] -> [feature 1, feature 2]
# One random seed point is drawn from each block of 50 rows.
# NOTE(review): assumes the file has 150 rows grouped as three consecutive
# blocks of 50 -- confirm against iris.csv.
# random.randint includes BOTH endpoints, so the upper bounds must be 49/99/149;
# the previous bound of 150 could index past the end of the array.
cent_point = [data[rd.randint(0, 49), 0:2], data[rd.randint(50, 99), 0:2], data[rd.randint(100, 149), 0:2]]


def get_new_label(point, centers=None):
    """Assign ``point`` to its nearest centroid, updating the row in place.

    The nearest centroid is recomputed from scratch on every call.  The
    distance already stored in ``point[-1]`` is deliberately ignored: it was
    measured against a centroid position that may have moved since, and
    comparing against it would prevent legitimate reassignments.

    Args:
        point: Mutable row whose first two entries are the coordinates,
            ``point[-2]`` is the cluster index and ``point[-1]`` is the
            distance to the assigned centroid.
        centers: Optional sequence of ``[x, y]`` centroids.  When omitted,
            the first ``k`` entries of the module-level ``cent_point`` are
            used.

    Returns:
        The same ``point`` object, with ``point[-2]`` and ``point[-1]``
        updated.  The row is left untouched if there are no centroids.
    """
    if centers is None:
        centers = cent_point[:k]

    best_label = -1
    best_dist = float("inf")
    for label, center in enumerate(centers):
        # Euclidean distance in the two-feature plane.
        dist = ((center[0] - point[0]) ** 2 + (center[1] - point[1]) ** 2) ** 0.5
        # Strict '<' keeps the lowest cluster index on ties.
        if dist < best_dist:
            best_dist = dist
            best_label = label

    if best_label >= 0:
        point[-1] = best_dist
        point[-2] = best_label
    return point


def get_cent_point(points=None, prev_centers=None):
    """Recompute every centroid as the mean of the rows assigned to it.

    Args:
        points: Optional iterable of rows laid out as
            ``[x, y, label, cluster index, distance]``.  Defaults to the
            module-level ``data``.
        prev_centers: Optional sequence of the current ``[x, y]`` centroids;
            its length determines the number of clusters.  Defaults to the
            first ``k`` entries of the module-level ``cent_point``.

    Returns:
        A new list with one ``[x, y]`` centroid per cluster.  A cluster with
        no assigned rows keeps its previous centroid instead of triggering a
        division by zero.
    """
    if points is None:
        points = data
    if prev_centers is None:
        prev_centers = cent_point[:k]

    n_clusters = len(prev_centers)
    sums = [[0.0, 0.0] for _ in range(n_clusters)]
    counts = [0] * n_clusters
    for row in points:
        label = int(row[3])
        if label < 0:
            # Unassigned rows (-1) must not be folded into the last cluster
            # through negative indexing.
            continue
        sums[label][0] += row[0]
        sums[label][1] += row[1]
        counts[label] += 1

    res = []
    for total, count, previous in zip(sums, counts, prev_centers):
        if count:
            res.append([total[0] / count, total[1] / count])
        else:
            # Empty cluster: keep the old position.
            res.append([previous[0], previous[1]])
    return res


# Main K-Means loop: alternate between assigning rows to centroids and
# recomputing the centroids, until a full pass changes no assignment.
go_on = True
cnt = 0  # number of completed iterations
while go_on:
    go_on = False
    # Iterate over every row of `data` rather than a hard-coded 150 so the
    # loop works for any dataset size.  Each `row` is a view into `data`,
    # so in-place updates made by get_new_label are kept.
    for row in data:
        previous_cluster = row[3]
        get_new_label(row)
        if previous_cluster != row[3]:
            # At least one row moved: another iteration is required.
            go_on = True
    cent_point = get_cent_point()
    cnt += 1
    print("第{}次迭代中...".format(cnt))

print("迭代完成!")

# Configure pandas so the whole table is printed without truncation:
# every column, every row, and cell values up to 100 characters wide.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('max_colwidth', 100)

# Keep the first four columns (feature 1, feature 2, label, cluster index);
# the working distance column is dropped from the report.
res = pd.DataFrame(data).iloc[:, :4]
print(res)
print("迭代次数为:{}".format(cnt))

# Visualization of the clustering result.
# The next three lines work around garbled Chinese text in the figure.
style.use('ggplot')
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# Bucket the coordinates of every row by its assigned cluster index
# (last column of `res`).
res_list = res.values
show_dateX = [[], [], []]
show_dateY = [[], [], []]
for row in res_list:
    cluster = int(row[-1])
    show_dateX[cluster].append(row[0])
    show_dateY[cluster].append(row[1])

plt.xlabel("第一列")
plt.ylabel("第三列")

# One scatter series per cluster, drawn in cluster order.
for x, y, colour in zip(show_dateX, show_dateY, ("green", "purple", "blue")):
    plt.scatter(x, y, marker="o", c=colour)

# Final centroids are overlaid in red.
for idx in range(3):
    plt.scatter(cent_point[idx][0], cent_point[idx][1], marker="o", c="red")

plt.show()

数据集:

点此查看

点此下载

输出:

第1次迭代中...
第2次迭代中...
第3次迭代中...
第4次迭代中...
第5次迭代中...
迭代完成!
       0    1    2    3
0    5.1  1.4  0.0  0.0
1    4.9  1.4  0.0  0.0
2    4.7  1.3  0.0  0.0
3    4.6  1.5  0.0  0.0
4    5.0  1.4  0.0  0.0
5    5.4  1.7  0.0  0.0
6    4.6  1.4  0.0  0.0
7    5.0  1.5  0.0  0.0
8    4.4  1.4  0.0  0.0
9    4.9  1.5  0.0  0.0
10   5.4  1.5  0.0  0.0
11   4.8  1.6  0.0  0.0
12   4.8  1.4  0.0  0.0
13   4.3  1.1  0.0  0.0
14   5.8  1.2  0.0  0.0
15   5.7  1.5  0.0  0.0
16   5.4  1.3  0.0  0.0
17   5.1  1.4  0.0  0.0
18   5.7  1.7  0.0  0.0
19   5.1  1.5  0.0  0.0
20   5.4  1.7  0.0  0.0
21   5.1  1.5  0.0  0.0
22   4.6  1.0  0.0  0.0
23   5.1  1.7  0.0  0.0
24   4.8  1.9  0.0  0.0
25   5.0  1.6  0.0  0.0
26   5.0  1.6  0.0  0.0
27   5.2  1.5  0.0  0.0
28   5.2  1.4  0.0  0.0
29   4.7  1.6  0.0  0.0
30   4.8  1.6  0.0  0.0
31   5.4  1.5  0.0  0.0
32   5.2  1.5  0.0  0.0
33   5.5  1.4  0.0  0.0
34   4.9  1.5  0.0  0.0
35   5.0  1.2  0.0  0.0
36   5.5  1.3  0.0  0.0
37   4.9  1.5  0.0  0.0
38   4.4  1.3  0.0  0.0
39   5.1  1.5  0.0  0.0
40   5.0  1.3  0.0  0.0
41   4.5  1.3  0.0  0.0
42   4.4  1.3  0.0  0.0
43   5.0  1.6  0.0  0.0
44   5.1  1.9  0.0  0.0
45   4.8  1.4  0.0  0.0
46   5.1  1.6  0.0  0.0
47   4.6  1.4  0.0  0.0
48   5.3  1.5  0.0  0.0
49   5.0  1.4  0.0  0.0
50   7.0  4.7  1.0  2.0
51   6.4  4.5  1.0  1.0
52   6.9  4.9  1.0  2.0
53   5.5  4.0  1.0  1.0
54   6.5  4.6  1.0  2.0
55   5.7  4.5  1.0  1.0
56   6.3  4.7  1.0  2.0
57   4.9  3.3  1.0  1.0
58   6.6  4.6  1.0  2.0
59   5.2  3.9  1.0  1.0
60   5.0  3.5  1.0  1.0
61   5.9  4.2  1.0  1.0
62   6.0  4.0  1.0  1.0
63   6.1  4.7  1.0  1.0
64   5.6  3.6  1.0  1.0
65   6.7  4.4  1.0  2.0
66   5.6  4.5  1.0  1.0
67   5.8  4.1  1.0  1.0
68   6.2  4.5  1.0  1.0
69   5.6  3.9  1.0  1.0
70   5.9  4.8  1.0  1.0
71   6.1  4.0  1.0  1.0
72   6.3  4.9  1.0  2.0
73   6.1  4.7  1.0  1.0
74   6.4  4.3  1.0  1.0
75   6.6  4.4  1.0  2.0
76   6.8  4.8  1.0  2.0
77   6.7  5.0  1.0  2.0
78   6.0  4.5  1.0  1.0
79   5.7  3.5  1.0  1.0
80   5.5  3.8  1.0  1.0
81   5.5  3.7  1.0  1.0
82   5.8  3.9  1.0  1.0
83   6.0  5.1  1.0  2.0
84   5.4  4.5  1.0  1.0
85   6.0  4.5  1.0  1.0
86   6.7  4.7  1.0  2.0
87   6.3  4.4  1.0  1.0
88   5.6  4.1  1.0  1.0
89   5.5  4.0  1.0  1.0
90   5.5  4.4  1.0  1.0
91   6.1  4.6  1.0  1.0
92   5.8  4.0  1.0  1.0
93   5.0  3.3  1.0  1.0
94   5.6  4.2  1.0  1.0
95   5.7  4.2  1.0  1.0
96   5.7  4.2  1.0  1.0
97   6.2  4.3  1.0  1.0
98   5.1  3.0  1.0  1.0
99   5.7  4.1  1.0  1.0
100  6.3  6.0  2.0  2.0
101  5.8  5.1  2.0  2.0
102  7.1  5.9  2.0  2.0
103  6.3  5.6  2.0  2.0
104  6.5  5.8  2.0  2.0
105  7.6  6.6  2.0  2.0
106  4.9  4.5  2.0  1.0
107  7.3  6.3  2.0  2.0
108  6.7  5.8  2.0  2.0
109  7.2  6.1  2.0  2.0
110  6.5  5.1  2.0  2.0
111  6.4  5.3  2.0  2.0
112  6.8  5.5  2.0  2.0
113  5.7  5.0  2.0  1.0
114  5.8  5.1  2.0  2.0
115  6.4  5.3  2.0  2.0
116  6.5  5.5  2.0  2.0
117  7.7  6.7  2.0  2.0
118  7.7  6.9  2.0  2.0
119  6.0  5.0  2.0  2.0
120  6.9  5.7  2.0  2.0
121  5.6  4.9  2.0  1.0
122  7.7  6.7  2.0  2.0
123  6.3  4.9  2.0  2.0
124  6.7  5.7  2.0  2.0
125  7.2  6.0  2.0  2.0
126  6.2  4.8  2.0  2.0
127  6.1  4.9  2.0  2.0
128  6.4  5.6  2.0  2.0
129  7.2  5.8  2.0  2.0
130  7.4  6.1  2.0  2.0
131  7.9  6.4  2.0  2.0
132  6.4  5.6  2.0  2.0
133  6.3  5.1  2.0  2.0
134  6.1  5.6  2.0  2.0
135  7.7  6.1  2.0  2.0
136  6.3  5.6  2.0  2.0
137  6.4  5.5  2.0  2.0
138  6.0  4.8  2.0  1.0
139  6.9  5.4  2.0  2.0
140  6.7  5.6  2.0  2.0
141  6.9  5.1  2.0  2.0
142  5.8  5.1  2.0  2.0
143  6.8  5.9  2.0  2.0
144  6.7  5.7  2.0  2.0
145  6.7  5.2  2.0  2.0
146  6.3  5.0  2.0  2.0
147  6.5  5.2  2.0  2.0
148  6.2  5.4  2.0  2.0
149  5.9  5.1  2.0  2.0
迭代次数为:5
posted @ 2021-11-08 20:20  [X_O]  阅读(69)  评论(0)  编辑  收藏  举报