一:简单实现K-近邻算法
(一)导入数据
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
def CreateDataSet():
    """Build the four-point toy data set used by the first KNN demo.

    Returns:
        tuple: ``(data, labels)`` where ``data`` is a ``(4, 2)`` float array
        of 2-D points and ``labels`` is a length-4 array holding the class
        name (``'A'`` or ``'B'``) of each point, in the same order.
    """
    data = np.array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    labels = np.array(['A', 'A', 'B', 'B'])
    return data, labels
# Build the toy data set and echo it to the console.
data, labels = CreateDataSet()
print(data)
print(labels)

# Scatter-plot the points in blue and write each point's class label
# slightly to the right of it (x offset 0.02) so the text does not cover
# the marker.
plt.figure()
plt.scatter(data[:, 0], data[:, 1], c="b")
for i in range(data.shape[0]):
    plt.text(data[i, 0] + 0.02, data[i, 1], labels[i])
plt.show()
(二)实现KNN算法
def KNNClassfy(preData, dataSet, labels, k):
    """Classify one sample by majority vote among its k nearest neighbours.

    Args:
        preData: Feature vector of the sample to classify, shape ``(d,)``.
        dataSet: Training samples, shape ``(n, d)``.
        labels: Sequence of ``n`` hashable class labels, aligned with the
            rows of ``dataSet``.
        k: Number of neighbours that vote. Values larger than ``n`` are
            capped at ``n``.

    Returns:
        The label with the most votes. When several labels tie, the one
        whose first vote came from the closest neighbour wins.

    Raises:
        ValueError: If ``k`` is smaller than 1 or ``dataSet`` has no rows.
    """
    if k < 1:
        raise ValueError("k must be a positive integer")

    # Squared Euclidean distance: the square root is skipped because it
    # does not change the ordering of the neighbours.
    distance = np.sum(np.power(np.asarray(dataSet) - np.asarray(preData), 2), 1)
    if distance.size == 0:
        raise ValueError("dataSet must contain at least one sample")

    # A stable sort keeps equally distant samples in their original order,
    # which makes the result deterministic. Slicing caps k at n.
    nearest = np.argsort(distance, 0, kind="stable")[:k]

    votes = {}
    for idx in nearest:
        label = labels[idx]
        votes[label] = votes.get(label, 0) + 1

    # max() returns the first maximal key in insertion order, i.e. the tied
    # label that was seen nearest to the query.
    return max(votes, key=votes.get)
(三)结果测试
# Classify one query point against the toy data set using its
# three nearest neighbours, then show the predicted label.
preData = np.array([0, 0.3])
preLab = KNNClassfy(preData, data, labels, k=3)
print(preLab)
二:使用KNN算法分析喜好---多维
(一)读取数据
40920 8.326976 0.953952 3
14488 7.153469 1.673904 2
26052 1.441871 0.805124 1
75136 13.147394 0.428964 1
38344 1.669788 0.134296 1
72993 10.141740 1.032955 1
35948 6.830792 1.213192 3
42666 13.276369 0.543880 3
67497 8.631577 0.749278 1
35483 12.273169 1.508053 3
50242 3.723498 0.831917 1
63275 8.385879 1.669485 1
5569 4.875435 0.728658 2
51052 4.680098 0.625224 1
77372 15.299570 0.331351 1
43673 1.889461 0.191283 1
61364 7.516754 1.269164 1
69673 14.239195 0.261333 1
15669 0.000000 1.250185 2
28488 10.528555 1.304844 3
6487 3.540265 0.822483 2
37708 2.991551 0.833920 1
22620 5.297865 0.638306 2
28782 6.593803 0.187108 3
19739 2.816760 1.686209 2
36788 12.458258 0.649617 3
5741 0.000000 1.656418 2
28567 9.968648 0.731232 3
6808 1.364838 0.640103 2
41611 0.230453 1.151996 1
36661 11.865402 0.882810 3
43605 0.120460 1.352013 1
15360 8.545204 1.340429 3
63796 5.856649 0.160006 1
10743 9.665618 0.778626 2
70808 9.778763 1.084103 1
72011 4.932976 0.632026 1
5914 2.216246 0.587095 2
14851 14.305636 0.632317 3
33553 12.591889 0.686581 3
44952 3.424649 1.004504 1
17934 0.000000 0.147573 2
27738 8.533823 0.205324 3
29290 9.829528 0.238620 3
42330 11.492186 0.263499 3
36429 3.570968 0.832254 1
39623 1.771228 0.207612 1
32404 3.513921 0.991854 1
27268 4.398172 0.975024 1
5477 4.276823 1.174874 2
14254 5.946014 1.614244 2
68613 13.798970 0.724375 1
41539 10.393591 1.663724 3
7917 3.007577 0.297302 2
21331 1.031938 0.486174 2
8338 4.751212 0.064693 2
5176 3.692269 1.655113 2
18983 10.448091 0.267652 3
68837 10.585786 0.329557 1
13438 1.604501 0.069064 2
48849 3.679497 0.961466 1
12285 3.795146 0.696694 2
7826 2.531885 1.659173 2
5565 9.733340 0.977746 2
10346 6.093067 1.413798 2
1823 7.712960 1.054927 2
9744 11.470364 0.760461 3
16857 2.886529 0.934416 2
39336 10.054373 1.138351 3
65230 9.972470 0.881876 1
2463 2.335785 1.366145 2
27353 11.375155 1.528626 3
16191 0.000000 0.605619 2
12258 4.126787 0.357501 2
42377 6.319522 1.058602 1
25607 8.680527 0.086955 3
77450 14.856391 1.129823 1
58732 2.454285 0.222380 1
46426 7.292202 0.548607 3
32688 8.745137 0.857348 3
64890 8.579001 0.683048 1
8554 2.507302 0.869177 2
28861 11.415476 1.505466 3
42050 4.838540 1.680892 1
32193 10.339507 0.583646 3
64895 6.573742 1.151433 1
2355 6.539397 0.462065 2
0 2.209159 0.723567 2
70406 11.196378 0.836326 1
57399 4.229595 0.128253 1
41732 9.505944 0.005273 3
11429 8.652725 1.348934 3
75270 17.101108 0.490712 1
5459 7.871839 0.717662 2
73520 8.262131 1.361646 1
40279 9.015635 1.658555 3
21540 9.215351 0.806762 3
17694 6.375007 0.033678 2
22329 2.262014 1.022169 1
46570 5.677110 0.709469 1
42403 11.293017 0.207976 3
33654 6.590043 1.353117 1
9171 4.711960 0.194167 2
28122 8.768099 1.108041 3
34095 11.502519 0.545097 3
1774 4.682812 0.578112 2
40131 12.446578 0.300754 3
13994 12.908384 1.657722 3
77064 12.601108 0.974527 1
11210 3.929456 0.025466 2
6122 9.751503 1.182050 3
15341 3.043767 0.888168 2
44373 4.391522 0.807100 1
28454 11.695276 0.679015 3
63771 7.879742 0.154263 1
9217 5.613163 0.933632 2
69076 9.140172 0.851300 1
24489 4.258644 0.206892 1
16871 6.799831 1.221171 2
39776 8.752758 0.484418 3
5901 1.123033 1.180352 2
40987 10.833248 1.585426 3
7479 3.051618 0.026781 2
38768 5.308409 0.030683 3
4933 1.841792 0.028099 2
32311 2.261978 1.605603 1
26501 11.573696 1.061347 3
37433 8.038764 1.083910 3
23503 10.734007 0.103715 3
68607 9.661909 0.350772 1
27742 9.005850 0.548737 3
11303 0.000000 0.539131 2
0 5.757140 1.062373 2
32729 9.164656 1.624565 3
24619 1.318340 1.436243 1
42414 14.075597 0.695934 3
20210 10.107550 1.308398 3
33225 7.960293 1.219760 3
54483 6.317292 0.018209 1
18475 12.664194 0.595653 3
33926 2.906644 0.581657 1
43865 2.388241 0.913938 1
26547 6.024471 0.486215 3
44404 7.226764 1.255329 3
16674 4.183997 1.275290 2
8123 11.850211 1.096981 3
42747 11.661797 1.167935 3
56054 3.574967 0.494666 1
10933 0.000000 0.107475 2
18121 7.937657 0.904799 3
11272 3.365027 1.014085 2
16297 0.000000 0.367491 2
28168 13.860672 1.293270 3
40963 10.306714 1.211594 3
31685 7.228002 0.670670 3
55164 4.508740 1.036192 1
17595 0.366328 0.163652 2
1862 3.299444 0.575152 2
57087 0.573287 0.607915 1
63082 9.183738 0.012280 1
51213 7.842646 1.060636 3
6487 4.750964 0.558240 2
4805 11.438702 1.556334 3
30302 8.243063 1.122768 3
68680 7.949017 0.271865 1
17591 7.875477 0.227085 2
74391 9.569087 0.364856 1
37217 7.750103 0.869094 3
42814 0.000000 1.515293 1
14738 3.396030 0.633977 2
19896 11.916091 0.025294 3
14673 0.460758 0.689586 2
32011 13.087566 0.476002 3
58736 4.589016 1.672600 1
54744 8.397217 1.534103 1
29482 5.562772 1.689388 1
27698 10.905159 0.619091 3
11443 1.311441 1.169887 2
56117 10.647170 0.980141 3
39514 0.000000 0.481918 1
26627 8.503025 0.830861 3
16525 0.436880 1.395314 2
24368 6.127867 1.102179 1
22160 12.112492 0.359680 3
6030 1.264968 1.141582 2
6468 6.067568 1.327047 2
22945 8.010964 1.681648 3
18520 3.791084 0.304072 2
34914 11.773195 1.262621 3
6121 8.339588 1.443357 2
38063 2.563092 1.464013 1
23410 5.954216 0.953782 1
35073 9.288374 0.767318 3
52914 3.976796 1.043109 1
16801 8.585227 1.455708 3
9533 1.271946 0.796506 2
16721 0.000000 0.242778 2
5832 0.000000 0.089749 2
44591 11.521298 0.300860 3
10143 1.139447 0.415373 2
21609 5.699090 1.391892 2
23817 2.449378 1.322560 1
15640 0.000000 1.228380 2
8847 3.168365 0.053993 2
50939 10.428610 1.126257 3
28521 2.943070 1.446816 1
32901 10.441348 0.975283 3
42850 12.478764 1.628726 3
13499 5.856902 0.363883 2
40345 2.476420 0.096075 1
43547 1.826637 0.811457 1
70758 4.324451 0.328235 1
19780 1.376085 1.178359 2
44484 5.342462 0.394527 1
54462 11.835521 0.693301 3
20085 12.423687 1.424264 3
42291 12.161273 0.071131 3
47550 8.148360 1.649194 3
11938 1.531067 1.549756 2
40699 3.200912 0.309679 1
70908 8.862691 0.530506 1
73989 6.370551 0.369350 1
11872 2.468841 0.145060 2
48463 11.054212 0.141508 3
15987 2.037080 0.715243 2
70036 13.364030 0.549972 1
32967 10.249135 0.192735 3
63249 10.464252 1.669767 1
42795 9.424574 0.013725 3
14459 4.458902 0.268444 2
19973 0.000000 0.575976 2
5494 9.686082 1.029808 3
67902 13.649402 1.052618 1
25621 13.181148 0.273014 3
27545 3.877472 0.401600 1
58656 1.413952 0.451380 1
7327 4.248986 1.430249 2
64555 8.779183 0.845947 1
8998 4.156252 0.097109 2
11752 5.580018 0.158401 2
76319 15.040440 1.366898 1
27665 12.793870 1.307323 3
67417 3.254877 0.669546 1
21808 10.725607 0.588588 3
15326 8.256473 0.765891 2
20057 8.033892 1.618562 3
79341 10.702532 0.204792 1
15636 5.062996 1.132555 2
35602 10.772286 0.668721 3
28544 1.892354 0.837028 1
57663 1.019966 0.372320 1
78727 15.546043 0.729742 1
68255 11.638205 0.409125 1
14964 3.427886 0.975616 2
21835 11.246174 1.475586 3
7487 0.000000 0.645045 2
8700 0.000000 1.424017 2
26226 8.242553 0.279069 3
65899 8.700060 0.101807 1
6543 0.812344 0.260334 2
46556 2.448235 1.176829 1
71038 13.230078 0.616147 1
47657 0.236133 0.340840 1
19600 11.155826 0.335131 3
37422 11.029636 0.505769 3
1363 2.901181 1.646633 2
26535 3.924594 1.143120 1
47707 2.524806 1.292848 1
38055 3.527474 1.449158 1
6286 3.384281 0.889268 2
10747 0.000000 1.107592 2
44883 11.898890 0.406441 3
56823 3.529892 1.375844 1
68086 11.442677 0.696919 1
70242 10.308145 0.422722 1
11409 8.540529 0.727373 2
67671 7.156949 1.691682 1
61238 0.720675 0.847574 1
17774 0.229405 1.038603 2
53376 3.399331 0.077501 1
30930 6.157239 0.580133 1
28987 1.239698 0.719989 1
13655 6.036854 0.016548 2
7227 5.258665 0.933722 2
40409 12.393001 1.571281 3
13605 9.627613 0.935842 2
26400 11.130453 0.597610 3
13491 8.842595 0.349768 3
30232 10.690010 1.456595 3
43253 5.714718 1.674780 3
55536 3.052505 1.335804 1
8807 0.000000 0.059025 2
25783 9.945307 1.287952 3
22812 2.719723 1.142148 1
77826 11.154055 1.608486 1
38172 2.687918 0.660836 1
31676 10.037847 0.962245 3
74038 12.404762 1.112080 1
44738 10.237305 0.633422 3
17410 4.745392 0.662520 2
5688 4.639461 1.569431 2
36642 3.149310 0.639669 1
29956 13.406875 1.639194 3
60350 6.068668 0.881241 1
23758 9.477022 0.899002 3
25780 3.897620 0.560201 2
11342 5.463615 1.203677 2
36109 3.369267 1.575043 1
14292 5.234562 0.825954 2
11160 0.000000 0.722170 2
23762 12.979069 0.504068 3
39567 5.376564 0.557476 1
25647 13.527910 1.586732 3
14814 2.196889 0.784587 2
73590 10.691748 0.007509 1
35187 1.659242 0.447066 1
49459 8.369667 0.656697 3
31657 13.157197 0.143248 3
6259 8.199667 0.908508 2
33101 4.441669 0.439381 3
27107 9.846492 0.644523 3
17824 0.019540 0.977949 2
43536 8.253774 0.748700 3
67705 6.038620 1.509646 1
35283 6.091587 1.694641 3
71308 8.986820 1.225165 1
31054 11.508473 1.624296 3
52387 8.807734 0.713922 3
40328 0.000000 0.816676 1
34844 8.889202 1.665414 3
11607 3.178117 0.542752 2
64306 7.013795 0.139909 1
32721 9.605014 0.065254 3
33170 1.230540 1.331674 1
37192 10.412811 0.890803 3
13089 0.000000 0.567161 2
66491 9.699991 0.122011 1
15941 0.000000 0.061191 2
4272 4.455293 0.272135 2
48812 3.020977 1.502803 1
28818 8.099278 0.216317 3
35394 1.157764 1.603217 1
71791 10.105396 0.121067 1
40668 11.230148 0.408603 3
39580 9.070058 0.011379 3
11786 0.566460 0.478837 2
19251 0.000000 0.487300 2
56594 8.956369 1.193484 3
54495 1.523057 0.620528 1
11844 2.749006 0.169855 2
45465 9.235393 0.188350 3
31033 10.555573 0.403927 3
16633 6.956372 1.519308 2
13887 0.636281 1.273984 2
52603 3.574737 0.075163 1
72000 9.032486 1.461809 1
68497 5.958993 0.023012 1
35135 2.435300 1.211744 1
26397 10.539731 1.638248 3
7313 7.646702 0.056513 2
91273 20.919349 0.644571 1
24743 1.424726 0.838447 1
31690 6.748663 0.890223 3
15432 2.289167 0.114881 2
58394 5.548377 0.402238 1
33962 6.057227 0.432666 1
31442 10.828595 0.559955 3
31044 11.318160 0.271094 3
29938 13.265311 0.633903 3
9875 0.000000 1.496715 2
51542 6.517133 0.402519 3
11878 4.934374 1.520028 2
69241 10.151738 0.896433 1
37776 2.425781 1.559467 1
68997 9.778962 1.195498 1
67416 12.219950 0.657677 1
59225 7.394151 0.954434 1
29138 8.518535 0.742546 3
5962 2.798700 0.662632 2
10847 0.637930 0.617373 2
70527 10.750490 0.097415 1
9610 0.625382 0.140969 2
64734 10.027968 0.282787 1
25941 9.817347 0.364197 3
2763 0.646828 1.266069 2
55601 3.347111 0.914294 1
31128 11.816892 0.193798 3
5181 0.000000 1.480198 2
69982 10.945666 0.993219 1
52440 10.244706 0.280539 3
57350 2.579801 1.149172 1
57869 2.630410 0.098869 1
56557 11.746200 1.695517 3
42342 8.104232 1.326277 3
15560 12.409743 0.790295 3
34826 12.167844 1.328086 3
8569 3.198408 0.299287 2
77623 16.055513 0.541052 1
78184 7.138659 0.158481 1
7036 4.831041 0.761419 2
69616 10.082890 1.373611 1
21546 10.066867 0.788470 3
36715 8.129538 0.329913 3
20522 3.012463 1.138108 2
42349 3.720391 0.845974 1
9037 0.773493 1.148256 2
26728 10.962941 1.037324 3
587 0.177621 0.162614 2
48915 3.085853 0.967899 1
9824 8.426781 0.202558 2
4135 1.825927 1.128347 2
9666 2.185155 1.010173 2
59333 7.184595 1.261338 1
36198 0.000000 0.116525 1
34909 8.901752 1.033527 3
47516 2.451497 1.358795 1
55807 3.213631 0.432044 1
14036 3.974739 0.723929 2
42856 9.601306 0.619232 3
64007 8.363897 0.445341 1
59428 6.381484 1.365019 1
13730 0.000000 1.403914 2
41740 9.609836 1.438105 3
63546 9.904741 0.985862 1
30417 7.185807 1.489102 3
69636 5.466703 1.216571 1
64660 0.000000 0.915898 1
14883 4.575443 0.535671 2
7965 3.277076 1.010868 2
68620 10.246623 1.239634 1
8738 2.341735 1.060235 2
7544 3.201046 0.498843 2
6377 6.066013 0.120927 2
36842 8.829379 0.895657 3
81046 15.833048 1.568245 1
67736 13.516711 1.220153 1
32492 0.664284 1.116755 1
39299 6.325139 0.605109 3
77289 8.677499 0.344373 1
33835 8.188005 0.964896 3
71890 9.414263 0.384030 1
32054 9.196547 1.138253 3
38579 10.202968 0.452363 3
55984 2.119439 1.481661 1
72694 13.635078 0.858314 1
42299 0.083443 0.701669 1
26635 9.149096 1.051446 3
8579 1.933803 1.374388 2
37302 14.115544 0.676198 3
22878 8.933736 0.943352 3
4364 2.661254 0.946117 2
4985 0.988432 1.305027 2
37068 2.063741 1.125946 1
41137 2.220590 0.690754 1
67759 6.424849 0.806641 1
11831 1.156153 1.613674 2
34502 3.032720 0.601847 1
4088 3.076828 0.952089 2
15199 0.000000 0.318105 2
17309 7.750480 0.554015 3
42816 10.958135 1.482500 3
43751 10.222018 0.488678 3
58335 2.367988 0.435741 1
75039 7.686054 1.381455 1
42878 11.464879 1.481589 3
42770 11.075735 0.089726 3
8848 3.543989 0.345853 2
31340 8.123889 1.282880 3
41413 4.331769 0.754467 3
12731 0.120865 1.211961 2
22447 6.116109 0.701523 3
33564 7.474534 0.505790 3
48907 8.819454 0.649292 3
8762 6.802144 0.615284 2
46696 12.666325 0.931960 3
36851 8.636180 0.399333 3
67639 11.730991 1.289833 1
171 8.132449 0.039062 2
26674 10.296589 1.496144 3
8739 7.583906 1.005764 2
66668 9.777806 0.496377 1
68732 8.833546 0.513876 1
69995 4.907899 1.518036 1
82008 8.362736 1.285939 1
25054 9.084726 1.606312 3
33085 14.164141 0.560970 3
41379 9.080683 0.989920 3
39417 6.522767 0.038548 3
12556 3.690342 0.462281 2
39432 3.563706 0.242019 1
38010 1.065870 1.141569 1
69306 6.683796 1.456317 1
38000 1.712874 0.243945 1
46321 13.109929 1.280111 3
66293 11.327910 0.780977 1
22730 4.545711 1.233254 1
5952 3.367889 0.468104 2
72308 8.326224 0.567347 1
60338 8.978339 1.442034 1
13301 5.655826 1.582159 2
27884 8.855312 0.570684 3
11188 6.649568 0.544233 2
56796 3.966325 0.850410 1
8571 1.924045 1.664782 2
4914 6.004812 0.280369 2
10784 0.000000 0.375849 2
39296 9.923018 0.092192 3
13113 2.389084 0.119284 2
70204 13.663189 0.133251 1
46813 11.434976 0.321216 3
11697 0.358270 1.292858 2
44183 9.598873 0.223524 3
2225 6.375275 0.608040 2
29066 11.580532 0.458401 3
4245 5.319324 1.598070 2
34379 4.324031 1.603481 1
44441 2.358370 1.273204 1
2022 0.000000 1.182708 2
26866 12.824376 0.890411 3
57070 1.587247 1.456982 1
32932 8.510324 1.520683 3
51967 10.428884 1.187734 3
44432 8.346618 0.042318 3
67066 7.541444 0.809226 1
17262 2.540946 1.583286 2
79728 9.473047 0.692513 1
14259 0.352284 0.474080 2
6122 0.000000 0.589826 2
76879 12.405171 0.567201 1
11426 4.126775 0.871452 2
2493 0.034087 0.335848 2
19910 1.177634 0.075106 2
10939 0.000000 0.479996 2
17716 0.994909 0.611135 2
31390 11.053664 1.180117 3
20375 0.000000 1.679729 2
26309 2.495011 1.459589 1
33484 11.516831 0.001156 3
45944 9.213215 0.797743 3
4249 5.332865 0.109288 2
6089 0.000000 1.689771 2
7513 0.000000 1.126053 2
27862 12.640062 1.690903 3
39038 2.693142 1.317518 1
19218 3.328969 0.268271 2
62911 7.193166 1.117456 1
77758 6.615512 1.521012 1
27940 8.000567 0.835341 3
2194 4.017541 0.512104 2
37072 13.245859 0.927465 3
15585 5.970616 0.813624 2
25577 11.668719 0.886902 3
8777 4.283237 1.272728 2
29016 10.742963 0.971401 3
21910 12.326672 1.592608 3
12916 0.000000 0.344622 2
10976 0.000000 0.922846 2
79065 10.602095 0.573686 1
36759 10.861859 1.155054 3
50011 1.229094 1.638690 1
1155 0.410392 1.313401 2
71600 14.552711 0.616162 1
30817 14.178043 0.616313 3
54559 14.136260 0.362388 1
29764 0.093534 1.207194 1
69100 10.929021 0.403110 1
47324 11.432919 0.825959 3
73199 9.134527 0.586846 1
44461 5.071432 1.421420 1
45617 11.460254 1.541749 3
28221 11.620039 1.103553 3
7091 4.022079 0.207307 2
6110 3.057842 1.631262 2
79016 7.782169 0.404385 1
18289 7.981741 0.929789 3
43679 4.601363 0.268326 1
22075 2.595564 1.115375 1
23535 10.049077 0.391045 3
25301 3.265444 1.572970 2
32256 11.780282 1.511014 3
36951 3.075975 0.286284 1
31290 1.795307 0.194343 1
38953 11.106979 0.202415 3
35257 5.994413 0.800021 1
25847 9.706062 1.012182 3
32680 10.582992 0.836025 3
62018 7.038266 1.458979 1
9074 0.023771 0.015314 2
33004 12.823982 0.676371 3
44588 3.617770 0.493483 1
32565 8.346684 0.253317 3
38563 6.104317 0.099207 1
75668 16.207776 0.584973 1
9069 6.401969 1.691873 2
53395 2.298696 0.559757 1
28631 7.661515 0.055981 3
71036 6.353608 1.645301 1
71142 10.442780 0.335870 1
37653 3.834509 1.346121 1
76839 10.998587 0.584555 1
9916 2.695935 1.512111 2
38889 3.356646 0.324230 1
39075 14.677836 0.793183 3
48071 1.551934 0.130902 1
7275 2.464739 0.223502 2
41804 1.533216 1.007481 1
35665 12.473921 0.162910 3
67956 6.491596 0.032576 1
41892 10.506276 1.510747 3
38844 4.380388 0.748506 1
74197 13.670988 1.687944 1
14201 8.317599 0.390409 2
3908 0.000000 0.556245 2
2459 0.000000 0.290218 2
32027 10.095799 1.188148 3
12870 0.860695 1.482632 2
9880 1.557564 0.711278 2
72784 10.072779 0.756030 1
17521 0.000000 0.431468 2
50283 7.140817 0.883813 3
33536 11.384548 1.438307 3
9452 3.214568 1.083536 2
37457 11.720655 0.301636 3
17724 6.374475 1.475925 3
43869 5.749684 0.198875 3
264 3.871808 0.552602 2
25736 8.336309 0.636238 3
39584 9.710442 1.503735 3
31246 1.532611 1.433898 1
49567 9.785785 0.984614 3
7052 2.633627 1.097866 2
35493 9.238935 0.494701 3
10986 1.205656 1.398803 2
49508 3.124909 1.670121 1
5734 7.935489 1.585044 2
65479 12.746636 1.560352 1
77268 10.732563 0.545321 1
28490 3.977403 0.766103 1
13546 4.194426 0.450663 2
37166 9.610286 0.142912 3
16381 4.797555 1.260455 2
10848 1.615279 0.093002 2
35405 4.614771 1.027105 1
15917 0.000000 1.369726 2
6131 0.608457 0.512220 2
67432 6.558239 0.667579 1
30354 12.315116 0.197068 3
69696 7.014973 1.494616 1
33481 8.822304 1.194177 3
43075 10.086796 0.570455 3
38343 7.241614 1.661627 3
14318 4.602395 1.511768 2
5367 7.434921 0.079792 2
37894 10.467570 1.595418 3
36172 9.948127 0.003663 3
40123 2.478529 1.568987 1
10976 5.938545 0.878540 2
12705 0.000000 0.948004 2
12495 5.559181 1.357926 2
35681 9.776654 0.535966 3
46202 3.092056 0.490906 1
11505 0.000000 1.623311 2
22834 4.459495 0.538867 1
49901 8.334306 1.646600 3
71932 11.226654 0.384686 1
13279 3.904737 1.597294 2
49112 7.038205 1.211329 3
77129 9.836120 1.054340 1
37447 1.990976 0.378081 1
62397 9.005302 0.485385 1
0 1.772510 1.039873 2
15476 0.458674 0.819560 2
40625 10.003919 0.231658 3
36706 0.520807 1.476008 1
28580 10.678214 1.431837 3
25862 4.425992 1.363842 1
63488 12.035355 0.831222 1
33944 10.606732 1.253858 3
30099 1.568653 0.684264 1
13725 2.545434 0.024271 2
36768 10.264062 0.982593 3
64656 9.866276 0.685218 1
14927 0.142704 0.057455 2
43231 9.853270 1.521432 3
66087 6.596604 1.653574 1
19806 2.602287 1.321481 2
41081 10.411776 0.664168 3
10277 7.083449 0.622589 2
7014 2.080068 1.254441 2
17275 0.522844 1.622458 2
31600 10.362000 1.544827 3
59956 3.412967 1.035410 1
42181 6.796548 1.112153 3
51743 4.092035 0.075804 1
5194 2.763811 1.564325 2
30832 12.547439 1.402443 3
7976 5.708052 1.596152 2
14602 4.558025 0.375806 2
41571 11.642307 0.438553 3
55028 3.222443 0.121399 1
5837 4.736156 0.029871 2
39808 10.839526 0.836323 3
20944 4.194791 0.235483 2
22146 14.936259 0.888582 3
42169 3.310699 1.521855 1
7010 2.971931 0.034321 2
3807 9.261667 0.537807 2
29241 7.791833 1.111416 3
52696 1.480470 1.028750 1
42545 3.677287 0.244167 1
24437 2.202967 1.370399 1
16037 5.796735 0.935893 2
8493 3.063333 0.144089 2
68080 11.233094 0.492487 1
59016 1.965570 0.005697 1
11810 8.616719 0.137419 2
68630 6.609989 1.083505 1
7629 1.712639 1.086297 2
71992 10.117445 1.299319 1
13398 0.000000 1.104178 2
26241 9.824777 1.346821 3
11160 1.653089 0.980949 2
76701 18.178822 1.473671 1
32174 6.781126 0.885340 3
45043 8.206750 1.549223 3
42173 10.081853 1.376745 3
69801 6.288742 0.112799 1
41737 3.695937 1.543589 1
46979 6.726151 1.069380 3
79267 12.969999 1.568223 1
4615 2.661390 1.531933 2
32907 7.072764 1.117386 3
37444 9.123366 1.318988 3
569 3.743946 1.039546 2
8723 2.341300 0.219361 2
6024 0.541913 0.592348 2
52252 2.310828 1.436753 1
8358 6.226597 1.427316 2
26166 7.277876 0.489252 3
18471 0.000000 0.389459 2
3386 7.218221 1.098828 2
41544 8.777129 1.111464 3
10480 2.813428 0.819419 2
5894 2.268766 1.412130 2
7273 6.283627 0.571292 2
22272 7.520081 1.626868 3
31369 11.739225 0.027138 3
10708 3.746883 0.877350 2
69364 12.089835 0.521631 1
37760 12.310404 0.259339 3
13004 0.000000 0.671355 2
37885 2.728800 0.331502 1
52555 10.814342 0.607652 3
38997 12.170268 0.844205 3
69698 6.698371 0.240084 1
11783 3.632672 1.643479 2
47636 10.059991 0.892361 3
15744 1.887674 0.756162 2
69058 8.229125 0.195886 1
33057 7.817082 0.476102 3
28681 12.277230 0.076805 3
34042 10.055337 1.115778 3
29928 3.596002 1.485952 1
9734 2.755530 1.420655 2
7344 7.780991 0.513048 2
7387 0.093705 0.391834 2
33957 8.481567 0.520078 3
9936 3.865584 0.110062 2
36094 9.683709 0.779984 3
39835 10.617255 1.359970 3
64486 7.203216 1.624762 1
0 7.601414 1.215605 2
39539 1.386107 1.417070 1
66972 9.129253 0.594089 1
15029 1.363447 0.620841 2
44909 3.181399 0.359329 1
38183 13.365414 0.217011 3
37372 4.207717 1.289767 1
0 4.088395 0.870075 2
17786 3.327371 1.142505 2
39055 1.303323 1.235650 1
37045 7.999279 1.581763 3
6435 2.217488 0.864536 2
72265 7.751808 0.192451 1
28152 14.149305 1.591532 3
25931 8.765721 0.152808 3
7538 3.408996 0.184896 2
1315 1.251021 0.112340 2
12292 6.160619 1.537165 2
49248 1.034538 1.585162 1
9025 0.000000 1.034635 2
13438 2.355051 0.542603 2
69683 6.614543 0.153771 1
25374 10.245062 1.450903 3
55264 3.467074 1.231019 1
38324 7.487678 1.572293 3
69643 4.624115 1.185192 1
44058 8.995957 1.436479 3
41316 11.564476 0.007195 3
29119 3.440948 0.078331 1
51656 1.673603 0.732746 1
3030 4.719341 0.699755 2
35695 10.304798 1.576488 3
1537 2.086915 1.199312 2
9083 6.338220 1.131305 2
47744 8.254926 0.710694 3
71372 16.067108 0.974142 1
37980 1.723201 0.310488 1
42385 3.785045 0.876904 1
22687 2.557561 0.123738 1
39512 9.852220 1.095171 3
11885 3.679147 1.557205 2
4944 9.789681 0.852971 2
73230 14.958998 0.526707 1
17585 11.182148 1.288459 3
68737 7.528533 1.657487 1
13818 5.253802 1.378603 2
31662 13.946752 1.426657 3
86686 15.557263 1.430029 1
43214 12.483550 0.688513 3
24091 2.317302 1.411137 1
52544 10.069724 0.766119 3
61861 5.792231 1.615483 1
47903 4.138435 0.475994 1
37190 12.929517 0.304378 3
6013 9.378238 0.307392 2
27223 8.361362 1.643204 3
69027 7.939406 1.325042 1
78642 10.735384 0.705788 1
30254 11.592723 0.286188 3
21704 10.098356 0.704748 3
34985 9.299025 0.545337 3
31316 11.158297 0.218067 3
76368 16.143900 0.558388 1
27953 10.971700 1.221787 3
152 0.000000 0.681478 2
9146 3.178961 1.292692 2
75346 17.625350 0.339926 1
26376 1.995833 0.267826 1
35255 10.640467 0.416181 3
19198 9.628339 0.985462 3
12518 4.662664 0.495403 2
25453 5.754047 1.382742 2
12530 0.000000 0.037146 2
62230 9.334332 0.198118 1
9517 3.846162 0.619968 2
71161 10.685084 0.678179 1
1593 4.752134 0.359205 2
33794 0.697630 0.966786 1
39710 10.365836 0.505898 3
16941 0.461478 0.352865 2
69209 11.339537 1.068740 1
4446 5.420280 0.127310 2
9347 3.469955 1.619947 2
55635 8.517067 0.994858 3
65889 8.306512 0.413690 1
10753 2.628690 0.444320 2
7055 0.000000 0.802985 2
7905 0.000000 1.170397 2
53447 7.298767 1.582346 3
9194 7.331319 1.277988 2
61914 9.392269 0.151617 1
15630 5.541201 1.180596 2
79194 15.149460 0.537540 1
12268 5.515189 0.250562 2
33682 7.728898 0.920494 3
26080 11.318785 1.510979 3
19119 3.574709 1.531514 2
30902 7.350965 0.026332 3
63039 7.122363 1.630177 1
51136 1.828412 1.013702 1
35262 10.117989 1.156862 3
42776 11.309897 0.086291 3
64191 8.342034 1.388569 1
15436 0.241714 0.715577 2
14402 10.482619 1.694972 2
6341 9.289510 1.428879 2
14113 4.269419 0.134181 2
6390 0.000000 0.189456 2
8794 0.817119 0.143668 2
43432 1.508394 0.652651 1
38334 9.359918 0.052262 3
34068 10.052333 0.550423 3
30819 11.111660 0.989159 3
22239 11.265971 0.724054 3
28725 10.383830 0.254836 3
57071 3.878569 1.377983 1
72420 13.679237 0.025346 1
28294 10.526846 0.781569 3
9896 0.000000 0.924198 2
65821 4.106727 1.085669 1
7645 8.118856 1.470686 2
71289 7.796874 0.052336 1
5128 2.789669 1.093070 2
13711 6.226962 0.287251 2
22240 10.169548 1.660104 3
15092 0.000000 1.370549 2
5017 7.513353 0.137348 2
10141 8.240793 0.099735 2
35570 14.612797 1.247390 3
46893 3.562976 0.445386 1
8178 3.230482 1.331698 2
55783 3.612548 1.551911 1
1148 0.000000 0.332365 2
10062 3.931299 0.487577 2
74124 14.752342 1.155160 1
66603 10.261887 1.628085 1
11893 2.787266 1.570402 2
50908 15.112319 1.324132 3
39891 5.184553 0.223382 3
65915 3.868359 0.128078 1
65678 3.507965 0.028904 1
62996 11.019254 0.427554 1
36851 3.812387 0.655245 1
36669 11.056784 0.378725 3
38876 8.826880 1.002328 3
26878 11.173861 1.478244 3
46246 11.506465 0.421993 3
12761 7.798138 0.147917 3
35282 10.155081 1.370039 3
68306 10.645275 0.693453 1
31262 9.663200 1.521541 3
34754 10.790404 1.312679 3
13408 2.810534 0.219962 2
30365 9.825999 1.388500 3
10709 1.421316 0.677603 2
24332 11.123219 0.809107 3
45517 13.402206 0.661524 3
6178 1.212255 0.836807 2
10639 1.568446 1.297469 2
29613 3.343473 1.312266 1
22392 5.400155 0.193494 1
51126 3.818754 0.590905 1
53644 7.973845 0.307364 3
51417 9.078824 0.734876 3
24859 0.153467 0.766619 1
61732 8.325167 0.028479 1
71128 7.092089 1.216733 1
27276 5.192485 1.094409 3
30453 10.340791 1.087721 3
18670 2.077169 1.019775 2
70600 10.151966 0.993105 1
12683 0.046826 0.809614 2
81597 11.221874 1.395015 1
69959 14.497963 1.019254 1
8124 3.554508 0.533462 2
18867 3.522673 0.086725 2
80886 14.531655 0.380172 1
55895 3.027528 0.885457 1
31587 1.845967 0.488985 1
10591 10.226164 0.804403 3
70096 10.965926 1.212328 1
53151 2.129921 1.477378 1
11992 0.000000 1.606849 2
33114 9.489005 0.827814 3
7413 0.000000 1.020797 2
10583 0.000000 1.270167 2
58668 6.556676 0.055183 1
35018 9.959588 0.060020 3
70843 7.436056 1.479856 1
14011 0.404888 0.459517 2
35015 9.952942 1.650279 3
70839 15.600252 0.021935 1
3024 2.723846 0.387455 2
5526 0.513866 1.323448 2
5113 0.000000 0.861859 2
20851 7.280602 1.438470 2
40999 9.161978 1.110180 3
15823 0.991725 0.730979 2
35432 7.398380 0.684218 3
53711 12.149747 1.389088 3
64371 9.149678 0.874905 1
9289 9.666576 1.370330 2
60613 3.620110 0.287767 1
18338 5.238800 1.253646 2
22845 14.715782 1.503758 3
74676 14.445740 1.211160 1
34143 13.609528 0.364240 3
14153 3.141585 0.424280 2
9327 0.000000 0.120947 2
18991 0.454750 1.033280 2
9193 0.510310 0.016395 2
2285 3.864171 0.616349 2
9493 6.724021 0.563044 2
2371 4.289375 0.012563 2
13963 0.000000 1.437030 2
2299 3.733617 0.698269 2
5262 2.002589 1.380184 2
4659 2.502627 0.184223 2
17582 6.382129 0.876581 2
27750 8.546741 0.128706 3
9868 2.694977 0.432818 2
18333 3.951256 0.333300 2
3780 9.856183 0.329181 2
18190 2.068962 0.429927 2
11145 3.410627 0.631838 2
68846 9.974715 0.669787 1
26575 10.650102 0.866627 3
48111 9.134528 0.728045 3
43757 7.882601 1.332446 3
以上数据保存在文件 datingTestSet2.txt 中:每行依次为三个特征值和一个类别标签(1、2、3)。
# Load the dating samples. The default delimiter splits on any whitespace,
# so the file parses whether its columns are separated by tabs or spaces.
data = np.loadtxt("datingTestSet2.txt")
hob_data = data[:, :-1]   # every column except the last: the features
hob_labels = data[:, -1]  # last column: the class label
(二)归一化处理
def dataNorm(data):
    """Standardise each column to zero mean and unit standard deviation.

    Args:
        data: Array-like of shape ``(n, d)`` with one sample per row.

    Returns:
        tuple: ``(normalised, mn, sigma)`` where ``normalised`` is
        ``(data - mn) / sigma``, ``mn`` is the per-column mean and ``sigma``
        the per-column population standard deviation (``ddof=0``). Columns
        with zero variance report ``sigma == 1`` so that they normalise to
        0 instead of producing nan/inf, and so that reusing the returned
        ``mn``/``sigma`` on new samples stays well defined.
    """
    data = np.asarray(data, dtype=float)
    mn = np.mean(data, 0)
    sigma = np.std(data, 0, ddof=0)
    # A constant column has sigma == 0; dividing by it would yield nan/inf.
    sigma = np.where(sigma == 0, 1.0, sigma)
    return (data - mn) / sigma, mn, sigma
(三)简单划分训练集,测试集
# Hold-out split: the trailing `hoRatio` share of the samples is the test set.
hoRatio = 0.4  # fraction of samples reserved for testing
m = hob_labels.size
m_test = int(m * hoRatio)

# Standardise the features; mn and sigma are kept so that new inputs can be
# normalised the same way later.
hob_data_norm, mn, sigma = dataNorm(hob_data)

# The leading m - m_test rows form the training set (X, y); the remaining
# m_test rows form the test set (X_test, y_test).
X, X_test = np.split(hob_data_norm, [m - m_test])
y, y_test = np.split(hob_labels, [m - m_test])
(四)进行测试,获取错误率
# Evaluate on the hold-out set: classify every test sample with k = 3 and
# count the predictions that differ from the true label.
# NOTE(review): indentation was lost in this source, so it is unclear whether
# the "{} --- {}" print belongs to the `if` (mismatches only) or to the loop
# body (every sample) — confirm before restoring the indentation.
error_count = 0
for i in range(y_test.size):
clf = KNNClassfy(X_test[i],X,y,3)
if clf != y_test[i]:
error_count = error_count + 1
print("{} --- {}".format(clf,y_test[i]))
# Error rate = misclassified samples / number of test samples.
print("error rate is:",error_count/y_test.size)
(五)进行预测
# Interactive prediction: read three feature values, normalise them with the
# mean/std of the data set, and report the predicted preference level.
resultList = ['not at all', 'in small doses', 'in large doses']
hb_1, hb_2, hb_3 = (
    float(input(prompt)) for prompt in ("爱好一:", "爱好二:", "爱好三:")
)

# Shape (1, 3) so the per-column mn/sigma broadcast; flattened for the classifier.
preData = np.array([[hb_1, hb_2, hb_3]])
preData_norm = (preData - mn) / sigma
preData_norm = preData_norm.flatten()

clf = KNNClassfy(preData_norm, hob_data_norm, hob_labels, 3)
# Labels are 1-based, resultList is 0-based.
print("喜欢程度:{}".format(resultList[int(clf) - 1]))
三:手写数字识别
(一)数据展示
数据存放形式:每个数字样本(例如 7)保存为一个文本文件,内容是 32 行、每行 32 个数字字符,对应 32*32 像素的图像。
文件命名形式:文件名中 _ 前面是该样本对应的数字(标签),_ 后面表示它是该数字的第几个样本。
(二)数据读取---将图像转向量
from os import listdir
import codecs
# Convert one digit file into a flat feature vector.
def image2Vector(filename):
    """Read a 32x32 text image of digit characters into a 1024-element vector.

    The first 32 characters of each of the first 32 lines are converted with
    ``int`` and appended row by row.

    Args:
        filename: Path of the text file to read.

    Returns:
        numpy.ndarray: 1-D integer array of length 1024.

    Raises:
        ValueError: If the file has fewer than 32 lines, a line is shorter
            than 32 characters, or a character is not a digit.
    """
    data = []
    # The `with` block closes the file; no explicit close() is needed.
    with open(filename, 'r') as fp:
        for row in range(32):
            linestr = fp.readline()
            if len(linestr) < 32:
                raise ValueError(
                    "%s: row %d has %d characters, expected at least 32"
                    % (filename, row, len(linestr))
                )
            data.extend(int(ch) for ch in linestr[:32])
    return np.array(data)
(三)获取训练集和测试集
# Load a whole directory of digit files.
def getDataSet(path):
    """Load every digit file in ``path`` into a feature matrix and a label list.

    The label of a file is the integer that precedes the first ``_`` in its
    file name.

    Args:
        path: Directory that contains the digit text files.

    Returns:
        tuple: ``(data, hwLabels)`` where ``data`` is an ``(m, 1024)`` float
        array with one flattened image per row and ``hwLabels`` is a list of
        the ``m`` integer labels in the same order.
    """
    import os  # local import: the file only imports `listdir` from os

    # listdir() returns entries in arbitrary order; sorting makes the row
    # order reproducible between runs and machines.
    filelist = sorted(listdir(path))
    m = len(filelist)
    data = np.zeros((m, 1024))
    hwLabels = []
    for i, filename in enumerate(filelist):
        hwLabels.append(int(filename.split('_')[0]))
        data[i, :] = image2Vector(os.path.join(path, filename))
    return data, hwLabels
# Load the training digits; this rebinds `data` and `labels`.
data, labels = getDataSet("trainingDigits")

# Load the test digits and show the shape of the test matrix.
data_test, labels_test = getDataSet("testDigits")
print(data_test.shape)
(三)实现KNN,改变部分
def KNNClassfy(preData, dataSet, labels, k):
    """Classify one sample by majority vote among its k nearest neighbours.

    Args:
        preData: Feature vector of the sample to classify, shape ``(d,)``.
        dataSet: Training samples, shape ``(n, d)``.
        labels: Sequence of ``n`` hashable class labels, aligned with the
            rows of ``dataSet``.
        k: Number of neighbours that vote. Values larger than ``n`` are
            capped at ``n``.

    Returns:
        The label with the most votes. When several labels tie, the one
        whose first vote came from the closest neighbour wins.

    Raises:
        ValueError: If ``k`` is smaller than 1 or ``dataSet`` has no rows.
    """
    if k < 1:
        raise ValueError("k must be a positive integer")

    # Squared Euclidean distance: the square root is skipped because it
    # does not change the ordering of the neighbours.
    distance = np.sum(np.power(np.asarray(dataSet) - np.asarray(preData), 2), 1)
    if distance.size == 0:
        raise ValueError("dataSet must contain at least one sample")

    # A stable sort keeps equally distant samples in their original order,
    # which makes the result deterministic. Slicing caps k at n.
    nearest = np.argsort(distance, 0, kind="stable")[:k]

    votes = {}
    for idx in nearest:
        label = labels[idx]
        # Each neighbour adds one vote; without the increment every count
        # stays 0 and the nearest neighbour's label always wins.
        votes[label] = votes.get(label, 0) + 1

    # max() returns the first maximal key in insertion order, i.e. the tied
    # label that was seen nearest to the query.
    return max(votes, key=votes.get)
(四)结果测试
# Evaluate the digit classifier: classify every test image with k = 3 and
# count the predictions that differ from the true label.
# NOTE(review): indentation was lost in this source, so it is unclear whether
# `print(error_count)` runs per mismatch, per sample, or once after the loop —
# confirm before restoring the indentation.
error_count = 0
for i in range(data_test.shape[0]):
clf = KNNClassfy(data_test[i,:],data,labels,3)
if clf != labels_test[i]:
error_count += 1
print(error_count)
# Prints the error count followed by the error rate (errors / test samples).
print("{} {}".format(error_count,error_count/data_test.shape[0]))