机器学习实战---K-近邻

一:简单实现K-近邻算法

(一)导入数据

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

def CreateDataSet():
    """Return the four-point toy training set used by the KNN demo.

    Returns:
        tuple: ``(data, labels)`` where ``data`` is a 4x2 array of sample
        coordinates and ``labels`` is a length-4 array holding the class
        name ('A' or 'B') of the row at the same index.
    """
    samples = [
        [1.0, 1.1],
        [1.0, 1.0],
        [0, 0],
        [0, 0.1],
    ]
    tags = ['A', 'A', 'B', 'B']
    return np.array(samples), np.array(tags)

# Build the toy data set and echo it for inspection.
data, labels = CreateDataSet()
print(data)
print(labels)

# Scatter-plot the samples and annotate every point with its class label.
plt.figure()
plt.scatter(data[:, 0], data[:, 1], c="b")
for i, point in enumerate(data):
    # Shift the text slightly to the right so it does not sit on the marker.
    plt.text(point[0] + 0.02, point[1], labels[i])
plt.show()

(二)实现KNN算法

def KNNClassfy(preData,dataSet,labels,k):
    """Classify one sample by majority vote among its k nearest neighbours.

    Args:
        preData: feature vector of the sample to classify (array-like that
            broadcasts against the rows of ``dataSet``).
        dataSet: 2-D array-like of training samples, one row per sample.
        labels: sequence of class labels, indexed like the rows of
            ``dataSet``.
        k: number of nearest neighbours that vote; must satisfy
            ``1 <= k <= number of rows in dataSet``.

    Returns:
        The label that received the most votes among the k nearest samples.
        On a tie, the label that was seen first while walking the
        neighbours from nearest to farthest wins.

    Raises:
        ValueError: if ``k`` is smaller than 1 or larger than the number of
            training samples.
    """
    # Accept plain Python lists as well as ndarrays.
    samples = np.asarray(dataSet)
    query = np.asarray(preData)

    # Squared Euclidean distance; the square root is skipped because it
    # does not change the ordering of the neighbours.
    distance = np.sum(np.power(samples - query, 2), 1)

    sample_count = distance.shape[0]
    if k < 1 or k > sample_count:
        raise ValueError(
            "k must be between 1 and the number of samples (%d), got %r"
            % (sample_count, k)
        )

    # Ascending sort; a stable sort keeps equidistant samples in their
    # original order so the result is deterministic.
    nearest = np.argsort(distance, kind="stable")[:k]

    # Tally one vote per neighbour (dicts preserve insertion order).
    votes = {}
    for idx in nearest:
        label = labels[idx]
        votes[label] = votes.get(label, 0) + 1

    # max() returns the first label reaching the highest count.
    return max(votes, key=votes.get)

(三)结果测试

# Classify a single query point against the toy data set using its
# 3 nearest neighbours, then print the predicted label.
preData = np.array([0, 0.3])
preLab = KNNClassfy(preData, data, labels, k=3)
print(preLab)

二:使用KNN算法分析喜好---多维 

(一)读取数据

40920    8.326976    0.953952    3
14488    7.153469    1.673904    2
26052    1.441871    0.805124    1
75136    13.147394    0.428964    1
38344    1.669788    0.134296    1
72993    10.141740    1.032955    1
35948    6.830792    1.213192    3
42666    13.276369    0.543880    3
67497    8.631577    0.749278    1
35483    12.273169    1.508053    3
50242    3.723498    0.831917    1
63275    8.385879    1.669485    1
5569    4.875435    0.728658    2
51052    4.680098    0.625224    1
77372    15.299570    0.331351    1
43673    1.889461    0.191283    1
61364    7.516754    1.269164    1
69673    14.239195    0.261333    1
15669    0.000000    1.250185    2
28488    10.528555    1.304844    3
6487    3.540265    0.822483    2
37708    2.991551    0.833920    1
22620    5.297865    0.638306    2
28782    6.593803    0.187108    3
19739    2.816760    1.686209    2
36788    12.458258    0.649617    3
5741    0.000000    1.656418    2
28567    9.968648    0.731232    3
6808    1.364838    0.640103    2
41611    0.230453    1.151996    1
36661    11.865402    0.882810    3
43605    0.120460    1.352013    1
15360    8.545204    1.340429    3
63796    5.856649    0.160006    1
10743    9.665618    0.778626    2
70808    9.778763    1.084103    1
72011    4.932976    0.632026    1
5914    2.216246    0.587095    2
14851    14.305636    0.632317    3
33553    12.591889    0.686581    3
44952    3.424649    1.004504    1
17934    0.000000    0.147573    2
27738    8.533823    0.205324    3
29290    9.829528    0.238620    3
42330    11.492186    0.263499    3
36429    3.570968    0.832254    1
39623    1.771228    0.207612    1
32404    3.513921    0.991854    1
27268    4.398172    0.975024    1
5477    4.276823    1.174874    2
14254    5.946014    1.614244    2
68613    13.798970    0.724375    1
41539    10.393591    1.663724    3
7917    3.007577    0.297302    2
21331    1.031938    0.486174    2
8338    4.751212    0.064693    2
5176    3.692269    1.655113    2
18983    10.448091    0.267652    3
68837    10.585786    0.329557    1
13438    1.604501    0.069064    2
48849    3.679497    0.961466    1
12285    3.795146    0.696694    2
7826    2.531885    1.659173    2
5565    9.733340    0.977746    2
10346    6.093067    1.413798    2
1823    7.712960    1.054927    2
9744    11.470364    0.760461    3
16857    2.886529    0.934416    2
39336    10.054373    1.138351    3
65230    9.972470    0.881876    1
2463    2.335785    1.366145    2
27353    11.375155    1.528626    3
16191    0.000000    0.605619    2
12258    4.126787    0.357501    2
42377    6.319522    1.058602    1
25607    8.680527    0.086955    3
77450    14.856391    1.129823    1
58732    2.454285    0.222380    1
46426    7.292202    0.548607    3
32688    8.745137    0.857348    3
64890    8.579001    0.683048    1
8554    2.507302    0.869177    2
28861    11.415476    1.505466    3
42050    4.838540    1.680892    1
32193    10.339507    0.583646    3
64895    6.573742    1.151433    1
2355    6.539397    0.462065    2
0    2.209159    0.723567    2
70406    11.196378    0.836326    1
57399    4.229595    0.128253    1
41732    9.505944    0.005273    3
11429    8.652725    1.348934    3
75270    17.101108    0.490712    1
5459    7.871839    0.717662    2
73520    8.262131    1.361646    1
40279    9.015635    1.658555    3
21540    9.215351    0.806762    3
17694    6.375007    0.033678    2
22329    2.262014    1.022169    1
46570    5.677110    0.709469    1
42403    11.293017    0.207976    3
33654    6.590043    1.353117    1
9171    4.711960    0.194167    2
28122    8.768099    1.108041    3
34095    11.502519    0.545097    3
1774    4.682812    0.578112    2
40131    12.446578    0.300754    3
13994    12.908384    1.657722    3
77064    12.601108    0.974527    1
11210    3.929456    0.025466    2
6122    9.751503    1.182050    3
15341    3.043767    0.888168    2
44373    4.391522    0.807100    1
28454    11.695276    0.679015    3
63771    7.879742    0.154263    1
9217    5.613163    0.933632    2
69076    9.140172    0.851300    1
24489    4.258644    0.206892    1
16871    6.799831    1.221171    2
39776    8.752758    0.484418    3
5901    1.123033    1.180352    2
40987    10.833248    1.585426    3
7479    3.051618    0.026781    2
38768    5.308409    0.030683    3
4933    1.841792    0.028099    2
32311    2.261978    1.605603    1
26501    11.573696    1.061347    3
37433    8.038764    1.083910    3
23503    10.734007    0.103715    3
68607    9.661909    0.350772    1
27742    9.005850    0.548737    3
11303    0.000000    0.539131    2
0    5.757140    1.062373    2
32729    9.164656    1.624565    3
24619    1.318340    1.436243    1
42414    14.075597    0.695934    3
20210    10.107550    1.308398    3
33225    7.960293    1.219760    3
54483    6.317292    0.018209    1
18475    12.664194    0.595653    3
33926    2.906644    0.581657    1
43865    2.388241    0.913938    1
26547    6.024471    0.486215    3
44404    7.226764    1.255329    3
16674    4.183997    1.275290    2
8123    11.850211    1.096981    3
42747    11.661797    1.167935    3
56054    3.574967    0.494666    1
10933    0.000000    0.107475    2
18121    7.937657    0.904799    3
11272    3.365027    1.014085    2
16297    0.000000    0.367491    2
28168    13.860672    1.293270    3
40963    10.306714    1.211594    3
31685    7.228002    0.670670    3
55164    4.508740    1.036192    1
17595    0.366328    0.163652    2
1862    3.299444    0.575152    2
57087    0.573287    0.607915    1
63082    9.183738    0.012280    1
51213    7.842646    1.060636    3
6487    4.750964    0.558240    2
4805    11.438702    1.556334    3
30302    8.243063    1.122768    3
68680    7.949017    0.271865    1
17591    7.875477    0.227085    2
74391    9.569087    0.364856    1
37217    7.750103    0.869094    3
42814    0.000000    1.515293    1
14738    3.396030    0.633977    2
19896    11.916091    0.025294    3
14673    0.460758    0.689586    2
32011    13.087566    0.476002    3
58736    4.589016    1.672600    1
54744    8.397217    1.534103    1
29482    5.562772    1.689388    1
27698    10.905159    0.619091    3
11443    1.311441    1.169887    2
56117    10.647170    0.980141    3
39514    0.000000    0.481918    1
26627    8.503025    0.830861    3
16525    0.436880    1.395314    2
24368    6.127867    1.102179    1
22160    12.112492    0.359680    3
6030    1.264968    1.141582    2
6468    6.067568    1.327047    2
22945    8.010964    1.681648    3
18520    3.791084    0.304072    2
34914    11.773195    1.262621    3
6121    8.339588    1.443357    2
38063    2.563092    1.464013    1
23410    5.954216    0.953782    1
35073    9.288374    0.767318    3
52914    3.976796    1.043109    1
16801    8.585227    1.455708    3
9533    1.271946    0.796506    2
16721    0.000000    0.242778    2
5832    0.000000    0.089749    2
44591    11.521298    0.300860    3
10143    1.139447    0.415373    2
21609    5.699090    1.391892    2
23817    2.449378    1.322560    1
15640    0.000000    1.228380    2
8847    3.168365    0.053993    2
50939    10.428610    1.126257    3
28521    2.943070    1.446816    1
32901    10.441348    0.975283    3
42850    12.478764    1.628726    3
13499    5.856902    0.363883    2
40345    2.476420    0.096075    1
43547    1.826637    0.811457    1
70758    4.324451    0.328235    1
19780    1.376085    1.178359    2
44484    5.342462    0.394527    1
54462    11.835521    0.693301    3
20085    12.423687    1.424264    3
42291    12.161273    0.071131    3
47550    8.148360    1.649194    3
11938    1.531067    1.549756    2
40699    3.200912    0.309679    1
70908    8.862691    0.530506    1
73989    6.370551    0.369350    1
11872    2.468841    0.145060    2
48463    11.054212    0.141508    3
15987    2.037080    0.715243    2
70036    13.364030    0.549972    1
32967    10.249135    0.192735    3
63249    10.464252    1.669767    1
42795    9.424574    0.013725    3
14459    4.458902    0.268444    2
19973    0.000000    0.575976    2
5494    9.686082    1.029808    3
67902    13.649402    1.052618    1
25621    13.181148    0.273014    3
27545    3.877472    0.401600    1
58656    1.413952    0.451380    1
7327    4.248986    1.430249    2
64555    8.779183    0.845947    1
8998    4.156252    0.097109    2
11752    5.580018    0.158401    2
76319    15.040440    1.366898    1
27665    12.793870    1.307323    3
67417    3.254877    0.669546    1
21808    10.725607    0.588588    3
15326    8.256473    0.765891    2
20057    8.033892    1.618562    3
79341    10.702532    0.204792    1
15636    5.062996    1.132555    2
35602    10.772286    0.668721    3
28544    1.892354    0.837028    1
57663    1.019966    0.372320    1
78727    15.546043    0.729742    1
68255    11.638205    0.409125    1
14964    3.427886    0.975616    2
21835    11.246174    1.475586    3
7487    0.000000    0.645045    2
8700    0.000000    1.424017    2
26226    8.242553    0.279069    3
65899    8.700060    0.101807    1
6543    0.812344    0.260334    2
46556    2.448235    1.176829    1
71038    13.230078    0.616147    1
47657    0.236133    0.340840    1
19600    11.155826    0.335131    3
37422    11.029636    0.505769    3
1363    2.901181    1.646633    2
26535    3.924594    1.143120    1
47707    2.524806    1.292848    1
38055    3.527474    1.449158    1
6286    3.384281    0.889268    2
10747    0.000000    1.107592    2
44883    11.898890    0.406441    3
56823    3.529892    1.375844    1
68086    11.442677    0.696919    1
70242    10.308145    0.422722    1
11409    8.540529    0.727373    2
67671    7.156949    1.691682    1
61238    0.720675    0.847574    1
17774    0.229405    1.038603    2
53376    3.399331    0.077501    1
30930    6.157239    0.580133    1
28987    1.239698    0.719989    1
13655    6.036854    0.016548    2
7227    5.258665    0.933722    2
40409    12.393001    1.571281    3
13605    9.627613    0.935842    2
26400    11.130453    0.597610    3
13491    8.842595    0.349768    3
30232    10.690010    1.456595    3
43253    5.714718    1.674780    3
55536    3.052505    1.335804    1
8807    0.000000    0.059025    2
25783    9.945307    1.287952    3
22812    2.719723    1.142148    1
77826    11.154055    1.608486    1
38172    2.687918    0.660836    1
31676    10.037847    0.962245    3
74038    12.404762    1.112080    1
44738    10.237305    0.633422    3
17410    4.745392    0.662520    2
5688    4.639461    1.569431    2
36642    3.149310    0.639669    1
29956    13.406875    1.639194    3
60350    6.068668    0.881241    1
23758    9.477022    0.899002    3
25780    3.897620    0.560201    2
11342    5.463615    1.203677    2
36109    3.369267    1.575043    1
14292    5.234562    0.825954    2
11160    0.000000    0.722170    2
23762    12.979069    0.504068    3
39567    5.376564    0.557476    1
25647    13.527910    1.586732    3
14814    2.196889    0.784587    2
73590    10.691748    0.007509    1
35187    1.659242    0.447066    1
49459    8.369667    0.656697    3
31657    13.157197    0.143248    3
6259    8.199667    0.908508    2
33101    4.441669    0.439381    3
27107    9.846492    0.644523    3
17824    0.019540    0.977949    2
43536    8.253774    0.748700    3
67705    6.038620    1.509646    1
35283    6.091587    1.694641    3
71308    8.986820    1.225165    1
31054    11.508473    1.624296    3
52387    8.807734    0.713922    3
40328    0.000000    0.816676    1
34844    8.889202    1.665414    3
11607    3.178117    0.542752    2
64306    7.013795    0.139909    1
32721    9.605014    0.065254    3
33170    1.230540    1.331674    1
37192    10.412811    0.890803    3
13089    0.000000    0.567161    2
66491    9.699991    0.122011    1
15941    0.000000    0.061191    2
4272    4.455293    0.272135    2
48812    3.020977    1.502803    1
28818    8.099278    0.216317    3
35394    1.157764    1.603217    1
71791    10.105396    0.121067    1
40668    11.230148    0.408603    3
39580    9.070058    0.011379    3
11786    0.566460    0.478837    2
19251    0.000000    0.487300    2
56594    8.956369    1.193484    3
54495    1.523057    0.620528    1
11844    2.749006    0.169855    2
45465    9.235393    0.188350    3
31033    10.555573    0.403927    3
16633    6.956372    1.519308    2
13887    0.636281    1.273984    2
52603    3.574737    0.075163    1
72000    9.032486    1.461809    1
68497    5.958993    0.023012    1
35135    2.435300    1.211744    1
26397    10.539731    1.638248    3
7313    7.646702    0.056513    2
91273    20.919349    0.644571    1
24743    1.424726    0.838447    1
31690    6.748663    0.890223    3
15432    2.289167    0.114881    2
58394    5.548377    0.402238    1
33962    6.057227    0.432666    1
31442    10.828595    0.559955    3
31044    11.318160    0.271094    3
29938    13.265311    0.633903    3
9875    0.000000    1.496715    2
51542    6.517133    0.402519    3
11878    4.934374    1.520028    2
69241    10.151738    0.896433    1
37776    2.425781    1.559467    1
68997    9.778962    1.195498    1
67416    12.219950    0.657677    1
59225    7.394151    0.954434    1
29138    8.518535    0.742546    3
5962    2.798700    0.662632    2
10847    0.637930    0.617373    2
70527    10.750490    0.097415    1
9610    0.625382    0.140969    2
64734    10.027968    0.282787    1
25941    9.817347    0.364197    3
2763    0.646828    1.266069    2
55601    3.347111    0.914294    1
31128    11.816892    0.193798    3
5181    0.000000    1.480198    2
69982    10.945666    0.993219    1
52440    10.244706    0.280539    3
57350    2.579801    1.149172    1
57869    2.630410    0.098869    1
56557    11.746200    1.695517    3
42342    8.104232    1.326277    3
15560    12.409743    0.790295    3
34826    12.167844    1.328086    3
8569    3.198408    0.299287    2
77623    16.055513    0.541052    1
78184    7.138659    0.158481    1
7036    4.831041    0.761419    2
69616    10.082890    1.373611    1
21546    10.066867    0.788470    3
36715    8.129538    0.329913    3
20522    3.012463    1.138108    2
42349    3.720391    0.845974    1
9037    0.773493    1.148256    2
26728    10.962941    1.037324    3
587    0.177621    0.162614    2
48915    3.085853    0.967899    1
9824    8.426781    0.202558    2
4135    1.825927    1.128347    2
9666    2.185155    1.010173    2
59333    7.184595    1.261338    1
36198    0.000000    0.116525    1
34909    8.901752    1.033527    3
47516    2.451497    1.358795    1
55807    3.213631    0.432044    1
14036    3.974739    0.723929    2
42856    9.601306    0.619232    3
64007    8.363897    0.445341    1
59428    6.381484    1.365019    1
13730    0.000000    1.403914    2
41740    9.609836    1.438105    3
63546    9.904741    0.985862    1
30417    7.185807    1.489102    3
69636    5.466703    1.216571    1
64660    0.000000    0.915898    1
14883    4.575443    0.535671    2
7965    3.277076    1.010868    2
68620    10.246623    1.239634    1
8738    2.341735    1.060235    2
7544    3.201046    0.498843    2
6377    6.066013    0.120927    2
36842    8.829379    0.895657    3
81046    15.833048    1.568245    1
67736    13.516711    1.220153    1
32492    0.664284    1.116755    1
39299    6.325139    0.605109    3
77289    8.677499    0.344373    1
33835    8.188005    0.964896    3
71890    9.414263    0.384030    1
32054    9.196547    1.138253    3
38579    10.202968    0.452363    3
55984    2.119439    1.481661    1
72694    13.635078    0.858314    1
42299    0.083443    0.701669    1
26635    9.149096    1.051446    3
8579    1.933803    1.374388    2
37302    14.115544    0.676198    3
22878    8.933736    0.943352    3
4364    2.661254    0.946117    2
4985    0.988432    1.305027    2
37068    2.063741    1.125946    1
41137    2.220590    0.690754    1
67759    6.424849    0.806641    1
11831    1.156153    1.613674    2
34502    3.032720    0.601847    1
4088    3.076828    0.952089    2
15199    0.000000    0.318105    2
17309    7.750480    0.554015    3
42816    10.958135    1.482500    3
43751    10.222018    0.488678    3
58335    2.367988    0.435741    1
75039    7.686054    1.381455    1
42878    11.464879    1.481589    3
42770    11.075735    0.089726    3
8848    3.543989    0.345853    2
31340    8.123889    1.282880    3
41413    4.331769    0.754467    3
12731    0.120865    1.211961    2
22447    6.116109    0.701523    3
33564    7.474534    0.505790    3
48907    8.819454    0.649292    3
8762    6.802144    0.615284    2
46696    12.666325    0.931960    3
36851    8.636180    0.399333    3
67639    11.730991    1.289833    1
171    8.132449    0.039062    2
26674    10.296589    1.496144    3
8739    7.583906    1.005764    2
66668    9.777806    0.496377    1
68732    8.833546    0.513876    1
69995    4.907899    1.518036    1
82008    8.362736    1.285939    1
25054    9.084726    1.606312    3
33085    14.164141    0.560970    3
41379    9.080683    0.989920    3
39417    6.522767    0.038548    3
12556    3.690342    0.462281    2
39432    3.563706    0.242019    1
38010    1.065870    1.141569    1
69306    6.683796    1.456317    1
38000    1.712874    0.243945    1
46321    13.109929    1.280111    3
66293    11.327910    0.780977    1
22730    4.545711    1.233254    1
5952    3.367889    0.468104    2
72308    8.326224    0.567347    1
60338    8.978339    1.442034    1
13301    5.655826    1.582159    2
27884    8.855312    0.570684    3
11188    6.649568    0.544233    2
56796    3.966325    0.850410    1
8571    1.924045    1.664782    2
4914    6.004812    0.280369    2
10784    0.000000    0.375849    2
39296    9.923018    0.092192    3
13113    2.389084    0.119284    2
70204    13.663189    0.133251    1
46813    11.434976    0.321216    3
11697    0.358270    1.292858    2
44183    9.598873    0.223524    3
2225    6.375275    0.608040    2
29066    11.580532    0.458401    3
4245    5.319324    1.598070    2
34379    4.324031    1.603481    1
44441    2.358370    1.273204    1
2022    0.000000    1.182708    2
26866    12.824376    0.890411    3
57070    1.587247    1.456982    1
32932    8.510324    1.520683    3
51967    10.428884    1.187734    3
44432    8.346618    0.042318    3
67066    7.541444    0.809226    1
17262    2.540946    1.583286    2
79728    9.473047    0.692513    1
14259    0.352284    0.474080    2
6122    0.000000    0.589826    2
76879    12.405171    0.567201    1
11426    4.126775    0.871452    2
2493    0.034087    0.335848    2
19910    1.177634    0.075106    2
10939    0.000000    0.479996    2
17716    0.994909    0.611135    2
31390    11.053664    1.180117    3
20375    0.000000    1.679729    2
26309    2.495011    1.459589    1
33484    11.516831    0.001156    3
45944    9.213215    0.797743    3
4249    5.332865    0.109288    2
6089    0.000000    1.689771    2
7513    0.000000    1.126053    2
27862    12.640062    1.690903    3
39038    2.693142    1.317518    1
19218    3.328969    0.268271    2
62911    7.193166    1.117456    1
77758    6.615512    1.521012    1
27940    8.000567    0.835341    3
2194    4.017541    0.512104    2
37072    13.245859    0.927465    3
15585    5.970616    0.813624    2
25577    11.668719    0.886902    3
8777    4.283237    1.272728    2
29016    10.742963    0.971401    3
21910    12.326672    1.592608    3
12916    0.000000    0.344622    2
10976    0.000000    0.922846    2
79065    10.602095    0.573686    1
36759    10.861859    1.155054    3
50011    1.229094    1.638690    1
1155    0.410392    1.313401    2
71600    14.552711    0.616162    1
30817    14.178043    0.616313    3
54559    14.136260    0.362388    1
29764    0.093534    1.207194    1
69100    10.929021    0.403110    1
47324    11.432919    0.825959    3
73199    9.134527    0.586846    1
44461    5.071432    1.421420    1
45617    11.460254    1.541749    3
28221    11.620039    1.103553    3
7091    4.022079    0.207307    2
6110    3.057842    1.631262    2
79016    7.782169    0.404385    1
18289    7.981741    0.929789    3
43679    4.601363    0.268326    1
22075    2.595564    1.115375    1
23535    10.049077    0.391045    3
25301    3.265444    1.572970    2
32256    11.780282    1.511014    3
36951    3.075975    0.286284    1
31290    1.795307    0.194343    1
38953    11.106979    0.202415    3
35257    5.994413    0.800021    1
25847    9.706062    1.012182    3
32680    10.582992    0.836025    3
62018    7.038266    1.458979    1
9074    0.023771    0.015314    2
33004    12.823982    0.676371    3
44588    3.617770    0.493483    1
32565    8.346684    0.253317    3
38563    6.104317    0.099207    1
75668    16.207776    0.584973    1
9069    6.401969    1.691873    2
53395    2.298696    0.559757    1
28631    7.661515    0.055981    3
71036    6.353608    1.645301    1
71142    10.442780    0.335870    1
37653    3.834509    1.346121    1
76839    10.998587    0.584555    1
9916    2.695935    1.512111    2
38889    3.356646    0.324230    1
39075    14.677836    0.793183    3
48071    1.551934    0.130902    1
7275    2.464739    0.223502    2
41804    1.533216    1.007481    1
35665    12.473921    0.162910    3
67956    6.491596    0.032576    1
41892    10.506276    1.510747    3
38844    4.380388    0.748506    1
74197    13.670988    1.687944    1
14201    8.317599    0.390409    2
3908    0.000000    0.556245    2
2459    0.000000    0.290218    2
32027    10.095799    1.188148    3
12870    0.860695    1.482632    2
9880    1.557564    0.711278    2
72784    10.072779    0.756030    1
17521    0.000000    0.431468    2
50283    7.140817    0.883813    3
33536    11.384548    1.438307    3
9452    3.214568    1.083536    2
37457    11.720655    0.301636    3
17724    6.374475    1.475925    3
43869    5.749684    0.198875    3
264    3.871808    0.552602    2
25736    8.336309    0.636238    3
39584    9.710442    1.503735    3
31246    1.532611    1.433898    1
49567    9.785785    0.984614    3
7052    2.633627    1.097866    2
35493    9.238935    0.494701    3
10986    1.205656    1.398803    2
49508    3.124909    1.670121    1
5734    7.935489    1.585044    2
65479    12.746636    1.560352    1
77268    10.732563    0.545321    1
28490    3.977403    0.766103    1
13546    4.194426    0.450663    2
37166    9.610286    0.142912    3
16381    4.797555    1.260455    2
10848    1.615279    0.093002    2
35405    4.614771    1.027105    1
15917    0.000000    1.369726    2
6131    0.608457    0.512220    2
67432    6.558239    0.667579    1
30354    12.315116    0.197068    3
69696    7.014973    1.494616    1
33481    8.822304    1.194177    3
43075    10.086796    0.570455    3
38343    7.241614    1.661627    3
14318    4.602395    1.511768    2
5367    7.434921    0.079792    2
37894    10.467570    1.595418    3
36172    9.948127    0.003663    3
40123    2.478529    1.568987    1
10976    5.938545    0.878540    2
12705    0.000000    0.948004    2
12495    5.559181    1.357926    2
35681    9.776654    0.535966    3
46202    3.092056    0.490906    1
11505    0.000000    1.623311    2
22834    4.459495    0.538867    1
49901    8.334306    1.646600    3
71932    11.226654    0.384686    1
13279    3.904737    1.597294    2
49112    7.038205    1.211329    3
77129    9.836120    1.054340    1
37447    1.990976    0.378081    1
62397    9.005302    0.485385    1
0    1.772510    1.039873    2
15476    0.458674    0.819560    2
40625    10.003919    0.231658    3
36706    0.520807    1.476008    1
28580    10.678214    1.431837    3
25862    4.425992    1.363842    1
63488    12.035355    0.831222    1
33944    10.606732    1.253858    3
30099    1.568653    0.684264    1
13725    2.545434    0.024271    2
36768    10.264062    0.982593    3
64656    9.866276    0.685218    1
14927    0.142704    0.057455    2
43231    9.853270    1.521432    3
66087    6.596604    1.653574    1
19806    2.602287    1.321481    2
41081    10.411776    0.664168    3
10277    7.083449    0.622589    2
7014    2.080068    1.254441    2
17275    0.522844    1.622458    2
31600    10.362000    1.544827    3
59956    3.412967    1.035410    1
42181    6.796548    1.112153    3
51743    4.092035    0.075804    1
5194    2.763811    1.564325    2
30832    12.547439    1.402443    3
7976    5.708052    1.596152    2
14602    4.558025    0.375806    2
41571    11.642307    0.438553    3
55028    3.222443    0.121399    1
5837    4.736156    0.029871    2
39808    10.839526    0.836323    3
20944    4.194791    0.235483    2
22146    14.936259    0.888582    3
42169    3.310699    1.521855    1
7010    2.971931    0.034321    2
3807    9.261667    0.537807    2
29241    7.791833    1.111416    3
52696    1.480470    1.028750    1
42545    3.677287    0.244167    1
24437    2.202967    1.370399    1
16037    5.796735    0.935893    2
8493    3.063333    0.144089    2
68080    11.233094    0.492487    1
59016    1.965570    0.005697    1
11810    8.616719    0.137419    2
68630    6.609989    1.083505    1
7629    1.712639    1.086297    2
71992    10.117445    1.299319    1
13398    0.000000    1.104178    2
26241    9.824777    1.346821    3
11160    1.653089    0.980949    2
76701    18.178822    1.473671    1
32174    6.781126    0.885340    3
45043    8.206750    1.549223    3
42173    10.081853    1.376745    3
69801    6.288742    0.112799    1
41737    3.695937    1.543589    1
46979    6.726151    1.069380    3
79267    12.969999    1.568223    1
4615    2.661390    1.531933    2
32907    7.072764    1.117386    3
37444    9.123366    1.318988    3
569    3.743946    1.039546    2
8723    2.341300    0.219361    2
6024    0.541913    0.592348    2
52252    2.310828    1.436753    1
8358    6.226597    1.427316    2
26166    7.277876    0.489252    3
18471    0.000000    0.389459    2
3386    7.218221    1.098828    2
41544    8.777129    1.111464    3
10480    2.813428    0.819419    2
5894    2.268766    1.412130    2
7273    6.283627    0.571292    2
22272    7.520081    1.626868    3
31369    11.739225    0.027138    3
10708    3.746883    0.877350    2
69364    12.089835    0.521631    1
37760    12.310404    0.259339    3
13004    0.000000    0.671355    2
37885    2.728800    0.331502    1
52555    10.814342    0.607652    3
38997    12.170268    0.844205    3
69698    6.698371    0.240084    1
11783    3.632672    1.643479    2
47636    10.059991    0.892361    3
15744    1.887674    0.756162    2
69058    8.229125    0.195886    1
33057    7.817082    0.476102    3
28681    12.277230    0.076805    3
34042    10.055337    1.115778    3
29928    3.596002    1.485952    1
9734    2.755530    1.420655    2
7344    7.780991    0.513048    2
7387    0.093705    0.391834    2
33957    8.481567    0.520078    3
9936    3.865584    0.110062    2
36094    9.683709    0.779984    3
39835    10.617255    1.359970    3
64486    7.203216    1.624762    1
0    7.601414    1.215605    2
39539    1.386107    1.417070    1
66972    9.129253    0.594089    1
15029    1.363447    0.620841    2
44909    3.181399    0.359329    1
38183    13.365414    0.217011    3
37372    4.207717    1.289767    1
0    4.088395    0.870075    2
17786    3.327371    1.142505    2
39055    1.303323    1.235650    1
37045    7.999279    1.581763    3
6435    2.217488    0.864536    2
72265    7.751808    0.192451    1
28152    14.149305    1.591532    3
25931    8.765721    0.152808    3
7538    3.408996    0.184896    2
1315    1.251021    0.112340    2
12292    6.160619    1.537165    2
49248    1.034538    1.585162    1
9025    0.000000    1.034635    2
13438    2.355051    0.542603    2
69683    6.614543    0.153771    1
25374    10.245062    1.450903    3
55264    3.467074    1.231019    1
38324    7.487678    1.572293    3
69643    4.624115    1.185192    1
44058    8.995957    1.436479    3
41316    11.564476    0.007195    3
29119    3.440948    0.078331    1
51656    1.673603    0.732746    1
3030    4.719341    0.699755    2
35695    10.304798    1.576488    3
1537    2.086915    1.199312    2
9083    6.338220    1.131305    2
47744    8.254926    0.710694    3
71372    16.067108    0.974142    1
37980    1.723201    0.310488    1
42385    3.785045    0.876904    1
22687    2.557561    0.123738    1
39512    9.852220    1.095171    3
11885    3.679147    1.557205    2
4944    9.789681    0.852971    2
73230    14.958998    0.526707    1
17585    11.182148    1.288459    3
68737    7.528533    1.657487    1
13818    5.253802    1.378603    2
31662    13.946752    1.426657    3
86686    15.557263    1.430029    1
43214    12.483550    0.688513    3
24091    2.317302    1.411137    1
52544    10.069724    0.766119    3
61861    5.792231    1.615483    1
47903    4.138435    0.475994    1
37190    12.929517    0.304378    3
6013    9.378238    0.307392    2
27223    8.361362    1.643204    3
69027    7.939406    1.325042    1
78642    10.735384    0.705788    1
30254    11.592723    0.286188    3
21704    10.098356    0.704748    3
34985    9.299025    0.545337    3
31316    11.158297    0.218067    3
76368    16.143900    0.558388    1
27953    10.971700    1.221787    3
152    0.000000    0.681478    2
9146    3.178961    1.292692    2
75346    17.625350    0.339926    1
26376    1.995833    0.267826    1
35255    10.640467    0.416181    3
19198    9.628339    0.985462    3
12518    4.662664    0.495403    2
25453    5.754047    1.382742    2
12530    0.000000    0.037146    2
62230    9.334332    0.198118    1
9517    3.846162    0.619968    2
71161    10.685084    0.678179    1
1593    4.752134    0.359205    2
33794    0.697630    0.966786    1
39710    10.365836    0.505898    3
16941    0.461478    0.352865    2
69209    11.339537    1.068740    1
4446    5.420280    0.127310    2
9347    3.469955    1.619947    2
55635    8.517067    0.994858    3
65889    8.306512    0.413690    1
10753    2.628690    0.444320    2
7055    0.000000    0.802985    2
7905    0.000000    1.170397    2
53447    7.298767    1.582346    3
9194    7.331319    1.277988    2
61914    9.392269    0.151617    1
15630    5.541201    1.180596    2
79194    15.149460    0.537540    1
12268    5.515189    0.250562    2
33682    7.728898    0.920494    3
26080    11.318785    1.510979    3
19119    3.574709    1.531514    2
30902    7.350965    0.026332    3
63039    7.122363    1.630177    1
51136    1.828412    1.013702    1
35262    10.117989    1.156862    3
42776    11.309897    0.086291    3
64191    8.342034    1.388569    1
15436    0.241714    0.715577    2
14402    10.482619    1.694972    2
6341    9.289510    1.428879    2
14113    4.269419    0.134181    2
6390    0.000000    0.189456    2
8794    0.817119    0.143668    2
43432    1.508394    0.652651    1
38334    9.359918    0.052262    3
34068    10.052333    0.550423    3
30819    11.111660    0.989159    3
22239    11.265971    0.724054    3
28725    10.383830    0.254836    3
57071    3.878569    1.377983    1
72420    13.679237    0.025346    1
28294    10.526846    0.781569    3
9896    0.000000    0.924198    2
65821    4.106727    1.085669    1
7645    8.118856    1.470686    2
71289    7.796874    0.052336    1
5128    2.789669    1.093070    2
13711    6.226962    0.287251    2
22240    10.169548    1.660104    3
15092    0.000000    1.370549    2
5017    7.513353    0.137348    2
10141    8.240793    0.099735    2
35570    14.612797    1.247390    3
46893    3.562976    0.445386    1
8178    3.230482    1.331698    2
55783    3.612548    1.551911    1
1148    0.000000    0.332365    2
10062    3.931299    0.487577    2
74124    14.752342    1.155160    1
66603    10.261887    1.628085    1
11893    2.787266    1.570402    2
50908    15.112319    1.324132    3
39891    5.184553    0.223382    3
65915    3.868359    0.128078    1
65678    3.507965    0.028904    1
62996    11.019254    0.427554    1
36851    3.812387    0.655245    1
36669    11.056784    0.378725    3
38876    8.826880    1.002328    3
26878    11.173861    1.478244    3
46246    11.506465    0.421993    3
12761    7.798138    0.147917    3
35282    10.155081    1.370039    3
68306    10.645275    0.693453    1
31262    9.663200    1.521541    3
34754    10.790404    1.312679    3
13408    2.810534    0.219962    2
30365    9.825999    1.388500    3
10709    1.421316    0.677603    2
24332    11.123219    0.809107    3
45517    13.402206    0.661524    3
6178    1.212255    0.836807    2
10639    1.568446    1.297469    2
29613    3.343473    1.312266    1
22392    5.400155    0.193494    1
51126    3.818754    0.590905    1
53644    7.973845    0.307364    3
51417    9.078824    0.734876    3
24859    0.153467    0.766619    1
61732    8.325167    0.028479    1
71128    7.092089    1.216733    1
27276    5.192485    1.094409    3
30453    10.340791    1.087721    3
18670    2.077169    1.019775    2
70600    10.151966    0.993105    1
12683    0.046826    0.809614    2
81597    11.221874    1.395015    1
69959    14.497963    1.019254    1
8124    3.554508    0.533462    2
18867    3.522673    0.086725    2
80886    14.531655    0.380172    1
55895    3.027528    0.885457    1
31587    1.845967    0.488985    1
10591    10.226164    0.804403    3
70096    10.965926    1.212328    1
53151    2.129921    1.477378    1
11992    0.000000    1.606849    2
33114    9.489005    0.827814    3
7413    0.000000    1.020797    2
10583    0.000000    1.270167    2
58668    6.556676    0.055183    1
35018    9.959588    0.060020    3
70843    7.436056    1.479856    1
14011    0.404888    0.459517    2
35015    9.952942    1.650279    3
70839    15.600252    0.021935    1
3024    2.723846    0.387455    2
5526    0.513866    1.323448    2
5113    0.000000    0.861859    2
20851    7.280602    1.438470    2
40999    9.161978    1.110180    3
15823    0.991725    0.730979    2
35432    7.398380    0.684218    3
53711    12.149747    1.389088    3
64371    9.149678    0.874905    1
9289    9.666576    1.370330    2
60613    3.620110    0.287767    1
18338    5.238800    1.253646    2
22845    14.715782    1.503758    3
74676    14.445740    1.211160    1
34143    13.609528    0.364240    3
14153    3.141585    0.424280    2
9327    0.000000    0.120947    2
18991    0.454750    1.033280    2
9193    0.510310    0.016395    2
2285    3.864171    0.616349    2
9493    6.724021    0.563044    2
2371    4.289375    0.012563    2
13963    0.000000    1.437030    2
2299    3.733617    0.698269    2
5262    2.002589    1.380184    2
4659    2.502627    0.184223    2
17582    6.382129    0.876581    2
27750    8.546741    0.128706    3
9868    2.694977    0.432818    2
18333    3.951256    0.333300    2
3780    9.856183    0.329181    2
18190    2.068962    0.429927    2
11145    3.410627    0.631838    2
68846    9.974715    0.669787    1
26575    10.650102    0.866627    3
48111    9.134528    0.728045    3
43757    7.882601    1.332446    3
datingTestSet2.txt
# Load the tab-separated dating data set from disk.
data = np.loadtxt("datingTestSet2.txt", delimiter='\t')
# Every column except the last is a feature; the last column is used as the label.
hob_data, hob_labels = data[:, :-1], data[:, -1]

(二)归一化处理

def dataNorm(data):
    """Standardise every column of *data* to zero mean and unit variance.

    Args:
        data: 2-D array-like with one sample per row and one feature per
            column.

    Returns:
        A tuple ``(normalised, mn, sigma)`` where ``normalised`` is
        ``(data - mn) / sigma``, ``mn`` is the per-column mean and ``sigma``
        is the per-column population standard deviation (``ddof=0``).
        Columns whose standard deviation is 0 report ``sigma == 1`` so that
        the same ``mn``/``sigma`` can be reused on new samples without
        dividing by zero; such columns normalise to all zeros.
    """
    data = np.asarray(data, dtype=float)
    mn = np.mean(data, 0)
    sigma = np.std(data, 0, ddof=0)
    # A constant column has sigma == 0; dividing by it would give NaN/inf.
    sigma = np.where(sigma == 0, 1.0, sigma)
    return (data - mn) / sigma, mn, sigma

(三)简单划分训练集,测试集

# Split the samples into a training part and a held-out test part.
hoRatio = 0.4  # fraction of the samples reserved for testing
m = hob_labels.size
m_test = int(m * hoRatio)

# Normalise all samples first, then slice the normalised matrix.
hob_data_norm, mn, sigma = dataNorm(hob_data)
split = m - m_test  # row index of the first test sample

# Training set: the leading rows.
X, y = hob_data_norm[:split, :], hob_labels[:split]
# Test set: the trailing rows.
X_test, y_test = hob_data_norm[split:m, :], hob_labels[split:m]

(四)进行测试,获取错误率

# Classify each held-out sample with k=3 and count the misclassifications.
error_count = 0

for sample, expected in zip(X_test, y_test):
    clf = KNNClassfy(sample, X, y, 3)
    if clf != expected:
        error_count += 1

    # Show the predicted label next to the true one.
    print("{} --- {}".format(clf, expected))

print("error rate is:", error_count / y_test.size)

(五)进行预测

# Ask the user for three feature values and predict the preference level.
resultList = ['not at all', 'in small doses', 'in large doses']
hb_1, hb_2, hb_3 = [
    float(input(prompt)) for prompt in ("爱好一:", "爱好二:", "爱好三:")
]
preData = np.array([[hb_1, hb_2, hb_3]])
# Apply the same mean/sigma that normalised the data set.
preData_norm = ((preData - mn) / sigma).flatten()

clf = KNNClassfy(preData_norm, hob_data_norm, hob_labels, 3)
# Labels are 1-based, resultList is 0-based.
level = resultList[int(clf) - 1]
print("喜欢程度:{}".format(level))

三:手写数字识别

(一)数据展示

数据存放形式:每个数字样本(比如7)以32*32的文本矩阵存放,即32行、每行32个数字字符

文件存放形式:_前面是数字,_后面表示是该数字的第几种形式

(二)数据读取---将图像转向量

from os import listdir
import codecs

# Convert one digit text file into a flat vector
def image2Vector(filename):
    """Read a 32x32 digit image stored as text and flatten it to a vector.

    The first 32 characters of each of the first 32 lines are converted with
    ``int`` and appended row by row.

    Args:
        filename: path of the text file to read.

    Returns:
        A 1-D numpy array with 1024 integer entries.

    Raises:
        ValueError: if one of the first 32 lines has fewer than 32
            characters, or a character is not a digit.
    """
    pixels = []
    # The with-statement closes the file; no explicit close() is needed.
    with open(filename, 'r') as fp:
        for row_no in range(32):
            linestr = fp.readline()
            if len(linestr) < 32:
                raise ValueError(
                    "%s: line %d has fewer than 32 characters"
                    % (filename, row_no + 1)
                )
            pixels.extend(int(ch) for ch in linestr[:32])
    return np.array(pixels)

(三)获取训练集和测试集

# Build a data set from a directory of digit files
def getDataSet(path):
    """Load every file in *path* into a sample matrix plus a label list.

    The label of a file is the integer in front of the first ``_`` of its
    name; the sample is whatever ``image2Vector`` returns for that file.

    Args:
        path: directory containing the digit files.

    Returns:
        ``(data, hwLabels)`` where ``data`` has one 1024-wide row per file
        and ``hwLabels`` is the list of integer labels in the same order.
    """
    filelist = listdir(path)  # names of all entries in the directory
    data = np.zeros((len(filelist), 1024))
    hwLabels = []

    for row, filename in enumerate(filelist):
        # The label is encoded in the file name, before the underscore.
        hwLabels.append(int(filename.split('_')[0]))
        data[row, :] = image2Vector("%s/%s" % (path, filename))

    return data, hwLabels
# Load the training set from the "trainingDigits" directory.
# NOTE: this rebinds the module-level names `data` and `labels` used earlier.
data,labels = getDataSet("trainingDigits")

# Load the test set from the "testDigits" directory and show its shape.
data_test,labels_test = getDataSet("testDigits")
print(data_test.shape)

(三)实现KNN,改变部分

def KNNClassfy(preData,dataSet,labels,k):
    """Classify *preData* by majority vote among its k nearest samples.

    Args:
        preData: feature vector to classify; it is subtracted from every row
            of ``dataSet``.
        dataSet: 2-D numpy array with one known sample per row.
        labels: sequence holding the label of each row of ``dataSet``.
        k: number of nearest neighbours that vote. Values larger than the
            number of samples use all samples.

    Returns:
        The label with the most votes. On a tie, the label that first
        appears among the neighbours ordered by distance wins.

    Raises:
        ValueError: if ``k`` is smaller than 1 or ``dataSet`` has no rows.
    """
    # Squared Euclidean distance; the square root would not change the order.
    distance = np.sum(np.power(dataSet - preData, 2), 1)
    if k < 1 or distance.size == 0:
        raise ValueError("KNNClassfy needs k >= 1 and a non-empty dataSet")
    sortDistIdx = np.argsort(distance, 0)  # indices from nearest to farthest
    votes = {}
    for idx in sortDistIdx[:k]:  # slicing clamps k to the sample count
        label = labels[idx]
        # Each neighbour contributes exactly one vote to its label.
        votes[label] = votes.get(label, 0) + 1
    # max() returns the first maximum, i.e. the earliest-inserted label on ties.
    return max(votes, key=votes.get)

(四)结果测试

# Classify every test digit with k=3 and report the number and rate of errors.
error_count = 0
for sample, expected in zip(data_test, labels_test):
    clf = KNNClassfy(sample, data, labels, 3)
    if clf != expected:
        error_count += 1
print(error_count)
print("{} {}".format(error_count, error_count / data_test.shape[0]))

posted @ 2020-06-04 18:56  山上有风景  阅读(441)  评论(0编辑  收藏  举报