# KNN_Cancer_predict(癌症预测)

import numpy as np
import pandas as pd 
from pandas import Series,DataFrame
from sklearn.neighbors import KNeighborsClassifier
# Load the tab-separated cancer data set and report its (rows, columns) shape.
cancer = pd.read_csv('./data/cancer.csv', sep='\t')
print(cancer.shape)
cancer  # notebook preview of the raw table; a bare expression does nothing in a script
# Column 1 holds the label (per the original note: m = malignant, b = benign);
# columns 2 onward are used as the features.
data = cancer.iloc[:, 2:]
target = cancer.iloc[:, 1]
# `display` is not imported in this file -- presumably the IPython/Jupyter
# built-in; confirm before running this outside a notebook.
display(data.head(), target.head())
knn = KNeighborsClassifier(n_neighbors=15)
# Shuffle the samples and split them in two: 90 % for training and 10 % held
# out for evaluation. scikit-learn provides the helper for this.
from sklearn.model_selection import train_test_split
# A fixed seed makes the shuffle -- and therefore the score computed from the
# held-out set -- reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    data, target, test_size=0.1, random_state=42
)
knn.fit(X_train, y_train)
# Output (repr of the fitted estimator):
# KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
#            metric_params=None, n_jobs=1, n_neighbors=15, p=2,
#            weights='uniform')
# Predict a label for every held-out sample, then print the classifier's
# score on that same held-out set.
y_ = knn.predict(X_test)
score = knn.score(X_test, y_test)
print(score)

# Output (score from the original run): 0.9473684210526315
# Cross-tabulate the predicted labels (rows) against the true labels
# (columns); cells where the two labels differ count the test samples the
# model got wrong.
pd.crosstab(y_, y_test, rownames=['Predict'], colnames=['True'])

# Improving accuracy: clean the data before training.
data  # notebook preview of the feature table; a bare expression does nothing in a script
# Min-max normalisation: rescale every feature column with
#     (value - min) / (max - min)
# so that columns with large raw values do not dominate the others.
columns = data.columns
data_min = data.min()
data_max = data.max()
# A constant column has max == min; use a span of 1 there so it maps to 0
# instead of producing a division by zero.
data_range = (data_max - data_min).replace(0, 1)
# Bind a new frame instead of assigning column by column: `data` was sliced
# out of `cancer`, and writing into such a slice can trigger pandas'
# SettingWithCopyWarning.
data = (data - data_min) / data_range
# NOTE: `knn` was fitted on the unscaled features before this step; the split
# and the fit have to be repeated for the normalised values to take effect.
print(data.head())
# posted @ 2019-07-01 16:40  海予心  阅读(883)  评论(0)  编辑  收藏  举报