# Iris classification walkthrough: load the CSV, (optionally) explore it,
# then split it into training and validation sets.
from matplotlib import pyplot
from pandas import read_csv
from pandas.plotting import scatter_matrix
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
)
from sklearn.model_selection import KFold, cross_val_score, train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

# --- Load the data ---------------------------------------------------------
# Column names are supplied explicitly via `names=`.
# NOTE(review): 'separ-*' is presumably a typo for 'sepal-*'; the strings are
# kept unchanged because other code may look the columns up by these names.
filename = 'iris.data.csv'
names = [
    'separ-length',
    'separ-width',
    'petal-length',
    'petal-width',
    'class',
]
dataset = read_csv(filename, names=names)

# --- Optional exploration (uncomment the lines you need) -------------------
# Raw contents / first rows:
# print(dataset)
# print(dataset.head(10))
# dataset = read_csv('iris.data.csv')
# print(dataset.shape)
# print(dataset.head(10))
# Dimensions (the message reads "data dimensions: rows %s, columns %s"):
# print('数据维度: 行 %s,列 %s'% dataset.shape)
# Descriptive statistics:
# print(dataset.describe())
# Row counts grouped by a column:
# print(dataset.groupby('class').size())
# print(dataset.groupby('separ-width').size())
# Box plots:
# dataset.plot(kind='box', subplots=True, layout=(2, 2), sharex=False, sharey=False)
# pyplot.show()
# Histograms:
# dataset.hist()
# pyplot.show()
# Scatter-plot matrix:
# scatter_matrix(dataset)
# pyplot.show()

# --- Split the data set ----------------------------------------------------
# The first four columns are the features; the fifth column is the label.
array = dataset.values
X, Y = array[:, 0:4], array[:, 4]

# Hold out 20% of the rows for validation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=7,
)
# print(X_train.shape)
# TODO: the remaining steps are still to be added (original note: "to be continued tomorrow").