15 手写数字识别-小数据集

  • from sklearn.datasets import load_digits
  • digits = load_digits()
  • #加载数据
    import numpy as np
    import pandas as pd
    from sklearn.datasets import load_digits
    from sklearn.preprocessing import MinMaxScaler
    from sklearn.preprocessing import OneHotEncoder

    # Load the bundled handwritten-digits dataset.
    digits = load_digits()

    # Cast the features to float32 and shape the labels into a float32 column
    # vector. No scaling is applied in this step; the arrays are only printed.
    X_data = digits.data.astype(np.float32)
    Y_data = np.reshape(digits.target.astype(np.float32), (-1, 1))
    print(X_data, Y_data)
    

     

 

 

 

2.图片数据预处理

  • x:归一化MinMaxScaler()
  • y:独热编码OneHotEncoder()或to_categorical
  • 训练集测试集划分
  • 张量结构
  • import numpy as np
    from sklearn.datasets import load_digits
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import MinMaxScaler
    from sklearn.preprocessing import OneHotEncoder

    digits = load_digits()

    # Features as float32; labels as a float32 column vector, the 2-D layout
    # that OneHotEncoder is fed below.
    X_data = digits.data.astype(np.float32)
    Y_data = np.reshape(digits.target.astype(np.float32), (-1, 1))

    # x: normalise the features with MinMaxScaler.
    scale = MinMaxScaler()
    X_data = scale.fit_transform(X_data)

    # y: one-hot encode the labels and densify the encoder output.
    # NOTE(review): .todense() yields an np.matrix rather than a plain ndarray;
    # the evaluation code further down indexes the labels in a way that relies
    # on this, so it is kept as is.
    encoded = OneHotEncoder().fit_transform(Y_data)
    Y_hot = encoded.todense()
    del encoded
    print(Y_hot)

    # Convert to image format: (samples, 8, 8, 1).
    X = X_data.reshape((-1, 8, 8, 1))
    print(X.shape)

    # Hold out 25% of the samples for testing, stratified on the labels, with
    # a fixed random_state so the split is reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        Y_hot,
        test_size=0.25,
        random_state=0,
        stratify=Y_hot,
    )

    print(X_train, X_test, y_train, y_test)
    

     

 

 

 

 

 

 

 

 

3.设计卷积神经网络结构

  • 绘制模型结构图,并说明设计依据。
from keras.models import Sequential
# Conv2D, MaxPool2D, Dropout and Flatten are used below and must be imported
# too; without them the snippet fails with NameError.
from keras.layers import Activation, Conv2D, Dense, Dropout, Flatten, MaxPool2D

# Small CNN: one convolution + pooling stage, then a dense classifier head
# with a 10-way softmax output.
model = Sequential()
model.add(Conv2D(
    filters=32,  # number of convolution kernels, i.e. 32 output feature maps
    kernel_size=(5, 5),  # size of each convolution kernel
    padding='same',  # zero-pad the borders so height/width are preserved
    input_shape=X_train.shape[1:],  # (8, 8, 1)
    activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(10, activation='softmax'))

 

 

 

 

4.模型训练

  • model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
  • train_history = model.fit(x=X_train,y=y_train,validation_split=0.2, batch_size=300,epochs=10,verbose=2)
# Configure the model: categorical cross-entropy loss, Adam optimizer,
# accuracy reported as the metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train for 50 epochs in batches of 300, holding back 20% of the training
# data for validation; the returned history is kept in train_history.
train_history = model.fit(
    x=X_train,
    y=y_train,
    validation_split=0.2,
    batch_size=300,
    epochs=50,
    verbose=2,
)

 

 

5.模型评价

  • model.evaluate()
  • 交叉表与混淆矩阵
  • pandas.crosstab
  • seaborn.heatmap
import matplotlib.pyplot as plt
import seaborn as sns

# Predicted class = index of the largest softmax output.
# Sequential.predict_classes is not available in newer Keras releases, so
# the arg-max is taken over model.predict() instead.
# (`y-predict` is not a valid identifier; the name is `y_predict`.)
y_predict = np.argmax(model.predict(X_test), axis=1)

# Recover the true class indices from the one-hot labels. Converting with
# np.asarray first gives a flat 1-D array whether y_test is a plain ndarray
# or an np.matrix (which is what .todense() produces).
y_testtrue = np.asarray(y_test).argmax(axis=1)
y_true = y_testtrue

# Cross table of true vs. predicted labels.
pd.crosstab(y_true, y_predict, rownames=['true'], colnames=['predict'])

## Confusion matrix drawn as a heat map
a = pd.crosstab(y_true, y_predict)
df = pd.DataFrame(a)
# Single-letter Matplotlib colours must be lowercase ("g", not "G").
sns.heatmap(df, annot=True, cmap="Purples", linewidths=0.2, linecolor="g")
plt.show()

 

 

 

 

 

posted @ 2020-06-14 20:04  ling9709  阅读(289)  评论(0编辑  收藏  举报