深度学习与医学图像处理 案例学习2——CNN肺炎检测(CXR图像)

文章来源:https://www.kaggle.com/paultimothymooney/chest-xray-pneumonia/notebooks

什么是肺炎?

肺炎是肺部的一种炎症,主要累及肺泡(肺内的小气囊)。典型症状包括干咳、胸痛、发热和呼吸困难,病情的严重程度因人而异。肺炎通常由病毒或细菌感染引起,较少由其他微生物、某些药物或疾病(如自身免疫性疾病)引起。危险因素包括囊性纤维化、慢性阻塞性肺疾病(COPD)、哮喘、糖尿病、心力衰竭、吸烟史、咳嗽能力差(如中风后)以及免疫功能低下。诊断通常基于症状和体格检查,胸部X光检查、血液检查和痰液培养有助于确诊。该病可按感染获得的场所分类,如社区获得性肺炎、医院获得性肺炎和卫生保健相关性肺炎。

数据集中共有训练、验证、测试三个文件夹,每个文件夹又包含正常与肺炎两个子文件夹,图片格式为jpeg。

 

#导入包

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import keras
from keras.models import Sequential
from keras.layers import Dense, Conv2D , MaxPool2D , Flatten , Dropout , BatchNormalization
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,confusion_matrix
from keras.callbacks import ReduceLROnPlateau
import cv2
import os

#查看样本基本情况

# Root folder of the dataset; it holds the train/, val/ and test/ splits.
PATH = '/kaggle/input/chest-xray-pneumonia/chest_xray/chest_xray/'

# (message, class folder relative to PATH), in the order they are reported.
_count_report = [
    ('number of normal in training set:', 'train/NORMAL'),
    ('number of pneumonia in training set:', 'train/PNEUMONIA'),
    ('number of normal in validation set:', 'val/NORMAL'),
    ('number of pneumonia in validation set:', 'val/PNEUMONIA'),
    ('number of normal in test set:', 'test/NORMAL'),
    ('number of pneumonia in test set:', 'test/PNEUMONIA'),
]

# Print how many directory entries each class folder contains.
for _message, _class_dir in _count_report:
    print(_message, len(os.listdir(PATH + _class_dir)))

样本不均衡,训练集中肺炎约为正常的三倍,验证集数据数量过小,只有9个。

#数据读取

# Class names; the position in this list is the integer label (0 or 1).
labels = ['PNEUMONIA', 'NORMAL']
# Every image is resized to img_size x img_size pixels.
img_size = 150


def get_training_data(data_dir):
    """Load all images below ``data_dir/<label>`` as resized grayscale arrays.

    Args:
        data_dir: Directory containing one sub-directory per entry of
            ``labels``.

    Returns:
        A tuple ``(x_data, y_data)`` of NumPy arrays: the resized images
        (n, img_size, img_size) and their class indices (n,). Files that
        cannot be read or resized are reported and skipped, so ``n`` may be
        smaller than the number of directory entries.
    """
    x_data = []
    y_data = []
    for class_num, label in enumerate(labels):
        path = os.path.join(data_dir, label)
        for img in os.listdir(path):
            img_path = os.path.join(path, img)
            try:
                # Load as a single-channel grayscale image.
                img_arr = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
                if img_arr is None:
                    # cv2.imread reports an unreadable file by returning None
                    # rather than raising; skip it explicitly and name it.
                    print('skipping unreadable image:', img_path)
                    continue
                resized_arr = cv2.resize(img_arr, (img_size, img_size))
            except Exception as e:
                # Best-effort loading: report the offending file and go on.
                print('skipping {}: {}'.format(img_path, e))
                continue
            x_data.append(resized_arr)  # n, 150, 150
            y_data.append(class_num)  # n
    return np.array(x_data), np.array(y_data)
# Load the train, test and val splits (in that order); each call returns
# the image array and the matching class-index array.
_SPLIT_ROOT = '/kaggle/input/chest-xray-pneumonia/chest_xray/chest_xray/'
x_train, y_train = get_training_data(_SPLIT_ROOT + 'train')
x_test, y_test = get_training_data(_SPLIT_ROOT + 'test')
x_val, y_val = get_training_data(_SPLIT_ROOT + 'val')

 #显示类别数量对比

sns.set_style('darkgrid')
# Pass the label vector through the `x` keyword: seaborn's first positional
# parameter is `data`, so a bare positional vector is not treated as the
# variable to count in current releases.
# Bars are class indices: 0 = PNEUMONIA, 1 = NORMAL.
sns.countplot(x=y_train)

#显示图像

# Preview the first and the last training image, each in its own 5x5 inch
# figure titled with the image's class name.
for _sample_idx in (0, -1):
    plt.figure(figsize=(5, 5))
    plt.imshow(x_train[_sample_idx], cmap='gray')
    plt.title(labels[y_train[_sample_idx]])

#归一化,增加维度准备喂入网络

# Divide pixel values by 255 (for 8-bit pixels this maps them into [0, 1])
# and append a trailing channel axis of size 1 so each array has shape
# (n, img_size, img_size, 1), matching the network's input.
_network_input_shape = (-1, img_size, img_size, 1)
x_train = (np.array(x_train) / 255).reshape(_network_input_shape)
x_val = (np.array(x_val) / 255).reshape(_network_input_shape)
x_test = (np.array(x_test) / 255).reshape(_network_input_shape)

#数据增强器

# Random augmentation settings applied to batches drawn from the generator.
_augmentation_options = dict(
    # Centering, std-normalisation and ZCA whitening are all switched off.
    featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
    # Geometric jitter.
    rotation_range=30,       # random rotation range, in degrees
    zoom_range=0.2,          # random zoom
    width_shift_range=0.1,   # horizontal shift, as a fraction of total width
    height_shift_range=0.1,  # vertical shift, as a fraction of total height
    horizontal_flip=True,    # random left-right flips
    vertical_flip=False,     # no up-down flips
)
datagen = ImageDataGenerator(**_augmentation_options)

# NOTE(review): every featurewise/ZCA option above is disabled - confirm
# whether this fit call is still needed.
datagen.fit(x_train)

#卷积神经网络CNN

                          

# Five Conv -> (Dropout) -> BatchNorm -> MaxPool stages followed by a small
# dense head with one sigmoid output. Shapes in the comments are
# (height, width, channels) after the layer.
_cnn_layers = [
    # Stage 1
    Conv2D(32, (3, 3), strides=1, padding='same', activation='relu',
           input_shape=(150, 150, 1)),                                    # 150,150,32
    BatchNormalization(),                                                 # 150,150,32
    MaxPool2D((2, 2), strides=2, padding='same'),                         # 75,75,32
    # Stage 2
    Conv2D(64, (3, 3), strides=1, padding='same', activation='relu'),     # 75,75,64
    Dropout(0.1),                                                         # 75,75,64
    BatchNormalization(),                                                 # 75,75,64
    MaxPool2D((2, 2), strides=2, padding='same'),                         # 38,38,64
    # Stage 3
    Conv2D(64, (3, 3), strides=1, padding='same', activation='relu'),     # 38,38,64
    BatchNormalization(),                                                 # 38,38,64
    MaxPool2D((2, 2), strides=2, padding='same'),                         # 19,19,64
    # Stage 4
    Conv2D(128, (3, 3), strides=1, padding='same', activation='relu'),    # 19,19,128
    Dropout(0.2),                                                         # 19,19,128
    BatchNormalization(),                                                 # 19,19,128
    MaxPool2D((2, 2), strides=2, padding='same'),                         # 10,10,128
    # Stage 5
    Conv2D(256, (3, 3), strides=1, padding='same', activation='relu'),    # 10,10,256
    Dropout(0.2),                                                         # 10,10,256
    BatchNormalization(),                                                 # 10,10,256
    MaxPool2D((2, 2), strides=2, padding='same'),                         # 5,5,256
    # Classifier head
    Flatten(),                                                            # 5*5*256 = 6400
    Dense(units=128, activation='relu'),                                  # 128
    Dropout(0.2),                                                         # 128
    Dense(units=1, activation='sigmoid'),                                 # 1
]
model = Sequential(_cnn_layers)
model.compile(optimizer="rmsprop", loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

 #智能学习率函数

# Learning-rate schedule driven by validation accuracy: multiply the rate by
# `factor` after `patience` epochs without improvement, down to `min_lr`.
learning_rate_reduction = ReduceLROnPlateau(
    monitor='val_accuracy',
    factor=0.3,
    patience=2,
    min_lr=0.000001,
    verbose=1,
)

#训练

# Train on randomly augmented batches, but validate on the untouched
# validation arrays: passing them through `datagen.flow` would apply random
# transforms to them as well, making val_accuracy (which also drives the
# learning-rate callback) noisy and not reproducible between epochs.
history = model.fit(
    datagen.flow(x_train, y_train, batch_size=32),
    epochs=12,
    validation_data=(x_val, y_val),
    callbacks=[learning_rate_reduction],
)

在没有GPU的情况下,训练花费了将近50分钟;训练集上的结果逐步提升,验证集上的结果一般。

# Per-epoch metrics recorded by model.fit.
train_acc = history.history['accuracy']
train_loss = history.history['loss']
val_acc = history.history['val_accuracy']
val_loss = history.history['val_loss']

# Derive the x axis from the recorded history instead of hard-coding the
# epoch count, so the plot stays correct if the number of epochs changes.
epochs = list(range(len(train_acc)))

fig, ax = plt.subplots(1, 2)
fig.set_size_inches(20, 10)

# Left panel: accuracy curves.
ax[0].plot(epochs, train_acc, 'go-', label='Training Accuracy')
ax[0].plot(epochs, val_acc, 'ro-', label='Validation Accuracy')
ax[0].set_title('Training & Validation Accuracy')
ax[0].legend()
ax[0].set_xlabel("Epochs")
ax[0].set_ylabel("Accuracy")

# Right panel: loss curves. The title used to read 'Testing Accuracy & Loss'
# although only training and validation loss are drawn here.
ax[1].plot(epochs, train_loss, 'g-o', label='Training Loss')
ax[1].plot(epochs, val_loss, 'r-o', label='Validation Loss')
ax[1].set_title('Training & Validation Loss')
ax[1].legend()
ax[1].set_xlabel("Epochs")
ax[1].set_ylabel("Loss")
plt.show()

#测试集结果

# `Sequential.predict_classes` is no longer available in current
# TensorFlow/Keras releases. For a single sigmoid output the equivalent is
# to threshold the predicted probability at 0.5.
probabilities = model.predict(x_test)
# Flatten the (n, 1) output to a 1-D vector of 0/1 class indices.
predictions = (probabilities > 0.5).astype("int32").reshape(-1)
cm = confusion_matrix(y_test, predictions)
print(cm)
# Signature: sklearn.metrics.confusion_matrix(y_true, y_pred, *, labels=None, sample_weight=None, normalize=None)

#显示预测正确图像

# Indices of the test images whose predicted class equals the true class.
correct = np.nonzero(predictions == y_test)[0]

# Draw up to six of them in a 3x2 grid, without axis ticks.
for _slot, _idx in enumerate(correct[:6], start=1):
    plt.subplot(3, 2, _slot)
    plt.xticks([])
    plt.yticks([])
    _image = x_test[_idx].reshape(150, 150)
    plt.imshow(_image, cmap="gray", interpolation='none')
    plt.title("Predicted {},Actual {}".format(predictions[_idx], y_test[_idx]))

#显示错误预测图像

# Indices of the test images whose predicted class differs from the true class.
incorrect = np.nonzero(predictions != y_test)[0]

# Draw up to six of them in a 3x2 grid, without axis ticks.
for _slot, _idx in enumerate(incorrect[:6], start=1):
    plt.subplot(3, 2, _slot)
    plt.xticks([])
    plt.yticks([])
    _image = x_test[_idx].reshape(150, 150)
    plt.imshow(_image, cmap="gray", interpolation='none')
    plt.title("Predicted {},Actual {}".format(predictions[_idx], y_test[_idx]))

#保存模型

# Save the trained model to 'model.h5' (a path relative to the working directory).
model.save('model.h5')

欢迎讨论!

 

posted @ 2021-02-18 17:44  最爱我的肉肉  阅读(1878)  评论(0)  编辑  收藏  举报