Notes on Python Classification Methods
Using the GPU
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "4,5,6,7"  # expose only GPUs 4-7 to TensorFlow

import tensorflow as tf
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.5  # cap the process at 50% of each GPU's memory
config.gpu_options.allow_growth = True                    # allocate memory on demand instead of all at once
# The config only takes effect once it is attached to a session;
# for Keras with the TF backend, register that session as well:
sess = tf.Session(config=config)
import keras.backend as K
K.set_session(sess)
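The ConfigProto API above is TensorFlow 1.x. As a rough sketch of the TF 2.x equivalent (assuming TF >= 2.1; set_memory_growth must be called before any GPU is initialized):

import os
os.environ["CUDA_VISIBLE_DEVICES"] = "4,5,6,7"

import tensorflow as tf
# Counterpart of allow_growth: grow memory usage on demand for every visible GPU
for gpu in tf.config.list_physical_devices('GPU'):
    tf.config.experimental.set_memory_growth(gpu, True)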
DNN
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import RMSprop
from keras.utils import to_categorical
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.preprocessing import StandardScaler

def baseline_model():
    model = Sequential()
    model.add(Dense(16, input_shape=(21,), activation="relu"))
    model.add(Dense(16, activation="relu"))
    model.add(Dense(2, activation="sigmoid"))
    model.compile(optimizer=RMSprop(lr=0.01),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model

def cross_validation(X, new_y, num_feat):
    print("X=", X[:10])
    print("X.values=", X.values[:10])
    # print("y=", new_y[:10])
    # X = X.values
    y = to_categorical(new_y)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=666)
    # Fit the scaler on the training split only, then apply it to both splits
    standScaler = StandardScaler()
    standScaler.fit(X_train)
    X_train = standScaler.transform(X_train)
    X_test = standScaler.transform(X_test)
    estimator = KerasClassifier(build_fn=baseline_model, epochs=10, batch_size=1, verbose=1)
    kfold = KFold(n_splits=5, shuffle=True, random_state=999)
    scores = cross_val_score(estimator, X_train, y_train, cv=kfold)
    print("Accuracy of cross validation, mean %.2f, std %.2f" % (scores.mean(), scores.std()))
    # Alternative scikit-learn classifiers:
    # clf = LogisticRegression(penalty='l2', solver='liblinear', class_weight='balanced')
    # clf = KNeighborsClassifier(weights="distance", n_neighbors=10, p=9)
    # clf = svm.SVC(kernel='rbf', C=2e4, gamma=2e-5)
    # clf = svm.SVC(kernel='linear', C=2e3)
    # clf = RandomForestClassifier(n_estimators=1000, class_weight="balanced")
    # clf = GaussianNB()
    # scores = cross_val_score(clf, X, new_y, cv=10)
    return scores
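A quick usage sketch (the file name and "label" column are hypothetical; the feature count must match the input_shape=(21,) in baseline_model):

import pandas as pd

df = pd.read_csv("data.csv")      # hypothetical input: 21 feature columns plus a binary label column
X = df.drop(columns=["label"])    # keep X as a DataFrame, since cross_validation accesses X.values
new_y = df["label"].values
scores = cross_validation(X, new_y, num_feat=21)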
Plotting accuracy and loss curves
import matplotlib.pyplot as plt

def show_acc(history):
    plt.clf()
    history_dict = history.history
    # The key names depend on the metric passed to compile(); with metrics=['accuracy']
    # they may instead be 'acc'/'val_acc' or 'accuracy'/'val_accuracy' depending on the Keras version
    acc = history_dict['binary_accuracy']
    val_acc = history_dict['val_binary_accuracy']
    epochs = range(1, len(val_acc) + 1)
    plt.plot(epochs, acc, 'bo', label='Training acc')
    plt.plot(epochs, val_acc, 'b', label='Validation acc')
    plt.xlabel('Epochs')
    plt.ylabel('Acc')
    plt.legend()
    plt.show()
def show_loss(history):
    plt.clf()
    history_dict = history.history
    print("print history.history = ", history_dict)
    loss = history_dict['loss']
    val_loss = history_dict['val_loss']
    epochs = range(1, len(val_loss) + 1)
    plt.plot(epochs, loss, 'bo', label='Training loss')
    plt.plot(epochs, val_loss, 'b', label='Validation loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()
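Typical usage after training with a validation split (model and data names are illustrative):

history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.2)
show_loss(history)
show_acc(history)  # requires the 'binary_accuracy' keys used above to exist in history.history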
TensorBoard
import keras
from keras.utils import plot_model

def classify_data(X, y, class_names):
    y = to_categorical(y)
    # Standardize features (fit the scaler on the training split only)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=666)
    standScaler = StandardScaler()
    standScaler.fit(X_train)
    X_train = standScaler.transform(X_train)
    X_test = standScaler.transform(X_test)
    model = Sequential()
    model.add(Dense(16, input_shape=(21,), activation="relu"))
    model.add(Dense(16, activation="relu"))
    model.add(Dense(2, activation="sigmoid"))
    model.summary()
    model.compile(optimizer=RMSprop(lr=0.001), loss='binary_crossentropy', metrics=['accuracy'])
    plot_model(model, show_shapes=True, to_file='model.png')
    callbacks = [keras.callbacks.TensorBoard(log_dir="my_log_dir",
                                             histogram_freq=1,
                                             embeddings_freq=1,
                                             embeddings_data=X[:20].astype("float32"))]
    history = model.fit(X_train, y_train, epochs=20, batch_size=1,
                        validation_split=0.2, callbacks=callbacks)
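To inspect the logs, launch TensorBoard on the same directory and open the URL it prints:

tensorboard --logdir=my_log_dir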
Plotting a confusion matrix
import itertools
import numpy as np
from sklearn import svm
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion matrix', cmap=plt.cm.Blues):
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')
    print(cm)
    plt.imshow(cm, interpolation='nearest', cmap=cmap)  # render the matrix as an image (nothing is displayed until plt.show())
    plt.title(title)
    plt.colorbar()  # show the color scale
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)  # x-axis tick labels
    plt.yticks(tick_marks, classes)               # y-axis tick labels
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    # itertools.product pairs every row index with every column index.
    # Row i is the true label and column j is the predicted label,
    # i.e. x is the prediction and y is the ground truth.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")
    plt.tight_layout()
    plt.ylabel('True label', fontsize=14)
    plt.xlabel('Predicted label', fontsize=14)

def classify_data(X, y, class_names):
    # Train/test split (testSize is defined elsewhere)
    global f_cv_scores
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=testSize, random_state=42)
    # Alternative classifiers:
    # clf = LogisticRegression(penalty='l2', class_weight='balanced')
    # clf = RandomForestClassifier(n_estimators=1000, class_weight="balanced")
    # clf = KNeighborsClassifier(weights="distance", n_neighbors=10, p=9)
    # clf = svm.SVC(kernel='rbf', C=2e4, gamma=2e-5)
    clf = svm.SVC(kernel='linear', C=2e3)
    # clf = GaussianNB()
    y_pred = clf.fit(X_train, y_train).predict(X_test)
    precision = precision_score(y_test, y_pred, average='macro')
    recall = recall_score(y_test, y_pred, average='micro')
    f1 = f1_score(y_test, y_pred, average='weighted')
    acc = accuracy_score(y_test, y_pred)
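classify_data above computes the scores but never draws the matrix; a minimal sketch connecting the two (assuming class_names lists the labels in the order the classifier uses):

from sklearn.metrics import confusion_matrix

cnf_matrix = confusion_matrix(y_test, y_pred)
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=class_names, normalize=True,
                      title='Normalized confusion matrix')
plt.show()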
Keras training, test, and validation sets
# Train/test split: hold out 20% of the data as the test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=666)
# validation_split=0.1 sets aside the last 10% of X_train (taken before shuffling) as the validation set
history = model.fit(X_train, y_train, epochs=20, batch_size=1, shuffle=True,
                    validation_split=0.1, verbose=1, callbacks=None, validation_data=None)
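To use a fixed validation set instead of a random fraction, pass validation_data; when both are given, validation_data overrides validation_split. A sketch reusing the split above:

# Carve an explicit validation set out of the training data
X_tr, X_val, y_tr, y_val = train_test_split(X_train, y_train, test_size=0.1, random_state=666)
history = model.fit(X_tr, y_tr, epochs=20, batch_size=1, shuffle=True,
                    validation_data=(X_val, y_val))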