常用函数框架

1、混淆矩阵

import itertools
def plot_condusion_matrix(cm, classes,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """Draw a confusion matrix as an annotated heat map on the current figure.

    The function name keeps its original spelling ("condusion") so that
    existing callers continue to work.

    Args:
        cm: 2-D matrix of values, indexed as ``cm[i, j]`` and exposing
            ``.max()`` and ``.shape``. Row ``i`` is drawn along the y axis
            ("True label") and column ``j`` along the x axis
            ("Predicted label").
        classes: Sequence of class names used as tick labels on both axes.
        title: Title drawn above the plot.
        cmap: Colormap handed to ``plt.imshow``.

    Returns:
        None. The plot is drawn through the pyplot state machine; this
        function does not call ``plt.show()`` or save the figure.
    """
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=0)
    plt.yticks(tick_marks, classes)

    # Cells above half of the maximum value get white text, the rest black,
    # so the number stays readable against the cell colour.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i, j],
                 horizontalalignment="center",
                 verticalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Run the layout pass last so it also makes room for the axis labels.
    plt.tight_layout()

 

2、决策树可视化

def dec_tree(model, feature_names, target_names):
    """Render a fitted decision tree with Graphviz and save it as an image.

    The tree is exported to DOT text, turned into a pydotplus graph, written
    to the file ``graph_jpg`` in the current working directory, and returned
    as an IPython image so a notebook cell can display it.

    Args:
        model: The fitted tree model passed to ``tree.export_graphviz``.
        feature_names: Names of the input features, shown in the split nodes.
        target_names: Names of the target classes, passed as ``class_names``.

    Returns:
        An ``IPython.display.Image`` wrapping the PNG rendering of the tree.

    Note:
        ``tree`` is not imported here; it is presumably ``sklearn.tree``
        imported elsewhere -- TODO confirm. The Graphviz location is a
        hard-coded Windows install path.
    """
    import os

    # Make the Graphviz executables discoverable, adding the directory only
    # once so repeated calls do not keep growing PATH.
    graphviz_bin = 'C:/Program Files (x86)/Graphviz2.38/bin/'
    current_path = os.environ.get("PATH", "")
    if graphviz_bin not in current_path.split(os.pathsep):
        os.environ["PATH"] = os.pathsep.join(
            part for part in (current_path, graphviz_bin) if part
        )

    dot_data = tree.export_graphviz(
        model,  # the model to draw
        out_file=None,  # return the DOT source as a string
        class_names=target_names,
        feature_names=feature_names,  # feature names
        filled=True,
        impurity=False,
        rounded=True,
    )

    import pydotplus
    graph = pydotplus.graph_from_dot_data(dot_data)
    # A single node can be recoloured before rendering, e.g.:
    #   graph.get_nodes()[7].set_fillcolor('#FFF2DD')

    from IPython.display import Image
    png_bytes = graph.create_png()
    graph.write_jpg('graph_jpg')  # write the JPG file

    # A bare Image(...) expression displays nothing inside a function, so
    # hand the image back to the caller instead.
    return Image(png_bytes)

3、训练模型时,需要遍历数据集并不断读取小批量数据样本。这里定义一个函数,每次返回 batch_size 个随机样本的特征和标签。

def data_iter(batch_size, feature, labels):
    """Yield shuffled mini-batches of ``(feature, labels)``.

    The sample indices are shuffled once, then walked in steps of
    ``batch_size``; each step yields the matching entries of ``feature`` and
    ``labels`` selected with ``.take``.

    Args:
        batch_size: Number of samples per batch. The last batch is shorter
            when the sample count is not a multiple of ``batch_size``.
        feature: Container of samples supporting ``len()`` and ``.take``.
        labels: Container of labels supporting ``.take``, indexed in the
            same order as ``feature``.

    Yields:
        Tuples ``(feature.take(idx), labels.take(idx))`` for each batch.

    Note:
        ``nd`` is presumably MXNet's ``ndarray`` module imported elsewhere --
        TODO confirm.
    """
    total = len(feature)
    order = list(range(total))
    random.shuffle(order)  # visit the samples in random order

    for start in range(0, total, batch_size):
        # Clamp the end of the window to the sample count for the final batch.
        stop = min(start + batch_size, total)
        batch_idx = nd.array(order[start:stop])
        picked_features = feature.take(batch_idx)  # select by index
        picked_labels = labels.take(batch_idx)
        yield picked_features, picked_labels

 

posted @ 2019-06-04 12:29  编程小虾米  阅读(562)  评论(0编辑  收藏  举报