TensorFlow / PyTorch loss-visualization plotting template code (recommended)
The key code snippets are excerpted below:
```python
import matplotlib.pyplot as plt

yy_train_loss = []
yy_valid_loss = []
yy_train_acc = []
yy_valid_acc = []
xx = []

# inside def train or def fit:
for epoch in range(epochs):
    xx.append(epoch)
    yy_train_loss.append(total_loss / len(loader_train))
    yy_train_acc.append(train_acc)
    # validation:
    yy_valid_loss.append(total_loss / len(loader))
    yy_valid_acc.append(val_gini_score)

# visualize and save
# def plot_png():
ax1 = plt.subplot(1, 2, 1)
plt.sca(ax1)
plt.plot(xx, yy_train_loss, "r", label="train_logloss")
plt.plot(xx, yy_valid_loss, "b", label="valid_logloss")
plt.legend()
ax2 = plt.subplot(1, 2, 2)
plt.sca(ax2)
plt.plot(xx, yy_train_acc, "y", label="train_acc")
plt.plot(xx, yy_valid_acc, "g", label="valid_norm_gini")
plt.legend()
plt.savefig("./loss.png")
print("saved loss.png")
print("over!")
```
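For reuse across projects, the snippet above can be wrapped into the `plot_png()` function hinted at by the comment. Below is a minimal, self-contained sketch; the parameter list and the synthetic history values in the `__main__` block are illustrative assumptions, not part of the original post:

```python
import matplotlib.pyplot as plt

def plot_png(xx, yy_train_loss, yy_valid_loss, yy_train_acc, yy_valid_acc,
             out_path="./loss.png"):
    """Save a two-panel figure: losses on the left, metrics on the right."""
    ax1 = plt.subplot(1, 2, 1)
    plt.sca(ax1)
    plt.plot(xx, yy_train_loss, "r", label="train_logloss")
    plt.plot(xx, yy_valid_loss, "b", label="valid_logloss")
    plt.legend()
    ax2 = plt.subplot(1, 2, 2)
    plt.sca(ax2)
    plt.plot(xx, yy_train_acc, "y", label="train_acc")
    plt.plot(xx, yy_valid_acc, "g", label="valid_norm_gini")
    plt.legend()
    plt.savefig(out_path)
    print("saved", out_path)

if __name__ == "__main__":
    # Synthetic history values, for illustration only.
    epochs = list(range(10))
    plot_png(epochs,
             [1.0 / (e + 1) for e in epochs],    # fake train loss
             [1.2 / (e + 1) for e in epochs],    # fake valid loss
             [0.5 + 0.04 * e for e in epochs],   # fake train acc
             [0.1 + 0.02 * e for e in epochs])   # fake valid gini
```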
Here is a complete DeepFM.py file:
```python
# -*- coding: utf-8 -*-
"""
A PyTorch implementation of DeepFM for rate prediction problems.
"""
import os
import time

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt

from metrics import gini_norm

yy_train_loss = []
yy_valid_loss = []
yy_train_acc = []
yy_valid_acc = []
xx = []

best_acc = 0
best_gini_score = 0  # larger Gini is better

# Equivalent to logloss; replace with a custom criterion (e.g. one based on
# the Gini coefficient) if desired.
criterion = F.binary_cross_entropy_with_logits


class DeepFM(nn.Module):
    """
    A DeepFM network for rate prediction problems.

    The architecture has two parts: an FM part for low-order feature
    interactions and a deep part for higher-order ones. Batchnorm and dropout
    are applied to all hidden layers, and "Adam" is used for optimization.

    You may find more details in this paper:
    DeepFM: A Factorization-Machine based Neural Network for CTR Prediction,
    Huifeng Guo, Ruiming Tang, Yunming Ye, Zhenguo Li, Xiuqiang He.
    """

    def __init__(self, feature_sizes, embedding_size=8,
                 hidden_dims=[32, 32], num_classes=1, dropout=[0.5, 0.5],
                 use_cuda=True, verbose=False):
        """
        Initialize a new network.

        Inputs:
        - feature_sizes: A list of integers giving the size of features for
          each field.
        - embedding_size: An integer giving the size of feature embeddings.
        - hidden_dims: A list of integers giving the size of each hidden layer.
        - num_classes: An integer giving the number of classes to predict.
          For example, someone may rate a film 1, 2, 3, 4 or 5 stars.
        - use_cuda: Bool, use cuda or not.
        - verbose: Bool.
        """
        super().__init__()
        self.field_size = len(feature_sizes)
        self.feature_sizes = feature_sizes
        self.embedding_size = embedding_size
        self.hidden_dims = hidden_dims
        self.num_classes = num_classes
        self.dtype = torch.long
        self.bias = torch.nn.Parameter(torch.randn(1))

        # check if cuda is available
        if use_cuda and torch.cuda.is_available():
            self.device = torch.device('cuda')
        else:
            self.device = torch.device('cpu')

        # init fm part
        self.fm_first_order_embeddings = nn.ModuleList(
            [nn.Embedding(feature_size, 1)
             for feature_size in self.feature_sizes])
        self.fm_second_order_embeddings = nn.ModuleList(
            [nn.Embedding(feature_size, self.embedding_size)
             for feature_size in self.feature_sizes])

        # init deep part
        # all_dims is shaped like [k, h1, h2, 1]; hidden_dims is [32, 32]
        all_dims = [self.field_size * self.embedding_size] + \
            self.hidden_dims + [self.num_classes]
        for i in range(1, len(hidden_dims) + 1):
            # setattr can create an attribute that does not exist yet
            setattr(self, 'linear_' + str(i),
                    nn.Linear(all_dims[i - 1], all_dims[i]))
            # nn.init.kaiming_normal_(self.fc1.weight)
            setattr(self, 'batchNorm_' + str(i),
                    nn.BatchNorm1d(all_dims[i]))
            setattr(self, 'dropout_' + str(i),
                    nn.Dropout(dropout[i - 1]))

    def forward(self, Xi, Xv):
        """
        Forward process of the network.

        Inputs:
        - Xi: A tensor of input indices, shape (N, field_size, 1).
          Note: 3-D because the input is a batch.
        - Xv: A tensor of input values, shape (N, field_size).
          (Modified: the original docstring had this wrong; it is 2-D.)
        """
        # fm part
        # Xv[:, i] has shape (N,)
        fm_first_order_emb_arr = [
            (torch.sum(emb(Xi[:, i, :]), 1).t() * Xv[:, i]).t()
            for i, emb in enumerate(self.fm_first_order_embeddings)]
        fm_first_order = torch.cat(fm_first_order_emb_arr, 1)  # N * field_size
        fm_second_order_emb_arr = [
            (torch.sum(emb(Xi[:, i, :]), 1).t() * Xv[:, i]).t()
            for i, emb in enumerate(self.fm_second_order_embeddings)]
        # fm_second_order_emb_arr: [N*k, N*k, ...]
        fm_sum_second_order_emb = sum(fm_second_order_emb_arr)
        fm_sum_second_order_emb_square = \
            fm_sum_second_order_emb * fm_sum_second_order_emb  # (x+y)^2
        fm_second_order_emb_square = [
            item * item for item in fm_second_order_emb_arr]
        fm_second_order_emb_square_sum = sum(
            fm_second_order_emb_square)  # x^2 + y^2
        fm_second_order = (fm_sum_second_order_emb_square -
                           fm_second_order_emb_square_sum) * 0.5

        # deep part
        deep_emb = torch.cat(fm_second_order_emb_arr, 1)  # concatenate along columns
        deep_out = deep_emb
        for i in range(1, len(self.hidden_dims) + 1):
            deep_out = getattr(self, 'linear_' + str(i))(deep_out)
            deep_out = getattr(self, 'batchNorm_' + str(i))(deep_out)
            deep_out = getattr(self, 'dropout_' + str(i))(deep_out)

        # sum
        total_sum = torch.sum(fm_first_order, 1) + \
            torch.sum(fm_second_order, 1) + torch.sum(deep_out, 1) + self.bias
        return total_sum

    def fit(self, loader_train, loader_val, optimizer, epochs=100,
            verbose=False, print_every=100):
        """
        Train the model and check validation accuracy.

        Inputs:
        - loader_train: DataLoader for the training set.
        - loader_val: DataLoader for the validation set.
        - optimizer: Abstraction of the optimizer used in training, e.g.
          torch.optim.Adam() or torch.optim.SGD().
        - epochs: Integer, number of epochs.
        - verbose: Bool, whether to print.
        - print_every: Integer, print every this many iterations.
        """
        # load input data
        model = self.train().to(device=self.device)
        for epoch in range(epochs):
            s_time = time.time()
            xx.append(epoch)
            train_num_correct = 0
            train_num_samples = 0
            total_loss = 0
            train_acc = 0
            for t, (xi, xv, y) in enumerate(loader_train):
                xi = xi.to(device=self.device, dtype=self.dtype)
                xv = xv.to(device=self.device, dtype=torch.float)
                y = y.to(device=self.device, dtype=torch.float)

                total = model(xi, xv)  # calls forward(); returns shape (N,)
                loss = criterion(total, y)  # y is the 0/1 label

                preds = (torch.sigmoid(total) > 0.5)  # threshold sigmoid output
                train_num_correct += (preds == y).sum()
                train_num_samples += preds.size(0)  # add one batch_size per step
                total_loss += loss.item()

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                train_acc = float(train_num_correct) / train_num_samples
                print('[ Epoch{}: {}/{} ] loss:{:.3f} acc:{:.3f} '.format(
                    epoch + 1, t + 1, len(loader_train),
                    loss.item(), train_acc * 100), end='\r')
            e_time = time.time()
            print("epoch:{} cost time:{} minutes".format(
                epoch, (e_time - s_time) / 60))
            print('\nTrain | Loss:{:.5f} acc: {:.3f}'.format(
                total_loss / len(loader_train), train_acc))
            yy_train_loss.append(total_loss / len(loader_train))
            yy_train_acc.append(train_acc)

            self.check_accuracy(loader_val, model)
        # end of epoch loop

        ax1 = plt.subplot(1, 2, 1)
        plt.sca(ax1)
        plt.plot(xx, yy_train_loss, "r", label="train_logloss")
        plt.plot(xx, yy_valid_loss, "b", label="valid_logloss")
        plt.legend()
        ax2 = plt.subplot(1, 2, 2)
        plt.sca(ax2)
        plt.plot(xx, yy_train_acc, "y", label="train_acc")
        plt.plot(xx, yy_valid_acc, "g", label="valid_norm_gini")
        plt.legend()
        plt.savefig("./loss.png")
        print("saved loss.png and model_save/best_ckpt.model")
        print("model.fit() over!")

    def check_accuracy(self, loader, model):
        num_correct = 0
        num_samples = 0
        model.eval()  # set model to evaluation mode
        with torch.no_grad():
            total_loss = 0
            pred_gini = []
            actual = []
            for t, (xi, xv, y) in enumerate(loader):
                # move to device, e.g. GPU
                xi = xi.to(device=self.device, dtype=self.dtype)
                xv = xv.to(device=self.device, dtype=torch.float)
                y = y.to(device=self.device, dtype=torch.float)
                total = model(xi, xv)
                loss = criterion(total, y)
                preds_prob = torch.sigmoid(total)

                actual = np.append(actual, y.detach().cpu().numpy())
                pred_gini = np.append(pred_gini, preds_prob.detach().cpu().numpy())
                total_loss += loss.item()

                y = y.to(device=self.device, dtype=torch.bool)
                preds = (torch.sigmoid(total) > 0.5)  # threshold sigmoid output
                num_correct += (preds == y).sum()
                num_samples += preds.size(0)  # add one batch_size per step
            valid_acc = float(num_correct) / num_samples
            val_gini_score = gini_norm(actual, pred_gini)  # actual / pred
            print("Valid | Loss:{:.5f} norm_gini:{}".format(
                total_loss / len(loader), val_gini_score))
            yy_valid_loss.append(total_loss / len(loader))
            yy_valid_acc.append(val_gini_score)

            if not os.path.exists("model_save"):
                os.mkdir("model_save")
            torch.save(model, "model_save/final_ckpt.model")

            global best_gini_score
            if val_gini_score > best_gini_score:
                # If the validation result beats all previous results, save
                # the current model for later use at prediction time.
                best_gini_score = val_gini_score
                if not os.path.exists("model_save"):
                    os.mkdir("model_save")
                # best_ckpt.model must stay consistent with test.ipynb
                torch.save(model, "model_save/best_ckpt.model")
```
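DeepFM.py imports `gini_norm` from a local `metrics` module that is not shown in this post. As a hedged stand-in, a minimal sketch based on the widely circulated Kaggle (Porto Seguro) normalized-Gini implementation could look like the following; this is an assumption about what `metrics.py` contains, not the author's actual file:

```python
# metrics.py -- a minimal sketch of the normalized Gini coefficient.
# Assumed implementation: the well-known Kaggle version; the original
# metrics module used by the post is not shown.
import numpy as np

def gini(actual, pred):
    """Unnormalized Gini: ranks samples by prediction, accumulates labels."""
    assert len(actual) == len(pred)
    data = np.asarray(np.c_[actual, pred, np.arange(len(actual))],
                      dtype=np.float64)
    # Sort by predicted score (descending), breaking ties by original order.
    data = data[np.lexsort((data[:, 2], -1 * data[:, 1]))]
    total_losses = data[:, 0].sum()
    gini_sum = data[:, 0].cumsum().sum() / total_losses
    gini_sum -= (len(actual) + 1) / 2.0
    return gini_sum / len(actual)

def gini_norm(actual, pred):
    # Normalize by the Gini of a perfect ranking, so 1.0 is a perfect score.
    return gini(actual, pred) / gini(actual, actual)
```

Finally, a hedged usage sketch for `DeepFM.fit()`. The `feature_sizes`, vocabulary sizes, batch size, and the random `TensorDataset` are all illustrative assumptions, since the original data pipeline (Porto Seguro-style, 39 fields) is not shown:

```python
import torch
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Illustrative data: 39 fields, each with a hypothetical vocabulary of 10.
feature_sizes = [10] * 39
N = 1000
Xi = torch.randint(0, 10, (N, 39, 1))      # feature indices, (N, field_size, 1)
Xv = torch.ones(N, 39)                     # feature values, (N, field_size)
y = torch.randint(0, 2, (N,)).float()      # binary labels

dataset = TensorDataset(Xi, Xv, y)
loader_train = DataLoader(dataset, batch_size=100, shuffle=True)
loader_val = DataLoader(dataset, batch_size=100)

model = DeepFM(feature_sizes, use_cuda=torch.cuda.is_available())
optimizer = optim.Adam(model.parameters(), lr=1e-3)
model.fit(loader_train, loader_val, optimizer, epochs=5)
```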