HW1

Fix the random number generator seeds:

import numpy as np
import torch

def same_seed(seed):
    '''Fixes random number generator seeds for reproducibility.'''
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
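
For example, it can be called once at the top of the script, before splitting the data or building the model (the seed value below is only an illustration):

same_seed(5201314)  # all numpy / torch randomness from here on depends on this seed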

Split the dataset into training and validation sets (torch.utils.data.random_split):

from torch.utils.data import random_split

def train_valid_split(data_set, valid_ratio, seed):
    '''Split provided training data into training set and validation set'''
    valid_set_size = int(valid_ratio * len(data_set)) 
    train_set_size = len(data_set) - valid_set_size
    train_set, valid_set = random_split(data_set, [train_set_size, valid_set_size], generator=torch.Generator().manual_seed(seed))
    return np.array(train_set), np.array(valid_set)
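
A sketch of how this might be called on the HW1 training data, assuming the csv has already been read with pandas (the file path here is hypothetical):

import pandas as pd

raw_data = pd.read_csv('./covid.train.csv').values  # hypothetical path; gives an (n_samples, n_features) ndarray
train_data, valid_data = train_valid_split(raw_data, valid_ratio=0.2, seed=5201314)
print(train_data.shape, valid_data.shape)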

random_split can also split the data into training, validation, and test sets:

import numpy as np
import torch
from torch.utils.data import random_split

# Create a dataset: 100 samples, 10 features each
data = np.arange(1000).reshape((100, 10))  # input is an ndarray; if the data comes from a csv file (DataFrame df), use data = df.values
print(data.shape)  # (100, 10)

# Split into training, validation, and test sets at a 7 : 2 : 1 ratio
data_split = random_split(data, [70, 20, 10], generator=torch.Generator().manual_seed(520))
train_data, valid_data, test_data = data_split

# The returned splits need to be converted back to ndarrays: the whole split is an ndarray-in, ndarray-out process!
print(np.array(train_data).shape)  # (70, 10)
print(np.array(valid_data).shape)  # (20, 10)
print(np.array(test_data).shape)   # (10, 10)
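
Note that random_split does not copy the data: each element of data_split is a torch.utils.data.Subset that only stores the shuffled row indices, and wrapping it in np.array is what actually materializes the rows. A quick check:

print(type(train_data))        # torch.utils.data.Subset, not an ndarray
print(train_data.indices[:5])  # the shuffled row indices assigned to the training split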

The early-stop strategy is used during model training and validation:

import math
import torch

# Initialization
best_loss, early_stop_count = math.inf, 0

# Place this after the training and validation steps of each epoch
if mean_valid_loss < best_loss:
    best_loss = mean_valid_loss
    torch.save(model.state_dict(), config['save_path'])  # Save your best model
    print('Saving model with loss {:.3f}...'.format(best_loss))
    early_stop_count = 0
else: 
    early_stop_count += 1

if early_stop_count >= config['early_stop']:
    print('\nModel is not improving, so we halt the training session.')
    break  # stop the epoch loop (use return instead if this sits inside a training function)
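
To make the control flow concrete, here is a self-contained toy loop (a plain linear model on random data, not the HW1 network) showing where this bookkeeping sits inside the epoch loop:

import math
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

# Toy data and model, only to demonstrate the early-stop bookkeeping.
torch.manual_seed(0)
x, y = torch.randn(200, 10), torch.randn(200, 1)
train_loader = DataLoader(TensorDataset(x[:160], y[:160]), batch_size=32, shuffle=True)
valid_loader = DataLoader(TensorDataset(x[160:], y[160:]), batch_size=32)
model = nn.Linear(10, 1)
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-2)

best_loss, early_stop_count = math.inf, 0
for epoch in range(100):
    model.train()
    for xb, yb in train_loader:
        optimizer.zero_grad()
        criterion(model(xb), yb).backward()
        optimizer.step()

    model.eval()
    with torch.no_grad():
        mean_valid_loss = sum(criterion(model(xb), yb).item()
                              for xb, yb in valid_loader) / len(valid_loader)

    if mean_valid_loss < best_loss:   # same logic as above (model saving omitted here)
        best_loss = mean_valid_loss
        early_stop_count = 0
    else:
        early_stop_count += 1
    if early_stop_count >= 10:        # patience of 10 epochs, analogous to config['early_stop']
        print(f'No improvement for 10 epochs, stopping at epoch {epoch}.')
        break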

Put all the hyperparameters in a single config dictionary so they are easy to adjust:

device = 'cuda' if torch.cuda.is_available() else 'cpu'
config = {
    'seed': 5201314,      # Your seed number, you can pick your lucky number. :)
    'select_all': True,   # Whether to use all features.
    'valid_ratio': 0.2,   # validation_size = train_size * valid_ratio
    'n_epochs': 3000,     # Number of epochs.            
    'batch_size': 256, 
    'learning_rate': 1e-5,              
    'early_stop': 400,    # If model has not improved for this many consecutive epochs, stop training.     
    'save_path': './models/model.ckpt'  # Your model will be saved here.
}
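
Everything downstream then reads from this dictionary, so changing a hyperparameter means editing one place. A sketch of typical usage (raw_data, train_dataset, and model are placeholders for objects defined elsewhere in the script):

from torch.utils.data import DataLoader

same_seed(config['seed'])                                   # reproducibility
train_data, valid_data = train_valid_split(raw_data, config['valid_ratio'], config['seed'])
train_loader = DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=True)
optimizer = torch.optim.SGD(model.parameters(), lr=config['learning_rate'])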

Save the test predictions as a csv file (import csv):

import csv

def save_pred(preds, file):
    ''' Save predictions to specified file '''
    with open(file, 'w') as fp:
        writer = csv.writer(fp)
        writer.writerow(['id', 'tested_positive'])
        for i, p in enumerate(preds):
            writer.writerow([i, p])
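
In HW1 the predictions usually come from a loop over the test DataLoader, similar to the sketch below (test_loader, model, and device are assumed to exist already; the output file name is just an example):

model.eval()
preds = []
with torch.no_grad():
    for x in test_loader:                             # test batches have no labels
        preds.append(model(x.to(device)).detach().cpu())
preds = torch.cat(preds, dim=0).numpy().reshape(-1)   # one flat ndarray of predictions

save_pred(preds, 'pred.csv')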

 
