李宏毅 (Hung-yi Lee) 2021 Deep Learning, HW1: COVID-19 Cases Prediction
With my current skill I can only get past the medium baseline.

Passing the simple baseline is straightforward: feed each row of data into the network as-is, with no preprocessing, and the network itself needs no tuning.

The medium baseline needs a few optimizations.

First, the leading one-hot vector encoding the state is very sparse, so I simply drop those columns.

Second, the inputs are all positive and I use ReLU, which sometimes left training stuck with a flat loss (with all-positive inputs, a ReLU unit whose incoming weights turn negative outputs zero on every sample and receives no gradient, so it never recovers); to fix this I standardize the inputs.

I also add dropout regularization and Kaiming initialization.

My best score reached 1.03426.
Data loader (dataloader.py):
```python
from numpy import genfromtxt


def loader(location):
    # Skip the header row, then drop the first 41 columns
    # (the id column plus the 40-dimensional one-hot state encoding).
    data = genfromtxt(location, delimiter=',', skip_header=1)
    return data[:, 41:]
```
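For context, the slicing above assumes the standard HW1 CSV layout: an id column, 40 one-hot state columns, then the daily feature columns, with the last column of covid.train.csv being the tested_positive target. A quick sanity check of that assumption (a sketch; the shapes in the comments are what I expect, not outputs from the original post):

```python
import numpy as np

raw = np.genfromtxt('./hw1/covid.train.csv', delimiter=',', skip_header=1)
print(raw.shape)          # expect (num_rows, 95): id + 40 states + 54 remaining columns
print(raw[:, 41:].shape)  # expect (num_rows, 54): 53 input features + 1 target
```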
main:
```python
import os
import shutil

import pandas
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter

from dataloader import loader

train_data_location = './hw1/covid.train.csv'
test_data_location = './hw1/covid.test.csv'
PATH = './runs/newest'
batch_size = 2400        # rows used for training; the rest are held out
tot_epoch = 20000
learning_rate = 5e-3
hidden_size = 32


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(53, hidden_size)
        self.fc2 = nn.Linear(hidden_size, 1)
        self.dropout = nn.Dropout(p=0.2)
        self.relu = nn.ReLU()
        self.bn = nn.BatchNorm1d(hidden_size)
        self.loss_func = nn.MSELoss(reduction='mean')
        self.optim = optim.Adam(params=self.parameters(), lr=learning_rate)
        # Kaiming initialization for both linear layers
        torch.nn.init.kaiming_normal_(self.fc1.weight)
        torch.nn.init.kaiming_normal_(self.fc2.weight)

    def forward(self, x):
        ret = self.dropout(self.bn(self.relu(self.fc1(x))))
        ret = self.fc2(ret)
        return ret

    def update(self, data, target):
        self.optim.zero_grad()
        loss = self.cal_loss(data, target)
        loss.backward()
        self.optim.step()
        return loss

    def cal_loss(self, data, target):
        predict = self.forward(data)
        return self.loss_func(predict, target)


def norm(x):
    # Standardize each column with the statistics of x itself.
    mean = torch.mean(x, dim=0)
    std = torch.std(x, dim=0)
    return (x - mean) / std


if __name__ == '__main__':
    use_gpu = torch.cuda.is_available()
    print(use_gpu)
    torch.set_default_tensor_type(torch.DoubleTensor)
    data = torch.tensor(loader(train_data_location), requires_grad=False)

    shutil.rmtree(PATH)
    os.mkdir(PATH)
    writer = SummaryWriter(log_dir=PATH)

    net = Net()
    if use_gpu:
        net = net.cuda()
    net.train()

    # Shuffle once, then split into a training part and a held-out part.
    tmp = data[torch.randperm(data.size(0))]
    batch_data = tmp[0:batch_size]
    test_data = tmp[batch_size:]

    epoch = 0
    while epoch < tot_epoch:
        train_data, train_target = torch.split(batch_data, [53, 1], dim=1)
        inputs = norm(train_data)
        if use_gpu:
            inputs = inputs.cuda()
            train_target = train_target.cuda()
        loss = net.update(inputs, train_target).cpu()
        writer.add_scalar('Loss/epoch', loss, epoch)
        if epoch % 100 == 0:
            print(loss)
        epoch += 1

    net.eval()
    net = net.cpu()
    train_data, train_target = torch.split(batch_data, [53, 1], dim=1)
    train_data = norm(train_data)
    print('train_loss: ', net.cal_loss(train_data, train_target))
    test_data, test_target = torch.split(test_data, [53, 1], dim=1)
    test_data = norm(test_data)
    print('test_loss: ', net.cal_loss(test_data, test_target))

    # Predict on the real test file; note that each split is normalized
    # with its own statistics here.
    data = norm(torch.tensor(loader(test_data_location)))
    predicts = net.forward(data)
    ret = pandas.DataFrame(predicts.detach().numpy())
    ret.to_csv('result.csv', index=True, header=['tested_positive'])
```
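One caveat about the script above: norm standardizes each split with its own mean and standard deviation, so the training rows, held-out rows, and Kaggle test rows each see different statistics. The usual convention is to fit the statistics on the training rows and reuse them everywhere; a minimal sketch of that (the Normalizer class is my own illustration, not part of the original code):

```python
import torch

class Normalizer:
    """Stores training-set statistics and applies them to any split."""
    def fit(self, x: torch.Tensor):
        self.mean = torch.mean(x, dim=0)
        self.std = torch.std(x, dim=0)
        return self

    def transform(self, x: torch.Tensor) -> torch.Tensor:
        return (x - self.mean) / self.std

# usage: fit on the training features only, then reuse on test features
# normalizer = Normalizer().fit(train_x)
# test_x = normalizer.transform(test_x)
```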
Afterwards I studied some code from stronger players and picked up a few tricks, such as correlation-based feature selection and continuously saving the checkpoint that performs best on the validation set during training. Even so, the best I could reach was 0.93, so I stopped pushing further.
main:
```python
import os
import shutil

import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.feature_selection import SelectKBest, f_regression
from torch.utils.tensorboard import SummaryWriter

from dataloader import loader

train_data_location = './hw1/covid.train.csv'
test_data_location = './hw1/covid.test.csv'
PATH = './runs/newest'
batch_size = 200
learning_rate = 5e-3
hidden_size = 32
feature_nums = 15        # number of features kept after selection
l2_rate = 0.00075        # weight of the manual L2 penalty


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(feature_nums, hidden_size)
        self.fc2 = nn.Linear(hidden_size, 1)
        self.dropout = nn.Dropout(p=0.2)
        self.relu = nn.LeakyReLU()
        self.bn = nn.BatchNorm1d(hidden_size)
        self.loss_func = nn.MSELoss(reduction='mean')
        self.optim = optim.Adam(params=self.parameters())
        # a=0.01 matches LeakyReLU's default negative slope
        torch.nn.init.kaiming_normal_(self.fc1.weight, a=0.01)
        torch.nn.init.normal_(self.fc2.weight, mean=0, std=1.0)

    def forward(self, x):
        ret = self.dropout(self.bn(self.relu(self.fc1(x))))
        ret = self.fc2(ret)
        return ret

    def update(self, data, target):
        self.optim.zero_grad()
        loss = self.cal_loss(data, target)
        loss.backward()
        self.optim.step()
        return loss

    def cal_loss(self, data, target, l2=True):
        predict = self.forward(data)
        loss = self.loss_func(predict, target)
        if l2:
            # manual L2 regularization over all parameters
            for param in self.parameters():
                loss += l2_rate * torch.sum(param ** 2)
        return loss


def norm(x):
    mean = torch.mean(x, dim=0)
    std = torch.std(x, dim=0)
    return (x - mean) / std


def select_features(t):
    x = t[:, 0:53]
    y = t[:, 53:].squeeze()
    # fit() scores every column with f_regression regardless of k,
    # since we rank all the scores ourselves below.
    best_features = SelectKBest(score_func=f_regression, k=5)
    fit = best_features.fit(x, y)
    df_scores = pd.DataFrame(fit.scores_)
    df_scores.columns = ['Score']
    tmp = df_scores.nlargest(feature_nums, 'Score')
    return tmp.index.values  # column indices of the top-scoring features


def train(model, config, data, features, is_dev):
    epoch = 0
    min_loss = 1000
    update_times = 0
    tot_epochs = config['tot_epochs']
    data = data[torch.randperm(data.size(0))]
    if is_dev:
        # rely on early stopping rather than the epoch budget
        tot_epochs *= 100
        train_data = data[:]
        dev_data = data[:]
    else:
        train_data = data[:]
    while epoch < tot_epochs:
        model.train()
        # reshuffle and take the first batch_size rows as a mini-batch
        train_data = train_data[torch.randperm(train_data.size(0))]
        train_x = norm(train_data[0:config['batch_size'], features])
        train_y = train_data[0:config['batch_size'], -1].view(-1, 1)
        if use_gpu:
            train_x = train_x.cuda()
            train_y = train_y.cuda()
        loss = model.update(train_x, train_y).cpu()
        writer.add_scalar('Loss/epoch', loss, epoch)
        if is_dev:
            dev_x = norm(dev_data[:, features])
            dev_y = dev_data[:, -1].view(-1, 1)
            dev_loss = dev(dev_x, dev_y, model, use_gpu)
            if dev_loss < min_loss:
                # checkpoint whenever the dev loss improves
                min_loss = dev_loss.detach()
                torch.save(model.state_dict(), config['save_path'])
                print('update models in epoch ', epoch, 'min_loss: ', min_loss)
                update_times = 0
            else:
                update_times += 1
                if update_times > config['early_stop']:
                    break
        if epoch % 1000 == 0:
            print('loss: ', loss)
        epoch += 1


def dev(dev_x, dev_y, model, use_gpu=True):
    model.eval()
    if use_gpu:
        dev_x = dev_x.cuda()
        dev_y = dev_y.cuda()
    with torch.no_grad():
        loss = model.cal_loss(dev_x, dev_y, l2=False)
    return loss


if __name__ == '__main__':
    use_gpu = torch.cuda.is_available()
    print(use_gpu)
    config = {
        'tot_epochs': 20000,
        'optimizer': 'Adam',
        'batch_size': 200,
        'train_data_size': 2400,
        'dev_data_size': 300,
        'optim_hparas': {},
        'save_path': './models/model.pth',
        'early_stop': 20000,
    }
    torch.set_default_tensor_type(torch.DoubleTensor)
    data = loader(train_data_location)
    features = select_features(data)
    data = torch.tensor(data, requires_grad=False)

    shutil.rmtree(PATH)
    os.mkdir(PATH)
    os.makedirs('./models', exist_ok=True)
    writer = SummaryWriter(log_dir=PATH)

    net = Net()
    if use_gpu:
        net = net.cuda()
    train(net, config, data, features, is_dev=True)

    # reload the checkpoint with the best dev loss
    net.load_state_dict(torch.load(config['save_path'], map_location='cpu'))
    net.eval()
    net = net.cpu()

    data = norm(torch.tensor(loader(test_data_location))[:, features])
    predicts = net.forward(data)
    ret = pd.DataFrame(predicts.detach().numpy())
    ret.to_csv('result.csv', index=True, header=['tested_positive'])
```
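One caveat worth noting: in the code above, train_data and dev_data are both data[:], i.e. the entire shuffled training file, so the early-stopping loss is computed on rows the model also trains on. The config already defines train_data_size and dev_data_size but never uses them; a minimal sketch of a disjoint split using those fields (the helper name is my own, not from the original code):

```python
import torch

def split_train_dev(data: torch.Tensor, config: dict):
    """Shuffle once, then carve out disjoint train/dev subsets."""
    shuffled = data[torch.randperm(data.size(0))]
    n_train = config['train_data_size']
    n_dev = config['dev_data_size']
    train_data = shuffled[:n_train]
    dev_data = shuffled[n_train:n_train + n_dev]
    return train_data, dev_data

# usage inside train(): replace the two `data[:]` assignments with
# train_data, dev_data = split_train_dev(data, config)
```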
Link to the reference code:
HW1_public_strong_baseline | Kaggle