Kaggle 比赛技巧之分类模型融合
Kaggle
比赛技巧
- 多种模型的融合
- 公开训练集交叉验证的结果CV SCORE作为模型性能指标
- TTA (Test Time Augmentation) 对测试集进行数据增强(Image)或微小修改(Raw Data),然后进行多次预测,得到多个测试集预测结果
StackNet
Super Learner
已知数据集D,模型m(1), ...,m(n),模型S
- 将D分为A,B
- 每个模型m(i)分别在A上进行交叉验证,得到预测结果y_pred(A),其形状为(num_sample, n)
- m在B上预测得到y_pred(B)
- y_pred(A) 和 y(A) 分别当作数据和标签组成新的数据集,训练一个模型S,这一步称为Meta Learner或Super Learner
- y_pred(B) 和 y(B) 分别当作数据和标签组成新的数据集,使用模型S进行预测并评估性能
- 在Kaggle比赛中,A对应公开训练集,B对应测试集,y(B)即测试集未公开的标签(private test dataset label)
多个模型的预测结果融合
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
import torch
import torch.nn.functional as F
import torch.nn as nn
# Configuration for the StackingCNN demo below.
NUM_MODELS = 2 # number of base models (different backbones) whose predictions are stacked
NUM_CLASSES = 2 # number of classes each base model predicts; also the meta-learner output size
NUM_CHANNELS = 128 # hyper-parameter: output channels of the first stacking convolution
class StackingCNN(nn.Module):
    """CNN meta-learner that fuses the class predictions of several base models.

    The expected input is a tensor of shape
    ``(batch, 1, num_models, num_classes)``: one row of class scores per
    base model.  ``conv1`` collapses the model axis, ``conv2`` collapses the
    class axis, and a linear layer maps the resulting feature vector to
    ``num_classes`` logits.

    Args:
        num_models: Number of stacked base models (height of the input).
        num_channels: Output channels of ``conv1``; ``conv2`` uses twice as
            many.
        num_classes: Number of classes (width of the input and size of the
            output).  When ``None`` the module-level ``NUM_CLASSES`` constant
            is used, which matches the original behaviour.
    """

    def __init__(self, num_models, num_channels, num_classes=None):
        super(StackingCNN, self).__init__()
        if num_classes is None:
            # Backward-compatible default: fall back to the file-level constant.
            num_classes = NUM_CLASSES
        self.num_classes = num_classes

        # Kernel spans every model row -> output height becomes 1.
        self.conv1 = nn.Conv2d(1, num_channels,
                               kernel_size=(num_models, 1))
        self.relu1 = nn.ReLU(inplace=True)
        self.dp1 = nn.Dropout(0.3)
        # Kernel spans every class column -> output width becomes 1.
        self.conv2 = nn.Conv2d(num_channels, num_channels * 2,
                               kernel_size=(1, num_classes))
        self.relu2 = nn.ReLU(inplace=True)
        self.dp2 = nn.Dropout(0.3)
        self.linear = nn.Linear(num_channels * 2, num_classes)
        # NOTE: not used by forward(); kept so existing attribute access
        # keeps working.
        self.fast_global_avg_pool_2d = nn.AdaptiveAvgPool2d((1, 2))

    def forward(self, x):
        """Return logits of shape ``(batch, num_classes)``.

        Args:
            x: Tensor of shape ``(batch, 1, num_models, num_classes)``.
        """
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.dp1(x)
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.dp2(x)
        # Spatial size is 1x1 here, so flattening leaves num_channels * 2
        # features per sample.
        x = x.view(x.size(0), -1)
        x = self.linear(x)
        return x
# Smoke run: push a random batch of 4 samples through the stacking network.
model = StackingCNN(NUM_MODELS, NUM_CHANNELS)
# Input layout: (batch, 1, NUM_MODELS, NUM_CLASSES).
a = torch.rand(4,1,NUM_MODELS, NUM_CLASSES)
# The final linear layer yields one row of NUM_CLASSES values per sample.
b = model(a)
模型预测结果和CNN模型倒数第一层全连接层特征融合
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
import torch
import torch.nn.functional as F
import torch.nn as nn
# Hyper-parameters for SequenceModel and its smoke run below.
feature_num = 128 # size of the feature vector taken from the last linear layer of the CNN model (last axis of `fea`)
model_number = 11 # number of base models (different backbones) whose predictions are stacked
feature_dim = 128 # channel axis of `fea`; input channels of the first feature convolution
seq_len = 24 # number of DICOM slices per patient (sequence length)
lstm_layers = 2 # number of stacked recurrent layers (the model uses nn.GRU)
hidden = 96 # hidden size of each GRU direction
drop_out = 0.5 # Dropout2d probability in the feature branch
class_num = 6 # number of classes along the last axis of `x`
batch_size = 4 # batch size used for the random demo inputs
class SequenceModel(nn.Module):
    """Sequence-level stacking model fusing CNN features with model predictions.

    Two branches are chained:

    * **Feature branch** ("seq model 1"): per-slice CNN features go through
      1x1 convolutions, then a per-step linear head (``fea_first_final``)
      plus a bidirectional GRU head (``fea_lstm`` / ``fea_lstm_final``).
      Their sum is returned as ``out0``.
    * **Prediction branch** ("seq model 2"): ``sigmoid(out0)`` is appended to
      the base-model predictions as one extra channel; dilated convolutions
      along the sequence axis feed a convolutional head (``conv_final``)
      plus a bidirectional GRU head (``lstm`` / ``final``).  Their sum is
      returned as ``out``.

    Attributes named ``*lstm*`` hold ``nn.GRU`` modules; the names are kept
    so existing checkpoints and callers keep working.

    Every constructor argument is optional.  A value of ``None`` falls back
    to the corresponding module-level constant, which reproduces the
    original zero-argument behaviour.

    Args:
        num_models: Channels of ``x`` (default ``model_number``).
        in_feature_dim: Channels of ``fea`` (default ``feature_dim``).
        num_features: Last axis of ``fea`` (default ``feature_num``).
        num_classes: Last axis of ``x`` and of both outputs
            (default ``class_num``).
        hidden_size: GRU hidden size per direction (default ``hidden``).
        num_layers: Number of GRU layers (default ``lstm_layers``).
        dropout: ``Dropout2d`` probability (default ``drop_out``).
    """

    def __init__(self, num_models=None, in_feature_dim=None,
                 num_features=None, num_classes=None, hidden_size=None,
                 num_layers=None, dropout=None):
        super(SequenceModel, self).__init__()
        # Resolve defaults lazily so the globals are only needed when the
        # caller does not supply a value.
        num_models = model_number if num_models is None else num_models
        in_feature_dim = feature_dim if in_feature_dim is None else in_feature_dim
        num_features = feature_num if num_features is None else num_features
        num_classes = class_num if num_classes is None else num_classes
        hidden_size = hidden if hidden_size is None else hidden_size
        num_layers = lstm_layers if num_layers is None else num_layers
        dropout = drop_out if dropout is None else dropout

        fea_channels = 128  # output channels of the feature convolutions
        # Per-step vector length once channels and features are merged.
        self.fea_flat_dim = fea_channels * num_features
        self.num_classes = num_classes

        # seq model 1
        self.fea_conv = nn.Sequential(
            nn.Dropout2d(dropout),
            nn.Conv2d(in_feature_dim, 512, kernel_size=(1, 1), stride=(1, 1),
                      padding=(0, 0), bias=False),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.Dropout2d(dropout),
            nn.Conv2d(512, fea_channels, kernel_size=(1, 1), stride=(1, 1),
                      padding=(0, 0), bias=False),
            nn.BatchNorm2d(fea_channels),
            nn.ReLU(),
            nn.Dropout2d(dropout),
        )
        # 1x1 conv acting as a per-step linear classifier.
        self.fea_first_final = nn.Sequential(
            nn.Conv2d(self.fea_flat_dim, num_classes, kernel_size=(1, 1),
                      stride=(1, 1), padding=(0, 0), bias=True))

        # bidirectional GRU
        self.hidden_fea = hidden_size
        self.fea_lstm = nn.GRU(self.fea_flat_dim, self.hidden_fea,
                               num_layers=num_layers, batch_first=True,
                               bidirectional=True)
        # Kernel spans the whole GRU output vector -> per-step class scores.
        self.fea_lstm_final = nn.Sequential(
            nn.Conv2d(1, num_classes, kernel_size=(1, self.hidden_fea * 2),
                      stride=(1, 1), padding=(0, 0), dilation=1, bias=True))

        ratio = 4
        res_channels = 64 * ratio
        # One extra input channel carries sigmoid(out0) from the feature branch.
        stacked_channels = num_models + 1

        # seq model 2: every conv below preserves the sequence length.
        self.conv_first = nn.Sequential(
            nn.Conv2d(stacked_channels, 128 * ratio, kernel_size=(5, 1),
                      stride=(1, 1), padding=(2, 0), dilation=1, bias=False),
            nn.BatchNorm2d(128 * ratio),
            nn.ReLU(),
            nn.Conv2d(128 * ratio, res_channels, kernel_size=(3, 1),
                      stride=(1, 1), padding=(2, 0), dilation=2, bias=False),
            nn.BatchNorm2d(res_channels),
            nn.ReLU())
        self.conv_res = nn.Sequential(
            nn.Conv2d(res_channels, res_channels, kernel_size=(3, 1),
                      stride=(1, 1), padding=(4, 0), dilation=4, bias=False),
            nn.BatchNorm2d(res_channels),
            nn.ReLU(),
            nn.Conv2d(res_channels, res_channels, kernel_size=(3, 1),
                      stride=(1, 1), padding=(2, 0), dilation=2, bias=False),
            nn.BatchNorm2d(res_channels),
            nn.ReLU(),
        )
        self.conv_final = nn.Sequential(
            nn.Conv2d(res_channels, 1, kernel_size=(3, 1), stride=(1, 1),
                      padding=(1, 0), dilation=1, bias=False))

        # bidirectional GRU
        self.hidden = hidden_size
        self.lstm = nn.GRU(res_channels * num_classes, self.hidden,
                           num_layers=num_layers, batch_first=True,
                           bidirectional=True)
        self.final = nn.Sequential(
            nn.Conv2d(1, num_classes, kernel_size=(1, self.hidden * 2),
                      stride=(1, 1), padding=(0, 0), dilation=1, bias=True))

    def forward(self, fea, x):
        """Run both branches.

        Args:
            fea: Tensor ``(batch, in_feature_dim, seq_len, num_features)``.
            x: Tensor ``(batch, num_models, seq_len, num_classes)``.

        Returns:
            Tuple ``(out, out0)``, each ``(batch, 1, seq_len, num_classes)``:
            ``out`` from the prediction branch and ``out0`` from the feature
            branch.  Both are raw scores (no sigmoid applied).
        """
        batch = x.size(0)

        # ---- feature branch -------------------------------------------
        fea = self.fea_conv(fea)
        # (B, C, T, F) -> (B, C, F, T) -> (B, C*F, T): one vector per step.
        fea = fea.permute(0, 1, 3, 2).contiguous()
        fea_seq = fea.view(batch, self.fea_flat_dim, -1)

        # Per-step linear head: (B, C*F, T, 1) -> (B, classes, T, 1).
        fea_first_final = self.fea_first_final(fea_seq.unsqueeze(-1))
        out0 = fea_first_final.permute(0, 3, 2, 1)

        # bidirectional GRU head over (B, T, C*F).
        gru_fea, _ = self.fea_lstm(fea_seq.permute(0, 2, 1).contiguous())
        # (B, T, 2H) -> (B, 1, T, 2H) so the conv kernel spans the 2H axis.
        fea_lstm_final = self.fea_lstm_final(gru_fea.unsqueeze(1))
        fea_lstm_final = fea_lstm_final.permute(0, 3, 2, 1)

        out0 = out0 + fea_lstm_final

        # ---- prediction branch ----------------------------------------
        out0_sigmoid = torch.sigmoid(out0)
        x = torch.cat([x, out0_sigmoid], dim=1)
        x = self.conv_first(x)
        x = self.conv_res(x)
        out = self.conv_final(x)

        # bidirectional GRU head: (B, C, T, K) -> (B, T, C*K).
        seq = x.permute(0, 2, 1, 3).contiguous()
        seq = seq.view(batch, seq.size(1), -1)
        seq, _ = self.lstm(seq)
        gru_out = self.final(seq.unsqueeze(1)).permute(0, 3, 2, 1)

        out = out + gru_out
        return out, out0
# Smoke run: feed random inputs through SequenceModel and inspect the output.
# CNN features: (batch, feature_dim, seq_len, feature_num).
fea = torch.rand((batch_size, feature_dim, seq_len, feature_num))
# Base-model predictions: (batch, model_number, seq_len, class_num).
# The module-level name is `model_number`; `model_num` only exists as a local
# inside SequenceModel.__init__, so using it here raised NameError.
x = torch.rand((batch_size, model_number, seq_len, class_num))
model = SequenceModel()
out, out0 = model(fea, x)
out.shape