Understanding the Convolution Module in Conformer
"""ConvolutionModule definition.""" from torch import nn class ConvolutionModule(nn.Module): """ConvolutionModule in Conformer model. Args: channels (int): The number of channels of conv layers. kernel_size (int): Kernerl size of conv layers. """ def __init__(self, channels, kernel_size, activation=nn.ReLU(), bias=True): """Construct an ConvolutionModule object.""" super(ConvolutionModule, self).__init__() # kernerl_size should be a odd number for 'SAME' padding assert (kernel_size - 1) % 2 == 0 self.pointwise_conv1 = nn.Conv1d( channels, 2 * channels, #输出通道为输入的两倍,扩张维度 kernel_size=1, #卷积核为1,也就是对每一列独立做卷积,token内的交互 stride=1, padding=0, bias=bias, ) self.depthwise_conv = nn.Conv1d( channels, channels, kernel_size, #卷积核这里设置的31 stride=1, padding=(kernel_size - 1) // 2, #填充是15,随着卷积核的变化而变化,目的是使得输出形状和输入形状相同 groups=channels, #分了channels组,表示只对每个卷积核范围内的token建立交互,而不对dim维度进行卷积 bias=bias, ) self.norm = nn.BatchNorm1d(channels) self.pointwise_conv2 = nn.Conv1d( channels, channels, kernel_size=1, stride=1, padding=0, bias=bias, ) self.activation = activation def forward(self, x): """Compute convolution module. Args: x (torch.Tensor): Input tensor (#batch, time, channels). Returns: torch.Tensor: Output tensor (#batch, time, channels). """ # exchange the temporal dimension and the feature dimension x = x.transpose(1, 2) #为了后面卷积操作,需要先转置 # GLU mechanism x = self.pointwise_conv1(x) # (batch, 2*channel, dim) x = nn.functional.glu(x, dim=1) # (batch, channel, dim) # 1D Depthwise Conv 相当于深度可分离卷积的形式,可以降低参数量,并分别对通道维度和time维度卷积 x = self.depthwise_conv(x) x = self.activation(self.norm(x)) x = self.pointwise_conv2(x) return x.transpose(1, 2)