https://www.bilibili.com/video/BV1GB4y1H7hq?spm_id_from=333.999.0.0&vd_source=b1ce52b6eb3a9e6c2360a4b7172edf5a
import torch
import torch.nn as nn


class LayerNorm(nn.Module):
    def __init__(self, feature, eps=1e-6):
        """
        :param feature: size of x from self-attention (the last dimension)
        :param eps: small constant added for numerical stability
        """
        super(LayerNorm, self).__init__()
        self.a_2 = nn.Parameter(torch.ones(feature))
        self.b_2 = nn.Parameter(torch.zeros(feature))
        self.eps = eps

    def forward(self, x):
        mean = x.mean(-1, keepdim=True)
        std = x.std(-1, keepdim=True)
        return self.a_2 * (x - mean) / (std + self.eps) + self.b_2
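# A minimal usage sketch (input shapes are assumed for illustration): after the
# normalization, each position's features should have mean close to 0 and std close to 1.
x = torch.randn(2, 4, 10)   # (batch, seq_len, feature)
ln = LayerNorm(10)
out = ln(x)
print(out.shape)            # torch.Size([2, 4, 10])
print(out.mean(-1))         # values close to 0
print(out.std(-1))          # values close to 1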
# Python OOP note:
# If you don't design for extensibility, the usage below is locked in: each instance is fixed to one feature size.
# e.g. a 512-dimensional vector and a 256-dimensional vector would each need their own instance.
l1 = LayerNorm(10)   # instance fixed to feature size 10
l2 = LayerNorm(20)   # a second instance is needed for feature size 20
# Illustrative inputs (shapes assumed): each call must match the instance's feature size.
l1(torch.randn(2, 10))
l1(torch.randn(2, 10))
l1(torch.randn(2, 10))
l1(torch.randn(2, 10))
l1(torch.randn(2, 10))
l2(torch.randn(2, 20))
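# To make the "hard-coded" point concrete (a small sketch with assumed shapes):
# l1 was built with feature=10, so feeding it a 20-dimensional input fails,
# because the (10,)-shaped scale parameter cannot broadcast against a last dim of 20.
try:
    l1(torch.randn(2, 20))
except RuntimeError as e:
    print(e)   # size-mismatch error when broadcasting a_2 against the input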
# Today's alternative: pass the feature size to forward instead of fixing it in __init__.
class LayerNorm1(nn.Module):
    def __init__(self):
        super(LayerNorm1, self).__init__()

    def forward(self, feature, x, eps=1e-6):
        """
        :param feature: size of x from self-attention (the last dimension)
        :param x: input tensor
        :param eps: small constant added for numerical stability
        """
        # The parameters are (re)created on every forward call here, which is
        # what lets a single instance handle inputs of different feature sizes.
        self.a_2 = nn.Parameter(torch.ones(feature))
        self.b_2 = nn.Parameter(torch.zeros(feature))
        self.eps = eps
        mean = x.mean(-1, keepdim=True)
        std = x.std(-1, keepdim=True)
        return self.a_2 * (x - mean) / (std + self.eps) + self.b_2
l3 = LayerNorm1()
# Illustrative calls (shapes assumed): the feature size travels with each call.
l3(10, torch.randn(2, 10))
l3(10, torch.randn(2, 10))
l3(10, torch.randn(2, 10))
l3(10, torch.randn(2, 10))
l3(10, torch.randn(2, 10))
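# Tying this back to the 512-/256-dimensional example above (shapes assumed for
# illustration): a single LayerNorm1 instance can normalize both, because the
# feature size is supplied at call time rather than fixed at construction.
l4 = LayerNorm1()
out_512 = l4(512, torch.randn(2, 512))
out_256 = l4(256, torch.randn(2, 256))
print(out_512.shape, out_256.shape)   # torch.Size([2, 512]) torch.Size([2, 256])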