PyTorch: modules(), children(), parameters(), buffers()
Preface
You can use the following 4 pairs (8 methods) to access all the Modules of a network, e.g. to traverse its structure or its parameters. The four pairs behave similarly, so they are covered together: all are methods of nn.Module, and all return a generator.

- modules() and named_modules(): the network's modules, recursive
- children() and named_children(): the network's modules, not recursive
- parameters() and named_parameters(): the network's parameters
- buffers() and named_buffers(): the network's buffers
Traversing the network structure
model.modules()

The modules() method returns an iterator over all the modules in the model. It is recursive: it yields every module in the network, from the top level all the way down to the leaf modules.

Data structure: generator
import torch
import torch.nn as nn
from collections import OrderedDict

# Define a network
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3)
        self.conv2 = nn.Conv2d(64, 64, 3)
        self.features = nn.Sequential(OrderedDict([
            ('conv3', nn.Conv2d(64, 128, 3)),
            ('conv4', nn.Conv2d(128, 128, 3)),
            ('relu1', nn.ReLU())
        ]))

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.features(x)
        return x
# Use model.modules() to recurse through every layer
model = Model()
# model.modules() returns a <generator>
# 1. convert it with list(): list(model.modules())
# 2. or iterate it: for m in model.modules():
for i, inter in enumerate(model.modules()):
    print(i, '-', inter, ' |---', type(inter))
"""
0 - Model(
(conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1))
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
(features): Sequential(
(conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
(conv4): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1))
(relu1): ReLU()
)
) |--- <class '__main__.Model'>
1 - Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1)) |--- <class 'torch.nn.modules.conv.Conv2d'>
2 - Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1)) |--- <class 'torch.nn.modules.conv.Conv2d'>
3 - Sequential(
(conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
(conv4): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1))
(relu1): ReLU()
) |--- <class 'torch.nn.modules.container.Sequential'>
4 - Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1)) |--- <class 'torch.nn.modules.conv.Conv2d'>
5 - Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1)) |--- <class 'torch.nn.modules.conv.Conv2d'>
6 - ReLU() |--- <class 'torch.nn.modules.activation.ReLU'>
"""
Output analysis:

- 0 - Model is the entire network module
- 1, 2, 3 are the network's three direct submodules; note that 3 - Sequential itself still contains submodules
- 4, 5, 6 are the submodules of 3 - Sequential

So modules() recursively yields every module in the network, from the top level down to the leaf modules. Every item yielded by model.modules() is a module instance.
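A common use of modules() is recursive weight initialization, since it reaches layers nested inside containers such as Sequential. A minimal sketch (the Kaiming init scheme here is an illustrative assumption, not from the original):

def init_weights(model):
    # modules() recurses, so this also reaches conv3/conv4 inside self.features
    for m in model.modules():
        if isinstance(m, nn.Conv2d):
            nn.init.kaiming_normal_(m.weight)
            if m.bias is not None:
                nn.init.zeros_(m.bias)

init_weights(Model())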
model.named_modules()

named_modules() works like modules(), except each item it yields has two parts: the module's name and the module itself.
model = Model()
for i, inter in enumerate(model.named_modules()):
    print(i, '-', inter, ' |---', type(inter), type(inter[0]), type(inter[1]))
"""
0 - ('', Model(
(conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1))
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
(features): Sequential(
(conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
(conv4): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1))
(relu1): ReLU()
)
)) |--- <class 'tuple'> <class 'str'> <class '__main__.Model'>
1 - ('conv1', Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1))) |--- <class 'tuple'> <class 'str'> <class 'torch.nn.modules.conv.Conv2d'>
2 - ('conv2', Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))) |--- <class 'tuple'> <class 'str'> <class 'torch.nn.modules.conv.Conv2d'>
3 - ('features', Sequential(
(conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
(conv4): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1))
(relu1): ReLU()
)) |--- <class 'tuple'> <class 'str'> <class 'torch.nn.modules.container.Sequential'>
4 - ('features.conv3', Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))) |--- <class 'tuple'> <class 'str'> <class 'torch.nn.modules.conv.Conv2d'>
5 - ('features.conv4', Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1))) |--- <class 'tuple'> <class 'str'> <class 'torch.nn.modules.conv.Conv2d'>
6 - ('features.relu1', ReLU()) |--- <class 'tuple'> <class 'str'> <class 'torch.nn.modules.activation.ReLU'>
"""
Analysis:

- compared with model.modules(), model.named_modules() additionally yields a name (str)
- every item yielded by model.named_modules() is a tuple (str, Module)
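Because the yielded names are dotted paths, named_modules() is handy for looking up a nested submodule by name. A small sketch:

# build a name -> module mapping; nested modules get dotted names
name_to_module = dict(model.named_modules())
print(name_to_module['features.conv3'] is model.features.conv3)  # True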
model.children()

Unlike modules(), children() only yields the direct child modules of the current module; it does not recurse into them.

Data structure: generator
# model.children() returns a generator
# use model.children() to traverse one level
model = Model()
for idx, child in enumerate(model.children()):
    print(idx, '-', child, ' |---', type(child))
"""
0 - Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1)) |--- <class 'torch.nn.modules.conv.Conv2d'>
1 - Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1)) |--- <class 'torch.nn.modules.conv.Conv2d'>
2 - Sequential(
(conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
(conv4): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1))
(relu1): ReLU()
) |--- <class 'torch.nn.modules.container.Sequential'>
"""
# Analysis:
# - traverses one level only, does not recurse
# - yields the current level's direct child modules
# - item type: Module
# - no names
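Because children() yields only the top-level blocks, a common use is truncating a model, e.g. dropping the last block to reuse the rest as a feature extractor. A minimal sketch:

# keep every top-level child except the last one (here: the features block)
trunk = nn.Sequential(*list(model.children())[:-1])
print(trunk)  # a Sequential containing conv1 and conv2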
model.named_children()

Like model.named_modules(), but without recursion: it only yields the current module's direct children, and each item carries the module's name.
for i, inter in enumerate(model.named_children()):
    print(i, '-', inter, ' |---', type(inter), type(inter[0]), type(inter[1]))
"""
0 - ('conv1', Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1))) |--- <class 'tuple'> <class 'str'> <class 'torch.nn.modules.conv.Conv2d'>
1 - ('conv2', Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))) |--- <class 'tuple'> <class 'str'> <class 'torch.nn.modules.conv.Conv2d'>
2 - ('features', Sequential(
(conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
(conv4): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1))
(relu1): ReLU()
)) |--- <class 'tuple'> <class 'str'> <class 'torch.nn.modules.container.Sequential'>
"""
# Analysis:
# - traverses one level only, does not recurse
# - yields the current level's direct child modules
# - item type: tuple(str, Module)
# - has names
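The names make named_children() convenient for targeting a single block, for example freezing it. A sketch:

# freeze every parameter inside the 'features' block, leave the rest trainable
for name, child in model.named_children():
    if name == 'features':
        for p in child.parameters():
            p.requires_grad = False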
Traversing the network parameters

A model has two kinds of state that should be saved:

- state that backpropagation updates through the optimizer, called parameters
- state that the optimizer does not update, called buffers
model.parameters()

The parameters() method returns an iterator over all of the model's parameters. It is typically passed to an optimizer.
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.conv1 = nn.Linear(in_features=1, out_features=2)
        self.conv2 = nn.Linear(2, 4)
        self.features = nn.Sequential(OrderedDict([
            ('conv3', nn.Linear(4, 2)),
            ('conv4', nn.Linear(2, 1)),
            ('relu1', nn.ReLU())
        ]))

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.features(x)
        return x
model = Model()
for i, m in enumerate(model.parameters()):
    print(i, '-', m, ' |---', type(m))
"""
0 - Parameter containing:
tensor([[ 0.7648],
[-0.1883]], requires_grad=True) |--- <class 'torch.nn.parameter.Parameter'>
1 - Parameter containing:
tensor([-0.4055, 0.3920], requires_grad=True) |--- <class 'torch.nn.parameter.Parameter'>
2 - Parameter containing:
tensor([[-0.0892, 0.4368],
[ 0.6725, 0.4771],
[ 0.3529, 0.3735],
[-0.2655, 0.3425]], requires_grad=True) |--- <class 'torch.nn.parameter.Parameter'>
3 - Parameter containing:
tensor([ 0.1371, -0.3791, -0.0297, 0.1128], requires_grad=True) |--- <class 'torch.nn.parameter.Parameter'>
4 - Parameter containing:
tensor([[-0.2866, 0.4171, -0.4858, 0.1794],
[ 0.0398, -0.4532, 0.3153, 0.3343]], requires_grad=True) |--- <class 'torch.nn.parameter.Parameter'>
5 - Parameter containing:
tensor([-0.3771, 0.0276], requires_grad=True) |--- <class 'torch.nn.parameter.Parameter'>
6 - Parameter containing:
tensor([[ 0.2573, -0.2529]], requires_grad=True) |--- <class 'torch.nn.parameter.Parameter'>
7 - Parameter containing:
tensor([-0.6073], requires_grad=True) |--- <class 'torch.nn.parameter.Parameter'>
"""
# Analysis:
# - yields the weight parameters
# - no names
# - model.parameters() returns a <generator>
# - item type: <class 'torch.nn.parameter.Parameter'>
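As noted above, the typical consumer of parameters() is an optimizer; a minimal sketch (the learning rate is an arbitrary assumption):

import torch

# hand every trainable parameter to the optimizer in one call
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)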
model.named_parameters()

Same as parameters(), but each item also carries the parameter's name.
for i, m in enumerate(model.named_parameters()):
    print(i, '-', m, ' |---', type(m))
"""
0 - ('conv1.weight', Parameter containing:
tensor([[ 0.1968],[-0.1485]], requires_grad=True)) |--- <class 'tuple'>
1 - ('conv1.bias', Parameter containing:
tensor([0.4953, 0.8265], requires_grad=True)) |--- <class 'tuple'>
2 - ('conv2.weight', Parameter containing:
tensor([[ 0.4551, 0.4058],
[-0.2279, -0.2382],
[-0.6567, -0.6940],
[ 0.5560, 0.6012]], requires_grad=True)) |--- <class 'tuple'>
3 - ('conv2.bias', Parameter containing:
tensor([ 0.1039, 0.5741, 0.0626, -0.6930], requires_grad=True)) |--- <class 'tuple'>
4 - ('features.conv3.weight', Parameter containing:
tensor([[-0.1569, -0.1964, -0.0289, 0.0726],
[-0.4019, -0.3200, -0.4739, 0.0219]], requires_grad=True)) |--- <class 'tuple'>
5 - ('features.conv3.bias', Parameter containing:
tensor([-0.4377, -0.1137], requires_grad=True)) |--- <class 'tuple'>
6 - ('features.conv4.weight', Parameter containing:
tensor([[0.2752, 0.1640]], requires_grad=True)) |--- <class 'tuple'>
7 - ('features.conv4.bias', Parameter containing:
tensor([0.1114], requires_grad=True)) |--- <class 'tuple'>
"""
# Analysis:
# - yields the weight parameters
# - has names
# - model.named_parameters() returns a <generator>
# - item type: tuple(str, torch.nn.parameter.Parameter)
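The names yielded by named_parameters() make it easy to give different parts of the model different optimizer settings, e.g. excluding biases from weight decay. A sketch (the grouping rule and hyperparameters are illustrative assumptions):

# split parameters into weight-decay and no-weight-decay groups by name
decay, no_decay = [], []
for name, p in model.named_parameters():
    (no_decay if name.endswith('bias') else decay).append(p)
optimizer = torch.optim.SGD([
    {'params': decay, 'weight_decay': 1e-4},
    {'params': no_decay, 'weight_decay': 0.0},
], lr=0.01)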
Other parameters

model.buffers()

Buffers are the state that the optimizer does not update during backpropagation.
class MyModel(nn.Module):
    def __init__(self):
        super(MyModel, self).__init__()
        self.my_tensor = torch.randn(1)  # a plain tensor stored as an ordinary attribute
        self.register_buffer('my_buffer', torch.randn(1))  # a tensor registered as a buffer
        self.my_param = nn.Parameter(torch.randn(1))  # registered as a parameter

    def forward(self, x):
        return x

model = MyModel()
print(model.state_dict())
# OrderedDict([('my_param', tensor([1.2357])), ('my_buffer', tensor([-0.9982]))])
for n, bf in enumerate(model.buffers()):
    print(n, bf, type(bf))
# 0 tensor([-1.2442]) <class 'torch.Tensor'>
# a tensor must be registered via register_buffer() before buffers() yields it
# buffers belong to the model's state, but do not take part in optimization
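Buffers are not exotic: stock PyTorch layers use them as well. BatchNorm, for instance, keeps its running statistics as registered buffers:

bn = nn.BatchNorm2d(4)
print([name for name, _ in bn.named_buffers()])
# ['running_mean', 'running_var', 'num_batches_tracked']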
model.named_buffers()

for n, bf in enumerate(model.named_buffers()):
    print(n, bf, type(bf))
# 0 ('my_buffer', tensor([-1.2442])) <class 'tuple'>
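The payoff of register_buffer() over a plain tensor attribute is that the buffer is saved in state_dict() and moves with the module on .to() / .cuda(), while a plain attribute does neither. A quick check on the model above:

print('my_param' in model.state_dict())   # True
print('my_buffer' in model.state_dict())  # True
print('my_tensor' in model.state_dict())  # False: plain attributes are not saved
# likewise, model.to('cuda') would move my_buffer and my_param, but not my_tensor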