pytorch3-optimizer
V1: the trainable parameter lives inside the model. The result shows that a param declared inside the model is indeed updated by the optimizer.
import torch
import torch.nn as nn
from tqdm import tqdm
from matplotlib import pyplot as plt

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        # trainable parameter registered directly inside the model
        self.param = nn.Parameter(torch.ones(2, 10) * 0.001)
        self.linear = nn.Linear(10, 10)

    def forward(self):
        return self.linear(self.param)

target = torch.randn(2, 10).cuda()
loss = nn.PairwiseDistance(p=2)
myNet = Net().cuda()
optimizer = torch.optim.Adam(myNet.parameters(), lr=0.01)

epoch = 1000
flagBitList = []
for i in tqdm(range(epoch)):
    optimizer.zero_grad()
    l = loss(myNet(), target).mean()
    l.backward()
    optimizer.step()
    print(myNet.param)
    # track one entry of the parameter to visualize its update trajectory
    flagBitList.append(myNet.param[0][0].item())

print(f'\033[31m{myNet()}\033[0m')  # red: network output
print(f'\033[32m{target}\033[0m')   # green: target
plt.plot(flagBitList)
plt.show()
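Why V1 works: assigning an nn.Parameter as a module attribute auto-registers it, so myNet.parameters() hands it to Adam together with the Linear weights and biases. A minimal sketch (not part of the original script) to confirm the registration:

import torch
import torch.nn as nn

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.param = nn.Parameter(torch.ones(2, 10) * 0.001)
        self.linear = nn.Linear(10, 10)

# 'param' is listed next to 'linear.weight' and 'linear.bias'
for name, p in Net().named_parameters():
    print(name, tuple(p.shape), p.requires_grad)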
V2: the trainable parameter is kept outside the model. The result shows that a param declared outside the model is updated as well.
import torch
import torch.nn as nn
from tqdm import tqdm
from matplotlib import pyplot as plt

# Move the tensor to CUDA first, then wrap it as a Parameter (requires_grad=True);
# wrapping first and moving afterwards would leave a non-leaf copy on the GPU.
dataIn = nn.Parameter(torch.ones(2, 10).cuda() * 0.001)

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(10, 10)

    def forward(self, data):
        return self.linear(data)

target = torch.randn(2, 10).cuda()
loss = nn.PairwiseDistance(p=2)
myNet = Net().cuda()
# the dict key must be 'params'
optimizer = torch.optim.Adam([{'params': myNet.parameters()}, {'params': dataIn}], lr=0.01)

epoch = 1000
flagBitList = []
for i in tqdm(range(epoch)):
    optimizer.zero_grad()
    l = loss(myNet(dataIn), target).mean()
    l.backward()
    optimizer.step()
    print(dataIn)
    flagBitList.append(dataIn[0][0].item())

print(f'\033[31m{myNet(dataIn)}\033[0m')  # red: network output
print(f'\033[32m{target}\033[0m')         # green: target
plt.plot(flagBitList)
plt.show()
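The ordering caveat on dataIn matters because torch.optim only accepts leaf tensors: wrapping first and calling .cuda() afterwards yields a non-leaf GPU copy. A small sketch of the difference, assuming a CUDA device is available:

import torch
import torch.nn as nn

good = nn.Parameter(torch.ones(2, 10).cuda() * 0.001)  # move first, then wrap
bad = nn.Parameter(torch.ones(2, 10) * 0.001).cuda()   # wrap first, then move
print(good.is_leaf)  # True:  the optimizer accepts it
print(bad.is_leaf)   # False: Adam raises "can't optimize a non-leaf Tensor"
# torch.optim.Adam([bad], lr=0.01)  # uncommenting this raises ValueError

The key has to be 'params' because each group dict may also carry per-group options, e.g. {'params': dataIn, 'lr': 0.1} to give the input parameter its own learning rate.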
V3: the result shows that a param carrying an extra, unsqueezed dimension is updated as well.
import torch
import torch.nn as nn
from tqdm import tqdm
from matplotlib import pyplot as plt

# Move the tensor to CUDA first, then wrap it as a Parameter (same caveat as V2);
# note the extra singleton dimension: shape (2, 1, 10).
dataIn = nn.Parameter(torch.ones(2, 1, 10).cuda() * 0.001)

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(10, 10)

    def forward(self, data):
        return self.linear(data)

target = torch.randn(2, 10).cuda()
loss = nn.PairwiseDistance(p=2)
myNet = Net().cuda()
# the dict key must be 'params'
optimizer = torch.optim.Adam([{'params': myNet.parameters()}, {'params': dataIn}], lr=0.01)

epoch = 1000
flagBitList = []
for i in tqdm(range(epoch)):
    optimizer.zero_grad()
    # squeeze the (2, 1, 10) output back to (2, 10) before computing the distance
    l = loss(myNet(dataIn).squeeze(), target).mean()
    l.backward()
    optimizer.step()
    print(dataIn)
    flagBitList.append(dataIn[0][0][0].item())

print(f'\033[31m{myNet(dataIn).squeeze()}\033[0m')  # red: network output
print(f'\033[32m{target}\033[0m')                   # green: target
plt.plot(flagBitList)
plt.show()
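V3 succeeds because squeeze and unsqueeze are differentiable view operations, so the gradient flows back through them to the leaf parameter. A tiny standalone CPU sketch of that fact:

import torch
import torch.nn as nn

p = nn.Parameter(torch.zeros(3))
out = p.unsqueeze(0).squeeze()  # both view ops are tracked by autograd
out.sum().backward()
print(p.grad)  # tensor([1., 1., 1.]): the gradient reaches the leaf parameter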
Action is the remedy that cures fear, while hesitation and delay only keep feeding it.