In a binary classification problem, does the gradient keep shrinking the longer a Sigmoid output layer is trained?
Background: a binary classification problem whose last layer uses a sigmoid as the output function. The question is whether gradient decay appears as training proceeds.
During training, to drive the loss down, the model pushes the confidence of the positive class as high as possible and the confidence of the negative class as low as possible. Since the sigmoid derivative σ(z)(1-σ(z)) goes to zero as the output saturates toward 0 or 1, one might expect the gradient through the output layer to shrink.
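To see where that intuition comes from, here is a minimal sketch (not part of the original experiment; the helper name sigmoid_grad is invented for illustration) that evaluates the sigmoid derivative σ(z)(1-σ(z)) at increasingly saturated pre-activations:

import torch

def sigmoid_grad(z):
    # Derivative of the sigmoid: sigma(z) * (1 - sigma(z)).
    s = torch.sigmoid(z)
    return s * (1 - s)

# As the pre-activation grows, the output saturates toward 1
# and the derivative shrinks toward 0.
for z in [0.0, 2.0, 5.0, 10.0]:
    t = torch.tensor(z)
    print(f"z={z:5.1f}  sigmoid={torch.sigmoid(t).item():.6f}  "
          f"dsigmoid/dz={sigmoid_grad(t).item():.6f}")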
Result:
No gradient decay caused by normal training was observed.
Code:
import torch
from torch import nn
import torch.optim as optim

# Output layer: a single linear unit, batch-normalized, then squashed by a sigmoid.
network = nn.Linear(1, 1)
network1 = nn.BatchNorm1d(1)
w = nn.Sigmoid()

# Two training inputs with very large raw values; BatchNorm rescales them.
tr = torch.Tensor([[100000], [200000]])
# tr = torch.Tensor([[1], [2]])            # alternative small-scale inputs
test = torch.Tensor([[150000], [300000]])  # held-out inputs (not used below)

optimizer = optim.Adam(network.parameters(), lr=0.04)
optimizer1 = optim.Adam(network1.parameters(), lr=0.04)

l1 = 0  # loss recorded at the last checkpoint, used to detect a 100x drop
while True:
    network.train()
    network1.train()  # keep BatchNorm in training mode (the diagnostic block below switches it to eval)
    optimizer.zero_grad()
    optimizer1.zero_grad()

    # Forward pass: linear -> batch norm -> sigmoid.
    # l = w(network(tr))                   # variant without BatchNorm
    l = w(network1(network(tr)))
    # Squared error against labels 0 (first sample) and 1 (second sample).
    l = (l[0] - 0) ** 2 + (l[1] - 1) ** 2
    l.backward()
    optimizer.step()
    optimizer1.step()
    print(l.item())

    if l1 == 0:
        l1 = l.item()
    # Each time the loss has dropped by another factor of 100,
    # print the intermediate activations and the current gradients.
    if l1 / l.item() > 100:
        print(network(tr))
        print(network1(network(tr)))
        print(w(network1(network(tr))))
        network.eval()
        network1.eval()
        print(network(tr))
        print(network1(network(tr)))
        print(w(network1(network(tr))))
        for name, parms in network.named_parameters():
            print('-->name:', name)
            print('-->para:', parms)
            print('-->requires_grad:', parms.requires_grad)
            print('-->grad_value:', parms.grad)
            print("===")
        l1 = l.item()
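As a follow-up, the sketch below is one possible way to check the result more directly (it is not part of the original code; names such as net, opt, and the fixed seed are chosen for illustration). It rebuilds the same linear -> BatchNorm -> sigmoid head with the same squared-error objective and logs the total gradient norm every few hundred steps, so any decay of the gradient would show up in the printout.

import torch
from torch import nn
import torch.optim as optim

torch.manual_seed(0)  # fixed seed for repeatability (an added assumption)

net = nn.Sequential(nn.Linear(1, 1), nn.BatchNorm1d(1), nn.Sigmoid())
opt = optim.Adam(net.parameters(), lr=0.04)

x = torch.tensor([[100000.0], [200000.0]])
y = torch.tensor([[0.0], [1.0]])  # negative class first, positive class second

for step in range(2001):
    opt.zero_grad()
    out = net(x)
    loss = ((out - y) ** 2).sum()
    loss.backward()
    # Overall gradient magnitude across all parameters at this step.
    grad_norm = torch.sqrt(sum((p.grad ** 2).sum()
                               for p in net.parameters() if p.grad is not None))
    opt.step()
    if step % 200 == 0:
        print(f"step {step:4d}  loss {loss.item():.3e}  grad_norm {grad_norm.item():.3e}")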