Linear Regression

import random
import torch

def generate_data(W, b, num_examples):
    """Generate Y = XW + b + Gaussian noise."""
    X = torch.normal(0, 1, (num_examples, len(W)))
    noise = torch.normal(0, 0.01, (num_examples, 1))
    Y = torch.matmul(X, W) + b + noise
    return X, Y
true_W = torch.tensor([4, -32, 5], dtype=torch.float).reshape((3, 1))
true_b = torch.tensor(0.9)
num_examples = 100
features, label = generate_data(true_W, true_b, num_examples)
true_W.shape 
torch.Size([3, 1])
features.shape, label.shape
(torch.Size([100, 3]), torch.Size([100, 1]))
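Why reshaping true_W into a (3, 1) column vector matters: with a 1-D weight vector, matmul returns a 1-D result, and adding noise shaped (100, 1) or (1, 100) then silently broadcasts to an unintended shape instead of raising an error. The shape checks below illustrate the pitfall: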
(torch.randn((100, 3)) @ torch.randn(3)).shape
torch.Size([100])
(torch.randn((100, 3)) @ torch.randn(3) + torch.randn((100, 1))).shape
torch.Size([100, 100])
(torch.randn((100, 3)) @ torch.randn(3) + torch.randn((1, 100))).shape
torch.Size([1, 100])
def data_iter(X, Y, batch_size):
    num = len(X)
    indices = list(range(num))  # must convert to list: random.shuffle needs a mutable sequence
    random.shuffle(indices)
    for i in range(0, num, batch_size):
        index = indices[i:min(i + batch_size, num)]
        yield X[index], Y[index]
        
for X, Y in data_iter(features, label, 10):
    print(X.shape, Y.shape)
torch.Size([10, 3]) torch.Size([10, 1])
torch.Size([10, 3]) torch.Size([10, 1])
torch.Size([10, 3]) torch.Size([10, 1])
torch.Size([10, 3]) torch.Size([10, 1])
torch.Size([10, 3]) torch.Size([10, 1])
torch.Size([10, 3]) torch.Size([10, 1])
torch.Size([10, 3]) torch.Size([10, 1])
torch.Size([10, 3]) torch.Size([10, 1])
torch.Size([10, 3]) torch.Size([10, 1])
torch.Size([10, 3]) torch.Size([10, 1])
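Because the slice indices[i:min(i + batch_size, num)] clips at num, the final batch is simply smaller when batch_size does not divide the number of examples. A minimal sketch (batch_size = 7 is chosen here purely for illustration):

for X, Y in data_iter(features, label, 7):
    pass  # fall through to the final batch
print(X.shape, Y.shape)  # 100 % 7 == 2, so: torch.Size([2, 3]) torch.Size([2, 1])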
W = torch.randn(true_W.shape, requires_grad=True)
b = torch.zeros(true_b.shape, requires_grad=True)
def linear_reg(X, W, b):
    return X @ W + b
def square_loss(Y_hat, Y):
    # halved squared error per example, averaged over the batch
    return ((Y - Y_hat) ** 2 / 2) / len(Y)
def sgd(params, lr):
    with torch.no_grad():  # parameter updates must not be recorded by autograd
        for param in params:
            param -= param.grad * lr
            param.grad.zero_()  # gradients accumulate, so reset after each step
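sgd relies on two autograd details: the in-place parameter update must run under torch.no_grad() so it is not recorded in the graph, and .grad accumulates across backward calls until explicitly zeroed. A minimal sketch (the value 6 follows from d(x^2)/dx = 2x at x = 3):

x = torch.tensor([3.0], requires_grad=True)
(x ** 2).sum().backward()
print(x.grad)             # tensor([6.])
with torch.no_grad():
    x -= 0.1 * x.grad     # the update itself is excluded from the graph
x.grad.zero_()            # without this, the next backward would add on top of 6.0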
lr = 0.1
num_epochs = 10
batch_size = 10
for i in range(num_epochs):
    for X, Y in data_iter(features, label, batch_size):
        Y_hat = linear_reg(X, W, b)
        loss = square_loss(Y_hat, Y)
        # print(loss.sum())
        loss.sum().backward()
        sgd([W, b], lr)
    with torch.no_grad():
        loss = square_loss(linear_reg(features, W, b), label)
        print(f"epoch: {i + 1} , loss: {loss.sum()}")
epoch: 1 , loss: 83.69226837158203
epoch: 2 , loss: 24.340164184570312
epoch: 3 , loss: 7.377201080322266
epoch: 4 , loss: 2.2390201091766357
epoch: 5 , loss: 0.6830682158470154
epoch: 6 , loss: 0.20761319994926453
epoch: 7 , loss: 0.062477193772792816
epoch: 8 , loss: 0.019114961847662926
epoch: 9 , loss: 0.005850582383573055
epoch: 10 , loss: 0.0018150561954826117
print(f"w-true_w = {true_W-W} \n, true_b-b = {true_b-b} ")
w-true_w = tensor([[ 0.0045],
        [-0.0768],
        [-0.0148]], grad_fn=<SubBackward0>) 
, true_b-b = 0.0017161369323730469 


Key functions

  • torch.normal(0, 1, (A, B)): samples from a normal distribution with mean 0 and standard deviation 1; output shape (A, B)
  • torch.matmul(X, W): matrix multiplication
  • y.reshape((-1, A)): reshapes to (B, A), where B is inferred automatically
  • len(W): returns the size of W's first dimension
  • torch.tensor(A): converts A to a tensor
  • random.shuffle(A): shuffles A in place; A can be a list
  • For model parameters not wrapped in nn.Parameter, requires_grad must be set to True
  • The result of range cannot be used directly in some places; e.g. shuffle requires converting it to a list first (see the demonstration below)
type(range(0, 3))
range
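As the last bullet notes, random.shuffle works in place and therefore fails on a range object (a minimal demonstration):

indices = range(3)
# random.shuffle(indices)  # TypeError: 'range' object does not support item assignment
indices = list(indices)
random.shuffle(indices)    # fine: lists are mutable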



Concise version (high-level APIs)

import torch
from torch.utils import data

from torch import nn
from d2l import torch as d2l
true_w, true_b = torch.tensor([1, 3, 6], dtype=torch.float).reshape((3, 1)), torch.tensor([0.45])
features, label = d2l.synthetic_data(true_w, true_b, 1000)
features.shape, label.shape

(torch.Size([1000, 3]), torch.Size([1000, 1]))
def get_iter(X, Y, batch_size, is_train=True):
    dataset = data.TensorDataset(X, Y)
    # shuffle only during training
    return data.DataLoader(dataset, batch_size=batch_size, shuffle=is_train)
batch_size = 10
data_iter = get_iter(features, label, batch_size, True)
for X, Y in data_iter:
    print(X.shape, Y.shape)
    break
torch.Size([10, 3]) torch.Size([10, 1])
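DataLoader is itself an iterable, so a single batch can also be fetched without a loop (a minimal sketch):

X, Y = next(iter(data_iter))
X.shape, Y.shape  # (torch.Size([10, 3]), torch.Size([10, 1]))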
net = nn.Sequential(nn.Linear(3, 1))

net[0].weight.data.normal_(0, 0.1)  # in-place initialization: note the trailing underscore
net[0].bias.data.fill_(0)
tensor([0.])
loss = nn.MSELoss()
trainer = torch.optim.SGD(net.parameters(), lr=0.1)
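Note that nn.MSELoss defaults to reduction='mean', i.e. the plain mean of squared errors over all elements; unlike the scratch square_loss above, there is no extra division by 2. A minimal check:

y_hat = torch.tensor([1.0, 2.0])
y = torch.tensor([0.0, 0.0])
nn.MSELoss()(y_hat, y)             # tensor(2.5000) = (1 + 4) / 2
(((y - y_hat) ** 2) / 2).mean()    # tensor(1.2500), half of the above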
num_epochs = 5

for i in range(num_epochs):
    for X, Y in data_iter:
        Y_hat = net(X)
        l = loss(Y_hat, Y)
        l.backward()
        trainer.step()
        trainer.zero_grad()
    with torch.no_grad():
        Y_hat = net(features)
        l = loss(Y_hat, label)
        print(f"epoch {i+1}  loss {l}")
epoch 1  loss 0.00010193984053330496
epoch 2  loss 9.933139517670497e-05
epoch 3  loss 0.00010065524111269042
epoch 4  loss 9.379631228512153e-05
epoch 5  loss 0.00010790234955493361
print( f"true_w-w: {true_W,net[0].weight.data} \n , true_b-b: {true_b-net[0].bias.data} " )
true_w-w: (tensor([[  4.],
        [-32.],
        [  5.]]), tensor([[1.0000, 3.0001, 6.0002]])) 
 , true_b-b: tensor([-0.0004]) 

Key functions

  • torch.tensor([1, 3, 6], dtype=torch.float).reshape((3, 1)): note the dtype; without it the tensor defaults to Long (int64), which raises an error when mixed with float tensors
  • net[0].weight.data.normal_(...): note the trailing underscore (in-place); the same applies to bias
  • net.parameters(): the p is lowercase
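A quick illustration of the trailing-underscore convention from the second bullet: methods ending in _ overwrite the tensor they are called on rather than returning a new one.

t = torch.zeros(1, 3)
t.normal_(0, 0.1)  # overwrites t with samples from N(0, 0.1^2)
t.fill_(0)         # overwrites t with zeros again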
