Linear Regression

import random
import torch

def generate_data(W, b, num_examples):
    """Generate Y = XW + b + Gaussian noise."""
    X = torch.normal(0, 1, (num_examples, len(W)))
    noise = torch.normal(0, 0.01, (num_examples, 1))
    Y = torch.matmul(X, W) + b + noise
    return X, Y
true_W = torch.tensor([4, -32, 5], dtype=torch.float).reshape((3, 1))
true_b = torch.tensor(0.9)
num_examples = 100
features, label = generate_data(true_W, true_b, num_examples)
true_W.shape 
torch.Size([3, 1])
features.shape, label.shape
(torch.Size([100, 3]), torch.Size([100, 1]))
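Why reshaping true_W into a (3, 1) column vector matters: with a 1-D weight vector, matmul returns a 1-D result, and adding noise shaped (100, 1) or (1, 100) then silently broadcasts to an unintended shape instead of raising an error. The shape checks below illustrate the pitfall: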
(torch.randn((100, 3)) @ torch.randn(3)).shape
torch.Size([100])
(torch.randn((100, 3)) @ torch.randn(3) + torch.randn((100, 1))).shape
torch.Size([100, 100])
(torch.randn((100, 3)) @ torch.randn(3) + torch.randn((1, 100))).shape
torch.Size([1, 100])
def data_iter(X, Y, batch_size):
    num = len(X)
    indices = list(range(num))  # must convert to list: random.shuffle needs a mutable sequence
    random.shuffle(indices)
    for i in range(0, num, batch_size):
        index = indices[i:min(i + batch_size, num)]
        yield X[index], Y[index]
        
for X, Y in data_iter(features, label, 10):
    print(X.shape, Y.shape)
torch.Size([10, 3]) torch.Size([10, 1])
torch.Size([10, 3]) torch.Size([10, 1])
torch.Size([10, 3]) torch.Size([10, 1])
torch.Size([10, 3]) torch.Size([10, 1])
torch.Size([10, 3]) torch.Size([10, 1])
torch.Size([10, 3]) torch.Size([10, 1])
torch.Size([10, 3]) torch.Size([10, 1])
torch.Size([10, 3]) torch.Size([10, 1])
torch.Size([10, 3]) torch.Size([10, 1])
torch.Size([10, 3]) torch.Size([10, 1])
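Because the slice indices[i:min(i + batch_size, num)] clips at num, the final batch is simply smaller when batch_size does not divide the number of examples. A minimal sketch (batch_size = 7 is chosen here purely for illustration):

for X, Y in data_iter(features, label, 7):
    pass  # fall through to the final batch
print(X.shape, Y.shape)  # 100 % 7 == 2, so: torch.Size([2, 3]) torch.Size([2, 1])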
W = torch.randn(true_W.shape, requires_grad=True)
b = torch.zeros(true_b.shape, requires_grad=True)
def linear_reg(X, W, b):
    return X @ W + b
def square_loss(Y_hat, Y):
    # halved squared error per example, averaged over the batch
    return ((Y - Y_hat) ** 2 / 2) / len(Y)
def sgd(params, lr):
    with torch.no_grad():  # parameter updates must not be recorded by autograd
        for param in params:
            param -= param.grad * lr
            param.grad.zero_()  # gradients accumulate, so reset after each step
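sgd relies on two autograd details: the in-place parameter update must run under torch.no_grad() so it is not recorded in the graph, and .grad accumulates across backward calls until explicitly zeroed. A minimal sketch (the value 6 follows from d(x^2)/dx = 2x at x = 3):

x = torch.tensor([3.0], requires_grad=True)
(x ** 2).sum().backward()
print(x.grad)             # tensor([6.])
with torch.no_grad():
    x -= 0.1 * x.grad     # the update itself is excluded from the graph
x.grad.zero_()            # without this, the next backward would add on top of 6.0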
lr = 0.1
num_epochs = 10
batch_size = 10
for i in range(num_epochs):
    for X, Y in data_iter(features, label, batch_size):
        Y_hat = linear_reg(X, W, b)
        loss = square_loss(Y_hat, Y)
        # print(loss.sum())
        loss.sum().backward()
        sgd([W, b], lr)
    with torch.no_grad():
        loss = square_loss(linear_reg(features, W, b), label)
        print(f"epoch: {i + 1} , loss: {loss.sum()}")
epoch: 1 , loss: 83.69226837158203
epoch: 2 , loss: 24.340164184570312
epoch: 3 , loss: 7.377201080322266
epoch: 4 , loss: 2.2390201091766357
epoch: 5 , loss: 0.6830682158470154
epoch: 6 , loss: 0.20761319994926453
epoch: 7 , loss: 0.062477193772792816
epoch: 8 , loss: 0.019114961847662926
epoch: 9 , loss: 0.005850582383573055
epoch: 10 , loss: 0.0018150561954826117
print(f"w-true_w = {true_W-W} \n, true_b-b = {true_b-b} ")
w-true_w = tensor([[ 0.0045],
        [-0.0768],
        [-0.0148]], grad_fn=<SubBackward0>) 
, true_b-b = 0.0017161369323730469 


Key functions

  • torch.normal(0, 1, (A, B)): samples from a normal distribution with mean 0 and standard deviation 1; output shape (A, B)
  • torch.matmul(X, W): matrix multiplication
  • y.reshape((-1, A)): reshapes to (B, A), where B is inferred automatically
  • len(W): returns the size of W's first dimension
  • torch.tensor(A): converts A to a tensor
  • random.shuffle(A): shuffles A in place; A can be a list
  • For model parameters not wrapped in nn.Parameter, requires_grad must be set to True
  • The result of range cannot be used directly in some places; e.g. shuffle requires converting it to a list first (see the demonstration below)
type(range(0, 3))
range
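As the last bullet notes, random.shuffle works in place and therefore fails on a range object (a minimal demonstration):

indices = range(3)
# random.shuffle(indices)  # TypeError: 'range' object does not support item assignment
indices = list(indices)
random.shuffle(indices)    # fine: lists are mutable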



Concise version (high-level APIs)

import torch
from torch.utils import data

from torch import nn
from d2l import torch as d2l
true_w, true_b = torch.tensor([1, 3, 6], dtype=torch.float).reshape((3, 1)), torch.tensor([0.45])
features, label = d2l.synthetic_data(true_w, true_b, 1000)
features.shape, label.shape

(torch.Size([1000, 3]), torch.Size([1000, 1]))
def get_iter(X, Y, batch_size, is_train=True):
    dataset = data.TensorDataset(X, Y)
    # shuffle only during training
    return data.DataLoader(dataset, batch_size=batch_size, shuffle=is_train)
batch_size = 10
data_iter = get_iter(features, label, batch_size, True)
for X, Y in data_iter:
    print(X.shape, Y.shape)
    break
torch.Size([10, 3]) torch.Size([10, 1])
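DataLoader is itself an iterable, so a single batch can also be fetched without a loop (a minimal sketch):

X, Y = next(iter(data_iter))
X.shape, Y.shape  # (torch.Size([10, 3]), torch.Size([10, 1]))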
net = nn.Sequential(nn.Linear(3, 1))

net[0].weight.data.normal_(0, 0.1)  # in-place initialization: note the trailing underscore
net[0].bias.data.fill_(0)
tensor([0.])
loss = nn.MSELoss()
trainer = torch.optim.SGD(net.parameters(), lr=0.1)
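Note that nn.MSELoss defaults to reduction='mean', i.e. the plain mean of squared errors over all elements; unlike the scratch square_loss above, there is no extra division by 2. A minimal check:

y_hat = torch.tensor([1.0, 2.0])
y = torch.tensor([0.0, 0.0])
nn.MSELoss()(y_hat, y)             # tensor(2.5000) = (1 + 4) / 2
(((y - y_hat) ** 2) / 2).mean()    # tensor(1.2500), half of the above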
num_epochs = 5

for i in range(num_epochs):
    for X, Y in data_iter:
        Y_hat = net(X)
        l = loss(Y_hat, Y)
        l.backward()
        trainer.step()
        trainer.zero_grad()
    with torch.no_grad():
        Y_hat = net(features)
        l = loss(Y_hat, label)
        print(f"epoch {i+1}  loss {l}")
epoch 1  loss 0.00010193984053330496
epoch 2  loss 9.933139517670497e-05
epoch 3  loss 0.00010065524111269042
epoch 4  loss 9.379631228512153e-05
epoch 5  loss 0.00010790234955493361
print( f"true_w-w: {true_W,net[0].weight.data} \n , true_b-b: {true_b-net[0].bias.data} " )
true_w-w: (tensor([[  4.],
        [-32.],
        [  5.]]), tensor([[1.0000, 3.0001, 6.0002]])) 
 , true_b-b: tensor([-0.0004]) 

Key functions

  • torch.tensor([1, 3, 6], dtype=torch.float).reshape((3, 1)): note the dtype; without it the tensor defaults to Long (int64), which raises an error when mixed with float tensors
  • net[0].weight.data.normal_(...): note the trailing underscore (in-place); the same applies to bias
  • net.parameters(): the p is lowercase
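A quick illustration of the trailing-underscore convention from the second bullet: methods ending in _ overwrite the tensor they are called on rather than returning a new one.

t = torch.zeros(1, 3)
t.normal_(0, 0.1)  # overwrites t with samples from N(0, 0.1^2)
t.fill_(0)         # overwrites t with zeros again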
