# 线性回归-代码库 (Linear regression — code library)

import torch
import os
import numpy as np
import random 

def synthetic_data(w, b, num_examples):
    '''
    Generate a synthetic linear-regression dataset.

    Draws `num_examples` feature rows from N(0, 1), computes the noiseless
    targets X @ w + b, and perturbs them with small Gaussian noise (std 0.001).
    Returns (features, labels).
    '''
    X = torch.normal(0, 1, (num_examples, len(w)))
    y = torch.matmul(X, w) + b
    noise = torch.normal(0, 0.001, y.shape)  # keep targets close to the true line
    return X, y + noise

def data_iterator(batch_size, features, labels):
    '''
    Mini-batch generator: yields (features, labels) slices of at most
    `batch_size` rows, in a freshly shuffled random order.
    '''
    n = len(labels)
    # torch.randperm yields a shuffled index tensor directly; the original
    # mixed numpy arange + random.shuffle, which shuffles the ndarray
    # element-by-element in Python and drags in two extra dependencies.
    indices = torch.randperm(n)
    for start in range(0, n, batch_size):
        # slicing clamps at n, so no explicit min(start+batch_size, n) is needed
        batch_idx = indices[start:start + batch_size]
        yield features[batch_idx], labels[batch_idx]

def linear_reg(X, w, b):
    '''
    Linear model: returns X @ w + b.
    '''
    return X.matmul(w) + b

def mse(y_hat, y_pred):
    '''
    Element-wise halved squared error, normalized by the number of elements.

    NOTE(review): despite the name, `y_pred` is used as the target here (it is
    reshaped to match `y_hat`) — the two must have the same number of elements.
    '''
    err = y_hat - y_pred.reshape(y_hat.shape)
    return err ** 2 * 0.5 / len(y_hat)

def SGD(params, lr, batch_size):
    '''
    One plain gradient-descent step: p <- p - lr * p.grad, then clear grads.

    NOTE(review): `batch_size` is accepted but unused; the 1/n factor
    presumably lives in the loss (mse divides by len(y_hat)) — confirm.
    '''
    with torch.no_grad():  # parameter updates must not be tracked by autograd
        for p in params:
            p.sub_(lr * p.grad)
            p.grad.zero_()  # reset so gradients don't accumulate across steps

def fit(num_epoch, features, labels, batch_size, lr, net, criterion, w, b):
    '''
    Train (w, b) with the hand-rolled data_iterator and manual SGD steps;
    prints the summed full-dataset loss after every epoch.
    '''
    for epoch in range(num_epoch):
        for X_batch, y_batch in data_iterator(batch_size, features, labels):
            batch_loss = criterion(net(X_batch, w, b), y_batch)
            batch_loss.sum().backward()  # reduce to a scalar before backprop
            SGD([w, b], lr, batch_size)
        with torch.no_grad():  # evaluation only, no graph needed
            epoch_loss = criterion(net(features, w, b), labels).sum()
            print('i=', epoch, 'loss:= ', epoch_loss, 'w:= ', w, 'b:= ', b)

# -----------------------------  1. Load data with DataLoader -----------------------------
'''
总结: 如上为手动实现线性回归的完整过程。
接下来:沿着如下线索构建网络
  1. 使用dataloader载入数据 --> 使用 pytorch定义的MSE损失函数 --> 使用 系统自带的优化器 --> 自定义网络 逐个部件将 其替换为由nn.module 构建的网络 
  2. 分支 
    (a). 构建网络 使用nn.parameter vs 不使用nn.parameter
'''

from torch import nn 
from torch.utils import data

def data_iter_sys(features, labels, v_batch_size, is_train=True):
    '''
    Wrap the tensors in a TensorDataset and return a DataLoader over it.

    TensorDataset pairs up rows of `features` and `labels`; the DataLoader
    yields mini-batches of size `v_batch_size`, shuffled when `is_train`.
    '''
    return data.DataLoader(
        data.TensorDataset(features, labels),
        batch_size=v_batch_size,
        shuffle=is_train,
    )

def fit_dataloader(num_epoch, dataloader, features, labels, batch_size, lr, net, criterion, w, b):
    '''
    Train (w, b) with manual SGD, drawing mini-batches from a DataLoader
    instead of the hand-rolled generator; prints the full-data loss per epoch.
    '''
    for epoch in range(num_epoch):
        for X_batch, y_batch in dataloader:
            batch_loss = criterion(net(X_batch, w, b), y_batch)
            batch_loss.sum().backward()
            SGD([w, b], lr, batch_size)
        with torch.no_grad():
            epoch_loss = criterion(net(features, w, b), labels).sum()
            print('i=', epoch, 'loss:= ', epoch_loss, 'w:= ', w, 'b:= ', b)


def fit_optimizer(num_epoch, dataloader, features, labels, batch_size, lr, net, criterion, params, optimizer):
    '''
    Train with a torch optimizer: step() applies the update, zero_grad()
    clears gradients. `params` is [w, b]; prints the full-data loss per epoch.
    '''
    for epoch in range(num_epoch):
        for X_batch, y_batch in dataloader:
            criterion(net(X_batch, params[0], params[1]), y_batch).sum().backward()
            optimizer.step()
            optimizer.zero_grad()  # clear grads so they don't accumulate

        with torch.no_grad():
            epoch_loss = criterion(net(features, params[0], params[1]), labels).sum()
            print('i=', epoch, 'loss:= ', epoch_loss, 'w:= ', params[0], 'b:= ', params[1])

# -----------------------------  2. 自定义网络-----------------------------
class Liner_net(nn.Module):
    '''
    Linear model whose weight/bias are plain tensors (NOT nn.Parameter),
    so nn.Module does not register them and net.parameters() is empty —
    an optimizer must be given them explicitly.
    '''
    def __init__(self, dim):
        super().__init__()
        self.weight = torch.randn(dim, requires_grad=True)
        self.bias = torch.randn(1, requires_grad=True)

    def forward(self, X):
        out = X.matmul(self.weight)
        return out + self.bias

class Liner_net_1(nn.Module):
    '''
    Linear model whose weight/bias are nn.Parameter, so nn.Module registers
    them automatically and they show up in net.parameters().
    '''
    def __init__(self, dim):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(dim))
        self.bias = nn.Parameter(torch.randn(1))

    def forward(self, X):
        out = X.matmul(self.weight)
        return out + self.bias

class Liner_net_2(nn.Module):
    '''
    Linear model delegating to nn.Linear(dim -> 1); output shape is (batch, 1).
    '''
    def __init__(self, dim):
        super().__init__()
        self.hidden = nn.Linear(dim, 1, bias=True)

    def forward(self, X):
        return self.hidden(X)

def fit_net_selfdefine(num_epoch, dataloader, features, labels, batch_size, lr, net, criterion, params, optimizer):
    '''
    Train a self-contained net(feature) module with a torch optimizer.
    `params` is only used for the per-epoch progress printout.
    '''
    for epoch in range(num_epoch):
        for X_batch, y_batch in dataloader:
            # reshape: a (B, 1) prediction against a (B,) label would
            # broadcast to (B, B) inside the loss and give wrong gradients
            preds = net(X_batch).reshape(y_batch.shape)
            criterion(preds, y_batch).sum().backward()
            optimizer.step()
            optimizer.zero_grad()

        with torch.no_grad():
            epoch_loss = criterion(net(features), labels).sum()
            print('i=', epoch, 'loss:= ', epoch_loss, 'w:= ', params[0], 'b:= ', params[1])

# -----------------------------  Global configuration ----------------------------
num_epoch = 100        # number of training epochs
num_examples = 2000    # number of synthetic samples to generate
batch_size = 32        # mini-batch size
lr = 0.001             # learning rate
net = linear_reg       # default model: the hand-written linear_reg above
criterion = mse        # default loss: the hand-written mse above

# Script entry point. Section 7 (custom module around nn.Linear) is active;
# sections 1-6 are kept as triple-quoted strings and can be re-enabled one at a time.
if __name__=='__main__':
    # 0. Generate synthetic data from a known (w_true, b_true)
    w_true = torch.tensor([3,-2,4,1], dtype=torch.float)
    b_true = torch.tensor([0.5])
    features, labels = synthetic_data( w_true, b_true, num_examples)    

    w = torch.randn(w_true.shape, requires_grad=True)
    b = torch.randn(1, requires_grad=True)
    # 1. Linear regression implemented fully by hand
    '''
    # 模型训练
    fit(num_epoch, features, labels, batch_size, lr, net, criterion, w, b)
    print(10*'*','运行结果',10*'*')
    print('[w_true,w]',[w_true, w])
    print('[b_true,b]',[b_true, b])
    '''
    
    # 2. Replace the hand-rolled generator with a DataLoader
    '''
    print(10*'*','2. 使用自定义数据生成器', 10*'*')
    num_epoch = 50
    dataloader = data_iter_sys(features, labels, batch_size)
    fit_dataloader(num_epoch,dataloader,features, labels, batch_size, lr, net, criterion, w, b)
    print(10*'*','运行结果',10*'*')
    print('[w_true,w]',[w_true, w])
    print('[b_true,b]',[b_true, b])
    ''' 
    # 3. Swap in the library loss function (nn.MSELoss)
    ''' 
    print(10*'*','3. 使用自定义损失函数', 10*'*')
    criterion = nn.MSELoss()
    fit(num_epoch, features, labels, batch_size, lr, net, criterion, w, b)
    print(10*'*','运行结果',10*'*')
    print('[w_true,w]',[w_true, w])
    print('[b_true,b]',[b_true, b])
    '''
    # 4. Swap in the library optimizer (torch.optim.SGD)
    ''' 
    print(10*'*','4. 自定义优化器', 10*'*')
    w = torch.randn(w_true.shape, requires_grad=True)
    b = torch.randn(1, requires_grad=True)
    print([w,b])
    optimizer = torch.optim.SGD([w,b],lr=lr) 
    criterion = mse  # or criterion = nn.MSELoss() 
    dataloader = data_iter_sys(features, labels, batch_size) # 不能使用 dataloader = data_iterator(batch_size,features, labels) why?
    fit_optimizer(num_epoch, dataloader, features, labels, batch_size, lr, net, criterion, [w,b], optimizer)
    print(10*'*','运行结果',10*'*')
    print('[w_true,w]',[w_true, w])
    print('[b_true,b]',[b_true, b])
    '''
    
    # 5. Custom network modules: plain tensors vs nn.Parameter
    ''' 
    print(10*'*','5. 自定义优化器-不使用parameter or parameter', 10*'*')
    criterion = nn.MSELoss()   # or criterion = nn.MSELoss() 
    num_epoch = 100
    #net = Liner_net(w.shape[0])   # 使用tensor定义net
    net = Liner_net_1(w.shape[0])  # 使用parameter定义net
    optimizer = torch.optim.SGD([net.weight,net.bias],lr=lr) 
    dataloader = data_iter_sys(features, labels, batch_size) # 不能使用 dataloader = data_iterator(batch_size,features, labels) why?
    fit_net_selfdefine(num_epoch, dataloader, features, labels, batch_size, lr, net, criterion, [net.weight,net.bias], optimizer)
    print(10*'*','运行结果',10*'*')
    print('[w_true,w]',[w_true, net.weight.data])
    print('[b_true,b]',[b_true, net.bias.data])
    '''
   # 6. nn.Linear wrapped in nn.Sequential
    ''' 
    print(10*'*','6. 自定义网络 使用Sequential ', 10*'*')
    criterion = nn.MSELoss()   # or criterion = nn.MSELoss() 
    num_epoch = 100
    net = nn.Sequential(nn.Linear(in_features=w.shape[0], out_features=1,bias=True))
    net[0].weight.data = torch.randn(w.shape[0], 1, dtype=torch.float).T # 一定要定义成矩阵而不是向量
    net[0].bias.data = torch.randn(1)
    optimizer = torch.optim.SGD([net[0].weight,net[0].bias],lr=lr) 
    dataloader = data_iter_sys(features, labels, batch_size) # 不能使用 dataloader = data_iterator(batch_size,features, labels) why?
    fit_net_selfdefine(num_epoch, dataloader, features, labels, batch_size, lr, net, criterion, [net[0].weight,net[0].bias], optimizer)
    print(10*'*','运行结果',10*'*')
    print('[w_true,w]',[w_true, net[0].weight.data])
    print('[b_true,b]',[b_true, net[0].bias.data])
    '''
   # 7. Custom module built on nn.Linear (the active section)
    print(10*'*','7. 自定义网络 使用nn.linear', 10*'*')
    criterion = nn.MSELoss()   # or a hand-written loss such as mse 
    num_epoch = 100
    net = Liner_net_2(w.shape[0])
    params = [net.hidden.weight,net.hidden.bias]
    optimizer = torch.optim.SGD(params,lr=lr) 
    dataloader = data_iter_sys(features, labels, batch_size) # NOTE(review): data_iterator can't replace this — it returns a one-shot generator, exhausted after the first epoch, while a DataLoader re-iterates every epoch
    fit_net_selfdefine(num_epoch, dataloader, features, labels, batch_size, lr, net, criterion, params, optimizer)
    print(10*'*','运行结果',10*'*')
    print('[w_true,w]',[w_true, params[0].data])
    print('[b_true,b]',[b_true, params[1].data])
# posted @ 2024-08-27 22:51  金字塔下的蜗牛  阅读(6)  评论(0)