【课程作业经验】基于MindSpore疫苗接种数据预测
基于MindSpore实现疫苗接种数据预测
本文基于机器学习实践课程中使用MindSpore深度学习框架完成的任务,分享一些心得。
数据导入与准备
数据下载链接:https://pan.baidu.com/s/10npLE-JCKCmY4mBs1zDHpQ 提取码:23vb
疫苗接种数据集包含1983年-2016年疫苗接种数据,其形式如下图所示:
读取数据并进行训练集、测试集分割:
# Load the vaccination table: "Year" is the input feature, "Values" the target.
df = pd.read_csv("vaccine.csv")
features, target = df["Year"], df["Values"]

# Keep the first 70% of the rows for training and the remainder for testing
# (slice-based split, no shuffling).
split_num = int(len(features) * 0.7)
X_train, X_test = features[:split_num], features[split_num:]
y_train, y_test = target[:split_num], target[split_num:]

# Build the polynomial features (degree=1 here) and wrap the training split
# in a dataset; batch_number / repeat_number are the hyper-parameters set up
# before training.
X_train, X_test = poly_transform(X_train, X_test, degree=1)
ds_train = create_dataset(X_train, y_train, batch_number, repeat_number)
构建多项式特征:
def poly_transform(X_train, X_test, degree=2):
    """Expand a single feature column into polynomial features.

    Args:
        X_train: Training feature exposing ``.values`` (e.g. a pandas
            Series); it is reshaped into a single column.
        X_test: Test feature in the same form as ``X_train``.
        degree: Highest polynomial degree to generate.

    Returns:
        A ``(train, test)`` tuple of transformed arrays, one row per sample,
        without a bias column.
    """
    poly = PolynomialFeatures(degree=degree, include_bias=False)
    # Fit on the training split only and reuse the fitted transformer for the
    # test split, so the test data never influences the fitted state.
    train_poly = poly.fit_transform(X_train.values.reshape(len(X_train), 1))
    test_poly = poly.transform(X_test.values.reshape(len(X_test), 1))
    return train_poly, test_poly
全连接网络模型建立
构建一层1->1的网络,比较简单:
class LinearNet(nn.Cell):
    """Single fully connected layer mapping ``n`` input features to one output."""

    def __init__(self, n=1):
        super().__init__()
        # One dense layer whose weight and bias are both initialised with
        # Normal(0.02).
        self.fc = nn.Dense(
            in_channels=n,
            out_channels=1,
            weight_init=Normal(0.02),
            bias_init=Normal(0.02),
            has_bias=True,
        )

    def construct(self, x):
        # Forward pass: apply the dense layer and return its output.
        return self.fc(x)
设定优化器以及其他参数
在这里我们采用MSELoss损失函数以及Momentum优化器,感兴趣的话可以探究其他参数。
# Hyper-parameters.
batch_number = 1    # batch size handed to create_dataset
repeat_number = 1   # dataset repeat count handed to create_dataset
epoch = 1000        # number of training epochs

# Model, loss function and optimizer.
net = LinearNet(n=1)
net_loss = nn.loss.MSELoss()
opt = nn.Momentum(
    net.trainable_params(),
    learning_rate=1e-7,
    momentum=0.01,
)
模型训练
MindSpore的模型训练实现起来较为简单,封装得很好。
# Bundle the network, loss and optimizer, then train with dataset sinking
# switched off.
model = Model(network=net, loss_fn=net_loss, optimizer=opt)
model.train(epoch, ds_train, dataset_sink_mode=False)
测试结果
完整代码
import numpy as np
import pandas as pd
from mindspore import Model, Tensor
from mindspore import dataset as ds
from mindspore import nn
from mindspore.common.initializer import Normal
from sklearn.preprocessing import PolynomialFeatures
def get_train(X, y):
    """Yield the training samples one at a time.

    Args:
        X: Features (e.g. as read by pandas); converted to a float32 array.
        y: Labels (e.g. as read by pandas); converted to a float32 array.

    Yields:
        A ``([feature], [label])`` pair for each sample, with the feature and
        the label each wrapped in a one-element list.
    """
    X = np.array(X).astype(np.float32)
    y = np.array(y).astype(np.float32)
    # The label is looked up by position, so a y shorter than X raises an
    # IndexError instead of silently dropping samples.
    for i, sample in enumerate(X):
        yield [sample], [y[i]]
def get_test(X, y):
    """Convert the test features and labels to float32 NumPy arrays.

    Returns:
        A ``(features, labels)`` tuple of float32 arrays.
    """
    features = np.array(X).astype(np.float32)
    labels = np.array(y).astype(np.float32)
    return features, labels
def create_dataset(X_train, y_train, batch_size=16, repeat_size=1):
    """Build the training dataset pipeline.

    The samples produced by ``get_train`` are materialised into a list,
    exposed through the columns ``data`` and ``label``, then batched and
    repeated.

    Args:
        X_train: Training features.
        y_train: Training labels.
        batch_size: Number of samples per batch.
        repeat_size: Number of times the dataset is repeated.

    Returns:
        The batched and repeated dataset.
    """
    samples = list(get_train(X_train, y_train))
    dataset = ds.GeneratorDataset(samples, column_names=['data', 'label'])
    # Batch first, then repeat, in the same order as the step-by-step form.
    return dataset.batch(batch_size).repeat(repeat_size)
def mse(y_predict, y_test):
    """Print and return the test error between predictions and targets.

    NOTE: despite the function name and the printed label, the value is the
    square root of the mean squared error (i.e. the RMSE).

    Args:
        y_predict: Predictions exposing ``.shape``; the first axis indexes
            the samples.
        y_test: Targets, indexed by the same sample positions.

    Returns:
        The square root of the mean of the squared differences.
    """
    n_samples = y_predict.shape[0]
    squared_sum = 0
    # Pair prediction i with target i explicitly; broadcasting the two whole
    # arrays against each other would mix samples when their ranks differ.
    for i, predicted in enumerate(y_predict):
        squared_sum += (predicted - y_test[i]) ** 2
    error = (squared_sum / n_samples) ** 0.5
    print("测试集的mse为:", error)
    return error
def test_all(net, X_test, y_test):
    """Evaluate the trained network on the test split and print its error.

    The prediction is recomputed in NumPy from the first two trainable
    parameters (weight and bias) instead of calling the network, and the
    result is passed to ``mse``, which prints it.
    """
    params = net.trainable_params()
    weight, bias = params[0], params[1]
    x_test, y_test = get_test(X_test, y_test)

    # First weight row turned into a column vector for the matrix product.
    weight_col = np.expand_dims(Tensor(weight).asnumpy()[0], 1)
    x_test = np.expand_dims(x_test, 1)
    y_predict = np.matmul(x_test, weight_col) + Tensor(bias).asnumpy()[0]
    mse(y_predict, y_test)
def poly_transform(X_train, X_test, degree=2):
    """Expand a single feature column into polynomial features.

    Args:
        X_train: Training feature exposing ``.values`` (e.g. a pandas
            Series); it is reshaped into a single column.
        X_test: Test feature in the same form as ``X_train``.
        degree: Highest polynomial degree to generate.

    Returns:
        A ``(train, test)`` tuple of transformed arrays, one row per sample,
        without a bias column.
    """
    poly = PolynomialFeatures(degree=degree, include_bias=False)
    # Fit on the training split only and reuse the fitted transformer for the
    # test split, so the test data never influences the fitted state.
    train_poly = poly.fit_transform(X_train.values.reshape(len(X_train), 1))
    test_poly = poly.transform(X_test.values.reshape(len(X_test), 1))
    return train_poly, test_poly
class LinearNet(nn.Cell):
    """Single fully connected layer mapping ``n`` input features to one output."""

    def __init__(self, n=1):
        super().__init__()
        # One dense layer whose weight and bias are both initialised with
        # Normal(0.02).
        self.fc = nn.Dense(
            in_channels=n,
            out_channels=1,
            weight_init=Normal(0.02),
            bias_init=Normal(0.02),
            has_bias=True,
        )

    def construct(self, x):
        # Forward pass: apply the dense layer and return its output.
        return self.fc(x)
def main():
    """Train the linear model on the vaccination data and report the test error."""
    # ---- Load the data (path is relative to the working directory) ----
    df = pd.read_csv("vaccine.csv")
    features, target = df["Year"], df["Values"]

    # First 70% of the rows for training, the remainder for testing.
    split_num = int(len(features) * 0.7)
    X_train, X_test = features[:split_num], features[split_num:]
    y_train, y_test = target[:split_num], target[split_num:]

    # ---- Hyper-parameters ----
    batch_number = 1
    repeat_number = 1
    epoch = 1000

    # ---- Features and training dataset ----
    X_train, X_test = poly_transform(X_train, X_test, degree=1)
    ds_train = create_dataset(X_train, y_train, batch_number, repeat_number)
    print(ds_train)

    # ---- Model, loss and optimizer ----
    net = LinearNet(n=1)
    net_loss = nn.loss.MSELoss()
    opt = nn.Momentum(
        net.trainable_params(),
        learning_rate=1e-7,
        momentum=0.01,
    )
    model = Model(net, net_loss, opt)

    # ---- Train, then evaluate on the test split ----
    model.train(epoch, ds_train, dataset_sink_mode=False)
    test_all(net, X_test, y_test)

    # ---- Print the learned regression parameters ----
    for param in net.trainable_params():
        print(param, param.asnumpy())


if __name__ == "__main__":
    main()