相比于上一个版本,这个版本引入了cost = torch.nn.MSELoss(reduction=True) 和 optimizer(Adam)优化器 

第一步: 进行特征的可视化操作

import pandas as pd
import numpy as np
import datetime
import matplotlib.pyplot as plt

features = pd.read_csv('temps.csv')

# 可视化图形
print(features.head(5))

#使用日期构造可视化图像
dates = [str(int(year)) + "-" + str(int(month)) + "-" + str(int(day)) for year, month, day in zip(features['year'], features['month'], features['day'])]
dates = [datetime.datetime.strptime(date, "%Y-%m-%d") for date in dates]

# 进行画图操作
plt.style.use("fivethirtyeight")

fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(nrows=2, ncols=2, figsize=(10, 10))
fig.autofmt_xdate(rotation=45)

ax1.plot(dates, features["temp_1"])
ax1.set_xlabel('')
ax1.set_ylabel('Temperature')
ax1.set_title("Previous max Temp")

ax2.plot(dates, features["temp_2"])
ax2.set_xlabel('')
ax2.set_ylabel('Temperature')
ax2.set_title("Two day Prio max Temp")

ax3.plot(dates, features["friend"])
ax3.set_xlabel('')
ax3.set_ylabel('Temperature')
ax3.set_title("Friend Estimate")

ax4.plot(dates, features["actual"])
ax4.set_xlabel('')
ax4.set_ylabel('Temperature')
ax4.set_title("Max Temperature")

plt.tight_layout(pad=2)
plt.show()

第二步: 对非数字的特征进行独热编码,使用温度的真实值作为标签,去除真实值的特征作为输入特征,同时使用process进行标准化操作

# 构造独热编码
# 遍历特征,将里面不是数字的特征进行去除
for feature_name in features.columns:
    print(feature_name)
    try:
        float(features.loc[0, feature_name])
    except:
        for s in set(features[feature_name]):
            features[s] = 0

        #根据每一行数据在时间特征上添加为1
        for f in range(len(features)):
            features.loc[f, [features.iloc[f][feature_name]]] = 1

        # 去除对应的week特征
        features = features.drop(feature_name, axis=1)

# 构造独热编码也可以使用
# features = pd.get_dummies(features)

# 构造标签
labels = np.array(features['actual'])

# 构造特征
features = features.drop('actual', axis=1)

# 进行torch网络训练
import torch

# 对特征进行标准化操作
from sklearn import preprocessing

input_feature = preprocessing.StandardScaler().fit_transform(features)


print(input_feature[:, 5])

第三步: 对特征和标签进行torch.tensor处理,转换为tensor格式,使用torch.nn.MseLoss作为损失值, 使用torch.optim.Adam构造优化器 

import torch
import torch.nn as nn

input_size = input_feature.shape[1]
hidden_size = 128
output_size = 1
batch_size = 16
epochs = 1000

my_nn = nn.Sequential(
    torch.nn.Linear(input_size, hidden_size),
    torch.nn.Sigmoid(),
    torch.nn.Linear(hidden_size, output_size)
)

x = torch.tensor(input_feature, dtype=torch.float)
y = torch.tensor(labels, dtype=torch.float)

cost = torch.nn.MSELoss(reduction="mean")
optimizer = torch.optim.Adam(my_nn.parameters(), lr=0.001)
for i in range(epochs):
    batch_mean = []
    for start in range(0, len(input_feature), batch_size):
        end = start + batch_size if start + batch_size < len(input_feature) else len(input_feature)
        xx = x[start:end]
        yy = y[start:end]
        predict = my_nn(xx)
        loss = cost(yy, predict)
        optimizer.zero_grad()
        loss.backward(retain_graph=True)
        optimizer.step()
        batch_mean.append(loss.data.numpy())
    if i % 100 == 0:
        print(np.mean(batch_mean))


for i in my_nn.parameters():
    print(i.data.numpy())

第四步:进行作图的可视化,使用了Adam优化器相较于上述的梯度优化器而言,预测结果更加的平稳 

x = torch.tensor(input_feature, dtype=torch.float)
predict = np.squeeze(my_nn(x).data.numpy())

dates = [str(int(year)) + '-' + str(int(month)) + '-' + str(int(day)) for year, month, day in zip(years, months, days)]
dates = [datetime.datetime.strptime(date, "%Y-%m-%d") for date in dates]

true_data = pd.DataFrame(data = {"date":dates, "actual":labels})

predict_data = pd.DataFrame(data= {"date":dates, "predict":predict})

plt.plot(true_data["date"], true_data["actual"], "b-", label='actual')
plt.plot(predict_data['date'], predict_data['predict'], 'ro', label='predict')
plt.xticks(rotation=60)
plt.title("Temperature Predict")
plt.xlabel("Date")
plt.ylabel("Temperature")
plt.show()

 

posted on 2020-04-08 00:04  python我的最爱  阅读(691)  评论(0编辑  收藏  举报