# 13 - LSTM multi-step forecasting: static (persistence) model forecast

import pandas as pd
from sklearn.metrics import mean_squared_error
import math
import matplotlib.pyplot as plt
import datetime

def parser(x):
    """Parse a 'YYYY/MM/DD' date string into a ``datetime.datetime``."""
    date_format = '%Y/%m/%d'
    parsed = datetime.datetime.strptime(x, date_format)
    return parsed

def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Reframe a sequence as a supervised-learning table of lagged columns.

    Each output row holds ``n_in`` lagged observations followed by ``n_out``
    observations starting at the current step.

    Args:
        data: A list or array-like accepted by ``pd.DataFrame``; one column
            per variable.
        n_in: Number of lag steps (t-n_in ... t-1) used as inputs.
        n_out: Number of steps (t ... t+n_out-1) used as outputs.
        dropnan: When true, drop rows containing NaN values introduced by
            shifting.

    Returns:
        A ``pd.DataFrame`` whose columns are named ``var<j>(t-<i>)``,
        ``var<j>(t)`` and ``var<j>(t+<i>)``.
    """
    df = pd.DataFrame(data)
    # Count variables from the frame itself so flat lists, nested lists and
    # arrays are all labelled consistently with the columns actually built.
    n_vars = df.shape[1]
    cols, names = [], []
    # Input sequence: t-n_in, ..., t-1.
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        # Label each column with its real lag ``i`` so names stay unique
        # when n_in > 1.
        names += ['var%d(t-%d)' % (j + 1, i) for j in range(n_vars)]
    # Forecast sequence: t, t+1, ..., t+n_out-1.
    for i in range(n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += ['var%d(t)' % (j + 1) for j in range(n_vars)]
        else:
            names += ['var%d(t+%d)' % (j + 1, i) for j in range(n_vars)]
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    if dropnan:
        agg = agg.dropna()
    return agg

def prepare_data(series, n_test, n_lay, n_seq):
    """Build supervised rows from ``series`` and split them into train/test.

    Args:
        series: Object exposing a 1-D ``.values`` array (e.g. a pandas Series).
        n_test: Number of trailing supervised rows to hold out as test data.
        n_lay: Number of lag observations per row (input width).
        n_seq: Number of forecast steps per row (output width).

    Returns:
        A ``(train, test)`` tuple of 2-D arrays; ``test`` is the last
        ``n_test`` supervised rows and ``train`` is everything before them.
    """
    raw_values = series.values
    # Make the data a single-column 2-D array.
    raw_values = raw_values.reshape(len(raw_values), 1)
    # Convert to supervised rows of n_lay inputs followed by n_seq outputs.
    supervised = series_to_supervised(raw_values, n_lay, n_seq)
    supervised_values = supervised.values
    if n_test == 0:
        # ``[:-0]`` is empty and ``[-0:]`` is the whole array, which would
        # swap the roles of train and test; handle the no-hold-out case
        # explicitly.
        return supervised_values, supervised_values[:0]
    # Last n_test rows are the test set, the rest is the training set.
    train, test = supervised_values[:-n_test], supervised_values[-n_test:]
    return train, test

def persistence(last_ob, n_seq):
    """Return the persistence-model forecast for ``n_seq`` steps.

    The forecast simply repeats the last observation: the result is a list
    containing ``last_ob`` exactly ``n_seq`` times.
    """
    repeated = [last_ob] * n_seq
    return repeated

def make_forecast(train, test, n_lay, n_seq):
    """Produce a persistence forecast for every row of ``test``.

    For each row, the last of the first ``n_lay`` values (the most recent
    observation) is passed to ``persistence`` and repeated ``n_seq`` times.
    ``train`` is accepted but not used.

    Returns:
        A list with one forecast list per test row.
    """
    # The "prediction" just copies the last observed value of each row.
    return [
        persistence(test[row, :n_lay][-1], n_seq)
        for row in range(len(test))
    ]

def evaluate_forecasts(test, forecasts, n_lag, n_seq):
    """Print the predictions and the RMSE for each forecast step.

    For step ``i`` the actual values are column ``n_lag + i`` of ``test`` and
    the predicted values are element ``i`` of each forecast.
    """
    for step in range(n_seq):
        target_column = n_lag + step
        observed = test[:, target_column]
        guessed = [one_forecast[step] for one_forecast in forecasts]
        print('predicted')
        print(guessed)
        mse = mean_squared_error(observed, guessed)
        rmse = math.sqrt(mse)
        print('t+%d RMSE:%f' % (step + 1, rmse))

def plot_forecasts(series, forecasts, n_test):
    """Plot the series and overlay each forecast as a red segment.

    Forecast ``i`` is drawn starting at index ``len(series) - n_test + i - 1``;
    the series value at that index is prepended so the segment is connected
    to the original curve. The x/y coordinates are printed for each segment.
    """
    # Original data.
    plt.plot(series.values)
    # Forecast data.
    first_start = len(series) - n_test - 1
    for idx, predicted in enumerate(forecasts):
        start = first_start + idx
        stop = start + len(predicted) + 1
        xaxis = list(range(start, stop))
        yaxis = [series.values[start]] + predicted
        print('xaxis')
        print(xaxis)
        print('yaxis')
        print(yaxis)
        print('series.values[off_s]')
        print(series.values[start])
        plt.plot(xaxis, yaxis, color='red')
    plt.show()

# Load the shampoo-sales CSV with the first column as the index, then keep the
# 'Sales' column as a Series.
# The date index is parsed after loading with pd.to_datetime and an explicit
# format instead of read_csv's `date_parser` argument, which is deprecated
# since pandas 2.0.
_sales_frame = pd.read_csv('../LSTM系列/Multi-Step LSTM预测1/data_set/shampoo-sales.csv',
                           header=0, index_col=0)
_sales_frame.index = pd.to_datetime(_sales_frame.index, format='%Y/%m/%d')
series = _sales_frame.squeeze('index')['Sales']


# Use one lag observation to forecast three steps ahead.
n_lag = 1
n_seq = 3
n_test = 10
train, test = prepare_data(series, n_test, n_lag, n_seq)
print('train data')
print(train)
print('test data')
print(test)
forecasts = make_forecast(train, test, n_lag, n_seq)
print('forecasts')
print(forecasts)
# The scores are only a baseline: every forecast just repeats the last
# observed value; the point is to demonstrate the multi-step workflow.
evaluate_forecasts(test, forecasts, n_lag, n_seq)
# The supervised framing drops the last n_seq - 1 rows of the series, so the
# test rows start n_test + n_seq - 1 positions from the end of the series.
plot_forecasts(series, forecasts, n_test + n_seq - 1)
# Source: blog post by lotuslaw, posted 2023-02-09 00:04.