# 第六讲 循环神经网络 (Lecture 6: Recurrent Neural Networks) -- RNN-Stock
# Train a two-layer SimpleRNN on the daily price history of stock 601318 and
# evaluate its next-day predictions on the held-out tail of the series.
#
# Notebook-only step: the original cell ran the IPython magic below to install
# tushare. That is not valid syntax in a plain Python script, so install the
# package from a shell instead (`pip install tushare`).
# !pip install tushare
import math
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
import tushare as ts
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import Dense, Dropout, SimpleRNN

# Number of consecutive days fed to the network to predict the following day.
WINDOW = 50
# Rows [0, TRAIN_ROWS) form the training split; the remainder is the test split.
TRAIN_ROWS = 2201
# Shared seed so features and labels are shuffled with the same permutation.
SEED = 7


def _make_windows(series, window):
    """Build sliding-window samples from a 2-D, single-column series.

    For every index ``i`` in ``[window, len(series))`` the feature is
    ``series[i - window:i, 0]`` and the label is ``series[i, 0]``.

    Returns:
        A ``(features, labels)`` pair of Python lists, each of length
        ``max(len(series) - window, 0)``.
    """
    features = []
    labels = []
    for end in range(window, len(series)):
        features.append(series[end - window:end, 0])
        labels.append(series[end, 0])
    return features, labels


# Download the daily K-line data and cache it as CSV.
df1 = ts.get_k_data('601318', ktype='D', start='2008-01-01', end='2020-05-10')
datapath1 = "./SH601318.csv"
df1.to_csv(datapath1)

pingan = pd.read_csv(datapath1)
# Notebook leftovers: these only display output when run as the last
# expression of a notebook cell.
pingan.head()
pingan.tail()

# Column index 2 of the CSV is used as the single input feature (the original
# notes describe it as the opening price -- TODO confirm against the CSV header).
training_set = pingan.iloc[0:TRAIN_ROWS, 2:3].values
test_set = pingan.iloc[TRAIN_ROWS:, 2:3].values

# Normalise to [0, 1]. The scaler is fitted on the training split ONLY and then
# applied to the test split; refitting on the test data would scale it
# differently from the data the model was trained on and would make
# inverse_transform below use the wrong min/max.
sc = MinMaxScaler(feature_range=(0, 1))
training_set_scaled = sc.fit_transform(training_set)
test_set = sc.transform(test_set)

# Each training sample is WINDOW consecutive scaled values; the label is the
# value that immediately follows. This yields len(training_set) - WINDOW samples.
x_train, y_train = _make_windows(training_set_scaled, WINDOW)

# Re-seeding before each shuffle applies the same permutation to both lists,
# keeping every feature window aligned with its label.
np.random.seed(SEED)
np.random.shuffle(x_train)
np.random.seed(SEED)
np.random.shuffle(y_train)
tf.random.set_seed(SEED)

x_train, y_train = np.array(x_train), np.array(y_train)

# Reshape to the RNN input layout: [samples, time steps, features per step].
# There are WINDOW time steps and one feature (a single price) per step.
x_train = np.reshape(x_train, (x_train.shape[0], WINDOW, 1))

# Test samples are built the same way but are not shuffled, so the prediction
# plot below stays in chronological order.
x_test, y_test = _make_windows(test_set, WINDOW)
x_test, y_test = np.array(x_test), np.array(y_test)
x_test = np.reshape(x_test, (x_test.shape[0], WINDOW, 1))

model = tf.keras.Sequential([
    SimpleRNN(80, return_sequences=True),  # pass the full sequence to the next RNN layer
    Dropout(0.2),
    SimpleRNN(100),
    Dropout(0.2),
    Dense(1),
])

model.compile(optimizer=tf.keras.optimizers.Adam(0.01),
              loss='mean_squared_error')

checkpoint_save_path = "./checkpoint/rnn_stock.ckpt"

# Resume from earlier weights when the checkpoint's '.index' file is present.
if os.path.exists(checkpoint_save_path + '.index'):
    print("--------load the model-----------")
    model.load_weights(checkpoint_save_path)

# Keep only the weights of the epoch with the lowest validation loss.
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_save_path,
                                                 save_weights_only=True,
                                                 save_best_only=True,
                                                 monitor='val_loss')

history = model.fit(x_train, y_train, batch_size=32, epochs=15,
                    validation_data=(x_test, y_test), validation_freq=1,
                    callbacks=[cp_callback])

model.summary()

# Dump every trainable variable (name, shape, values) for inspection.
with open("./weights.txt", 'w') as f:
    for v in model.trainable_variables:
        f.write(str(v.name) + '\n')
        f.write(str(v.shape) + '\n')
        f.write(str(v.numpy()) + '\n')

loss = history.history['loss']
val_loss = history.history['val_loss']

plt.plot(loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.title('TRaining an Validation Loss')
plt.legend()
plt.show()

# Predict on the test windows and map predictions and ground truth back to the
# original price scale using the training-fitted scaler.
predicted_stock_price = model.predict(x_test)
predicted_stock_price = sc.inverse_transform(predicted_stock_price)
# The first WINDOW test rows have no prediction, so skip them to stay aligned.
real_stock_price = sc.inverse_transform(test_set[WINDOW:])

plt.plot(real_stock_price, color='red', label='Pingan Stock Price')
plt.plot(predicted_stock_price, color='blue', label='Predicted Pingan Stock Price')
plt.xlabel('Time')
plt.ylabel('Pingan Stock Price')
plt.legend()
plt.show()

# MSE: mean of the squared differences between prediction and ground truth.
mse = mean_squared_error(real_stock_price, predicted_stock_price)
# RMSE: square root of the MSE.
rmse = math.sqrt(mse)
# MAE: mean of the absolute differences between prediction and ground truth.
mae = mean_absolute_error(real_stock_price, predicted_stock_price)
print('均方误差: %.6f' % mse)
print('均方根误差: %.6f' % rmse)
print('平均绝对误差: %.6f' % mae)