LSTM运用在股票数据

最近发现了 TensorFlow，正在琢磨其中的 LSTM，并简单地运用到了股票数据上，望各位大牛帮小妹指正。
下面是源代码
from __future__ import division
from __future__ import print_function  
import numpy as np
import pandas as pd
import matplotlib.pylab as plt
import seaborn as sns
import tensorflow as tf
from tensorflow.python.ops import rnn, rnn_cell
import tushare as ts

def getData(id,start,end,num):
    """Return standardized history rows for one stock, in reversed order.

    The history is fetched with ``ts.get_hist_data(id, start, end)``. Every
    column is z-scored using statistics of the whole downloaded frame, the
    first ``num`` rows are kept, the last kept row is dropped and the
    remaining rows are reversed.

    Args:
        id: Stock code passed to tushare.
        start: Start date string passed to tushare.
        end: End date string passed to tushare.
        num: Number of leading rows kept before one row is dropped.

    Returns:
        A list of rows, each row being a list of the standardized values.
        NOTE(review): presumably oldest-first after the reversal, assuming
        tushare returns the newest row first -- confirm.
    """
    frame = ts.get_hist_data(id,start,end)
    # Column-wise mean/std are taken over the full frame, before truncation.
    column_mean = np.sum(frame)/len(frame)
    frame = (frame-column_mean)/(np.std(frame))
    values = np.array(frame[:num])

    rows = [list(record) for record in values]
    rows.pop()
    rows.reverse()
    return rows


def getDataR(id,start,end,num):
    """Build one-hot direction labels, one per block of 5 history rows.

    The history is fetched with ``ts.get_hist_data(id, start, end)`` and the
    first ``num`` rows are used. For every 5th row (positions 0, 5, 10, ...)
    the relative change of the raw closing price against the row 5 positions
    further down is classified:

    * ``[1, 0, 0]`` -- change is positive
    * ``[0, 1, 0]`` -- change is zero or negative
    * ``[0, 0, 1]`` -- change is undefined (NaN, e.g. no row 5 positions down)

    The label of the last sampled row is dropped and the list is reversed,
    mirroring the ``pop``/``reverse`` that ``getData`` applies to its rows.

    Args:
        id: Stock code passed to tushare.
        start: Start date string passed to tushare.
        end: End date string passed to tushare.
        num: Number of leading rows to consider.

    Returns:
        A list of 3-element one-hot lists.

    Raises:
        IndexError: If no label could be produced (empty history).
    """
    df = ts.get_hist_data(id,start,end)

    # The change must be computed on RAW closing prices. The previous
    # implementation z-scored the frame first; standardized prices can be
    # negative or close to zero, so the sign of the ratio no longer told
    # whether the price went up or down.
    close = df['close'][:num]
    reference = close.shift(-5)
    change = (close - reference) / reference

    # NOTE(review): each label measures the move across the 5 rows ending at
    # the sampled row, which largely overlaps the 5-row feature window that
    # getData yields for the same position. The target is therefore visible
    # in the inputs -- confirm whether a forward-looking label was intended.
    labels = []
    for value in change.iloc[::5]:
        if value > 0:
            labels.append([1,0,0])
        elif value <= 0:
            labels.append([0,1,0])
        else:
            # Only reachable when the change is NaN.
            labels.append([0,0,1])

    labels.pop()
    labels.reverse()
    return labels

def _window_rows(rows, width=5):
    """Cut the rows of ONE stock into consecutive groups of ``width`` rows.

    When ``len(rows)`` is not a multiple of ``width`` the leading rows that
    cannot fill a group are discarded, so the groups stay anchored at the
    end of the list. This yields ``len(rows) // width`` groups, the same
    number of labels that getDataR returns for the same history.
    """
    skip = len(rows) % width
    return [rows[i:i + width] for i in range(skip, len(rows), width)]


df_sh = ts.get_sz50s()['code']
# Training set: feature windows and their one-hot labels.
fac = []
ret = []
# Test set: feature windows and their one-hot labels.
facT = []
retT = []
for ishare in df_sh:
    # Windows are built per stock. Flattening all stocks first and grouping
    # afterwards let a stock with a short history produce windows that
    # straddled two stocks and shifted every later window against its label.
    fac.extend(_window_rows(getData(ishare,'2008-07-22','2016-07-22',261)))
    ret.extend(getDataR(ishare,'2008-07-22','2016-07-22',261))

    facT.extend(_window_rows(getData(ishare,'2016-08-01','2017-01-10',31)))
    retT.extend(getDataR(ishare,'2016-08-01','2017-01-10',31))

fac = np.array(fac)
ret = np.array(ret)

facT = np.array(facT)
retT = np.array(retT)

# Report how many test windows and test labels were collected.
print("**")
print(len(facT))
print(len(retT))
print("**")


# Training hyper-parameters.
learning_rate = 0.001
# Number of samples fed to the model per optimisation step.
batch_size = 10
training_iters = int(fac.shape[0]/batch_size)
# Progress is printed every `display_step` batches.
display_step = 10

# Network parameters.
# Number of features per time step; must match the row length produced by
# getData.
n_input = 14
# Number of time steps (rows) per sample.
n_steps = 5
# Hidden units per LSTM direction.
n_hidden = 1024
# Number of output classes (length of the one-hot label).
n_classes = 3

# Graph inputs. They are created exactly once: the earlier duplicate
# definitions only left unused placeholder nodes in the graph.
x = tf.placeholder("float", [None, n_steps, n_input])
y = tf.placeholder("float", [None, n_classes])

# Output-layer parameters.
weights = {
    # 2*n_hidden because forward and backward cell outputs are concatenated.
    'out': tf.Variable(tf.random_normal([2*n_hidden, n_classes]))
}
biases = {
    'out': tf.Variable(tf.random_normal([n_classes]))
}

def BiRNN(x, weights, biases):
    """Build the bidirectional LSTM classifier and return its logits.

    Args:
        x: Input tensor; it is transposed with ``[1, 0, 2]`` and reshaped to
            ``[-1, n_input]``, i.e. it is treated as
            (batch, n_steps, n_input).
        weights: Dict whose ``'out'`` entry is the output projection matrix.
        biases: Dict whose ``'out'`` entry is the output bias.

    Returns:
        ``outputs[-1] @ weights['out'] + biases['out']`` where ``outputs``
        is the per-step output list of ``tf.nn.bidirectional_rnn``.
    """
    # bidirectional_rnn expects a list of n_steps tensors, one per time step:
    # swap the first two axes, flatten, then split along axis 0.
    time_major = tf.transpose(x, [1, 0, 2])
    flattened = tf.reshape(time_major, [-1, n_input])
    step_inputs = tf.split(0, n_steps, flattened)

    # One LSTM cell per direction.
    forward_cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    backward_cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)

    try:
        outputs, _, _ = tf.nn.bidirectional_rnn(
            forward_cell, backward_cell, step_inputs, dtype=tf.float32)
    except Exception: # Old TensorFlow version only returns outputs not states
        outputs = tf.nn.bidirectional_rnn(
            forward_cell, backward_cell, step_inputs, dtype=tf.float32)

    # Linear read-out applied to the output of the last time step.
    projected = tf.matmul(outputs[-1], weights['out'])
    return projected + biases['out']
# Class logits produced by the network for the graph input `x`.
pred = BiRNN(x, weights, biases)

# Loss: mean softmax cross-entropy between the logits and the one-hot labels,
# minimised with Adam.
per_sample_loss = tf.nn.softmax_cross_entropy_with_logits(pred, y)
cost = tf.reduce_mean(per_sample_loss)
adam = tf.train.AdamOptimizer(learning_rate=learning_rate)
optimizer = adam.minimize(cost)

# Accuracy: fraction of samples whose arg-max class equals the label's.
predicted_class = tf.argmax(pred,1)
true_class = tf.argmax(y,1)
correct_pred = tf.equal(predicted_class, true_class)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Op that initialises all variables created above.
init = tf.global_variables_initializer()

# Train for 10 passes over the training windows, then report accuracy on the
# complete training and test sets.
with tf.Session() as sess:
    sess.run(init)
    n_batches = int(len(fac)/batch_size)
    for step in range(10):
        for i in range(n_batches):
            lo = i*batch_size
            hi = lo + batch_size
            batch_x = fac[lo:hi].reshape([batch_size,n_steps,n_input])
            batch_y = ret[lo:hi].reshape([batch_size,n_classes])
            sess.run(optimizer,feed_dict={x:batch_x,y:batch_y})
            if i % display_step ==0:
                print(i,'----',n_batches)
        # Loss/accuracy of the last mini-batch of this pass. Skipped when
        # there was no full batch, because batch_x would be undefined.
        if n_batches:
            loss, acc = sess.run([cost, accuracy], feed_dict={x: batch_x,y: batch_y})
            print("Epoch " + str(step + 1) + ", Minibatch Loss= " +    "{:.6f}".format(loss) + ", Training Accuracy= " +  "{:.5f}".format(acc))
    print("Optimization Finished!")

    # Accuracy is measured on every available sample. Scoring only the
    # first `batch_size` samples gave a resolution of 1/batch_size and made
    # the reshape fail whenever fewer samples than that existed.
    print("Accuracy in data set")
    n_train = min(len(fac), len(ret))
    test_data = fac[:n_train].reshape([-1,n_steps,n_input])
    test_label = ret[:n_train].reshape([-1,n_classes])
    print("Testing Accuracy:",         sess.run(accuracy, feed_dict={x: test_data, y: test_label}))

    print("Accuracy out of data set")
    n_test = min(len(facT), len(retT))
    test_dataT = facT[:n_test].reshape([-1,n_steps,n_input])
    test_labelT = retT[:n_test].reshape([-1,n_classes])
    print("Testing Accuracy:",         sess.run(accuracy, feed_dict={x: test_dataT, y: test_labelT}))
    # No explicit sess.close(): the `with` block closes the session on exit.
运行出来的结果是：数据集内准确率为0.8，数据集外准确率稍小于0.8。但是我没有采取任何防止过拟合的措施，结果好得有点奇怪。
posted on 2017-01-18 09:07 薄樱阅读(8744) 评论(34) 编辑收藏举报