from __future__ import division
from __future__ import print_function  
import numpy as np
import pandas as pd
import matplotlib.pylab as plt
import seaborn as sns
import tensorflow as tf
from tensorflow.python.ops import rnn, rnn_cell
import tushare as ts

def getData(id, start, end, num, flag):
    """Fetch daily history for one stock and return it oldest-first.

    Args:
        id:    stock code string passed straight to tushare (e.g. "600016").
        start: start date string "YYYY-MM-DD".
        end:   end date string "YYYY-MM-DD".
        num:   slice bound on the newest-first frame returned by tushare.
        flag:  the literal string "true" keeps rows 1..num-1 (skips the most
               recent day); any other value keeps rows 0..num-1.

    Returns:
        A list of per-day feature lists, reversed into chronological
        (oldest-first) order.
    """
    df = ts.get_hist_data(id, start, end)
    # tushare returns the most recent day first; "true" drops that newest
    # row — presumably to align features with next-day labels (TODO confirm
    # against callers).
    if flag == "true":
        df = df[1:num]
    else:
        df = df[:num]
    # Convert the frame to plain nested Python lists in one step; this
    # replaces the original element-by-element copy loop.
    x = np.array(df).tolist()
    x.reverse()
    return x


def getDataR(id, start, end, num):
    """Build one-hot direction labels from the close-price series.

    The relative change (close[t-1] - close[t]) / close[t] is computed on the
    newest-first frame, truncated to `num` rows, reversed into chronological
    order, and then every 10th element (one per 10-day feature window) is
    turned into a 3-class one-hot label:
        [1, 0, 0]  change >  0
        [0, 1, 0]  change <= 0
        [0, 0, 1]  otherwise — only reachable for NaN (e.g. the shifted head)

    Returns:
        A list of 3-element one-hot lists, one per 10-day window.

    Note: the original version also built an unused copy of the full feature
    matrix on every call; that dead code has been removed.
    """
    df = ts.get_hist_data(id, start, end)
    close = df['close']
    # Frame is newest-first, so shift(1) pairs each day with the more recent
    # one; the first element of the shifted series is NaN.
    change = (close.shift(1) - close) / close
    change = np.array(change[:num]).tolist()
    change.reverse()
    labels = []
    for i, c in enumerate(change):
        if (i + 1) % 10 != 0:
            continue  # only every 10th day gets a label (one per window)
        if c > 0:
            labels.append([1, 0, 0])
        elif c <= 0:
            labels.append([0, 1, 0])
        else:
            # NaN compares False against everything -> third class.
            labels.append([0, 0, 1])
    return labels

# Stock universe: a single code here; the commented-out line would instead
# use all SSE 50 constituent codes.
#df_sh = ts.get_sz50s()['code']
df_sh =["600016"]
fac = []      # in-sample features, one row of daily indicators per day
ret = []      # in-sample one-hot labels, one per 10-day window
facT = []     # out-of-sample (test) features
retT = []     # out-of-sample (test) labels
predFAC = []  # newest days, used only for the final direction prediction
for ishare in df_sh:
    # In-sample span: up to 601 rows of history ending 2016-08-01, skipping
    # the newest row (flag="true").
    newfac = getData(ishare,'2008-07-22','2016-08-01',601,"true")
    newret = getDataR(ishare,'2008-07-22','2016-08-01',601)
    #fac.append(newfac)
    for i in range(len(newfac)):
        fac.append(newfac[i])
    for i in range(len(newret)):
        ret.append(newret[i])
    
    # Out-of-sample span: up to 101 rows for testing.
    newfacT = getData(ishare,'2016-08-01','2017-01-19',101,"true")
    newretT = getDataR(ishare,'2016-08-01','2017-01-19',101)
    #fac.append(newfac)
    for i in range(len(newfacT)):
        facT.append(newfacT[i])
    for i in range(len(newretT)):
        retT.append(newretT[i])
    
    # Prediction input: the most recent rows (num=11, flag="false" keeps
    # the newest day) — fed to the trained model at the end of the script.
    newpredFAC = getData(ishare,'2016-08-01','2017-01-20',11,"false")
    for i in range(len(newpredFAC)):
        predFAC.append(newpredFAC[i])
# Z-score the features using the *training* mean/std; the same statistics
# are reused for the test and prediction sets so all inputs share one scale.
fac = np.array(fac)
ret = np.array(ret)
meanfac = np.sum(fac, axis=0)/len(fac)
stdfac = np.std(fac, axis=0)
fac = (fac-meanfac)/stdfac

facT = np.array(facT)
retT = np.array(retT)
facT = (facT-meanfac)/stdfac


# Regroup the flat per-day rows into non-overlapping windows of 10
# consecutive days: result shape is (n_windows, 10, n_features).
newf = []
newfa = []
for i in range(len(fac)):
    if((i+1)%10!=0):
        newf.append(fac[i])
    else:
        newf.append(fac[i])
        newfa.append(newf)
        newf = []
fac = np.array(newfa)
# Same windowing for the out-of-sample features.
newfT = []
newfaT = []
for i in range(len(facT)):
    if((i+1)%10!=0):
        newfT.append(facT[i])
    else:
        newfT.append(facT[i])
        newfaT.append(newfT)
        newfT = []
facT = np.array(newfaT)

# predFAC is still a plain list at this point; NumPy broadcasting turns the
# subtraction/division into an array.
predFAC = (predFAC-meanfac)/stdfac

# Training hyper-parameters.
learning_rate = 0.001
batch_size = 10
print(int(fac.shape[0]))
training_iters = int(fac.shape[0]/batch_size)
display_step = 10

# Network Parameters
n_input = 14    # features per day — assumes tushare returns 14 columns; TODO confirm
n_steps = 10    # days per window
n_hidden = 1024
n_classes = 3   # up / down / NaN (see getDataR)
dropout = 0.8
# tf Graph input
x = tf.placeholder('float',[None, n_steps, n_input])
y = tf.placeholder('float',[None, n_classes])
keep_prob = tf.placeholder(tf.float32) #dropout (keep probability)


# Convolution layer: stride-1 SAME-padded 2-D conv + bias + ReLU.
def conv2d(name, l_input, w, b):
    """Apply a SAME-padded, stride-1 convolution with bias, then ReLU."""
    conv = tf.nn.conv2d(l_input, w, strides=[1, 1, 1, 1], padding='SAME')
    return tf.nn.relu(tf.nn.bias_add(conv, b), name=name)

# Down-sampling layer.
def max_pool(name, l_input, k):
    """Max-pool over k x k windows with stride k (SAME padding)."""
    kernel = [1, k, k, 1]
    return tf.nn.max_pool(l_input, ksize=kernel, strides=kernel,
                          padding='SAME', name=name)

# Normalisation layer.
def norm(name, l_input, lsize=4):
    """Apply local response normalisation over lsize neighbouring channels."""
    return tf.nn.lrn(l_input, lsize, bias=1.0, alpha=0.001 / 9.0,
                     beta=0.75, name=name)

# AlexNet-style network: three conv/pool/LRN/dropout stages plus two
# fully-connected layers and a linear output layer.
def alex_net(_X, _weights, _biases, _dropout):
    """Run the small AlexNet-like graph and return unscaled class logits.

    _X is reshaped into a (batch, 10, 14, 1) "image": 10 days x 14 daily
    features, one channel. Softmax is applied later, in the loss.
    """
    net = tf.reshape(_X, shape=[-1, 10, 14, 1])

    # Three identical stages: conv -> max-pool -> LRN -> dropout.
    for i in ('1', '2', '3'):
        net = conv2d('conv' + i, net, _weights['wc' + i], _biases['bc' + i])
        net = max_pool('pool' + i, net, k=2)
        net = norm('norm' + i, net, lsize=4)
        net = tf.nn.dropout(net, _dropout)

    # Flatten the feature maps to match the first FC weight matrix.
    flat = tf.reshape(net, [-1, _weights['wd1'].get_shape().as_list()[0]])
    fc1 = tf.nn.relu(tf.matmul(flat, _weights['wd1']) + _biases['bd1'], name='fc1')
    fc2 = tf.nn.relu(tf.matmul(fc1, _weights['wd2']) + _biases['bd2'], name='fc2')

    # Linear output layer (no activation).
    return tf.matmul(fc2, _weights['out']) + _biases['out']

# All trainable parameters, initialised from a standard normal distribution.
weights = {
    'wc1': tf.Variable(tf.random_normal([3, 3, 1, 64])),    # conv1: 3x3, 1 -> 64 channels
    'wc2': tf.Variable(tf.random_normal([3, 3, 64, 128])),  # conv2: 3x3, 64 -> 128
    'wc3': tf.Variable(tf.random_normal([3, 3, 128, 256])), # conv3: 3x3, 128 -> 256
    'wd1': tf.Variable(tf.random_normal([1024, 1024])),     # FC1 (flattened maps -> 1024)
    'wd2': tf.Variable(tf.random_normal([1024, 1024])),     # FC2
    'out': tf.Variable(tf.random_normal([1024, n_classes])) # output logits
}
biases = {
    'bc1': tf.Variable(tf.random_normal([64])),
    'bc2': tf.Variable(tf.random_normal([128])),
    'bc3': tf.Variable(tf.random_normal([256])),
    'bd1': tf.Variable(tf.random_normal([1024])),
    'bd2': tf.Variable(tf.random_normal([1024])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}

# Build the model graph.
pred = alex_net(x, weights, biases, keep_prob)

# Loss (softmax cross-entropy on the logits) and Adam optimiser.
# NOTE(review): positional (logits, labels) matches the pre-TF-1.5
# signature of softmax_cross_entropy_with_logits; newer TF requires
# keyword arguments.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Accuracy: fraction of windows whose predicted argmax matches the label.
correct_pred = tf.equal(tf.argmax(pred,1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initialiser for all variables above.
init = tf.global_variables_initializer()

# Train, evaluate, and predict in a single session.
with tf.Session() as sess:
    sess.run(init)
    # 100 full passes over the training windows.
    for tr in range(100):
    #for tr in range(3):
        for i in range(int(len(fac)/batch_size)):
            # One mini-batch of 10-day windows and matching one-hot labels.
            batch_x = fac[i*batch_size:(i+1)*batch_size].reshape([batch_size,n_steps,n_input])
            batch_y = ret[i*batch_size:(i+1)*batch_size].reshape([batch_size,n_classes])
            sess.run(optimizer,feed_dict={x:batch_x,y:batch_y,keep_prob:dropout})
            if(i%50==0):
                print(i,'----',(int(len(fac)/batch_size)))
        # Loss/accuracy are reported on the *last* batch of the epoch only.
        loss, acc = sess.run([cost, accuracy], feed_dict={x: batch_x,y: batch_y, keep_prob:0.8})
        print("Iter " + str(tr*batch_size) + ", Minibatch Loss= " +"{:.26f}".format(loss) + ", Training Accuracy= " +"{:.26f}".format(acc))
    print("Optimization Finished!") 
    # In-sample accuracy, measured on the first training batch only.
    print("Accuracy in data set")
    test_data = fac[:batch_size].reshape([batch_size,n_steps,n_input])
    test_label = ret[:batch_size].reshape([batch_size,n_classes])
    loss, acc = sess.run([cost, accuracy], feed_dict={x: test_data,y: test_label, keep_prob:1.})
    print("Accuracy= " +"{:.26f}".format(acc))
    
    # Out-of-sample accuracy on the whole test set (dropout disabled).
    print("Accuracy out of data set")
    test_dataT = facT[:len(facT)].reshape([len(facT),n_steps,n_input])
    test_labelT = retT[:len(facT)].reshape([len(facT),n_classes])
    loss, acc = sess.run([cost, accuracy], feed_dict={x: test_dataT,y: test_labelT, keep_prob:1.})
    print("Accuracy= " +"{:.26f}".format(acc))
    
    # Predict the direction for the newest 10-day window.
    pred_dataT = predFAC[:batch_size].reshape([1,n_steps,n_input])
    pred_lable = sess.run([pred],feed_dict={x: pred_dataT, keep_prob:1.})
    list_lable = pred_lable[0][0]
    maxindex = np.argmax(list_lable)
    #print("Predict_label is " + str(pred_lable[0][0]))
    # NOTE(review): there are three classes but only class 0 prints "up";
    # classes 1 and 2 (down / NaN) both fall through to "down".
    if(maxindex==0):
        print("up")
    else:
        print("down")
    sess.close()  # redundant: the `with` block already closes the session

快疯了,也存在过拟合问题,做个笔记,以后慢慢完善

posted on 2017-01-22 18:07  薄樱  阅读(740)  评论(0编辑  收藏  举报