张 永 一个梦想自由的程序员

——————————————— 让科技和智能使人更便捷 ———————————————
  博客园  :: 首页  :: 新随笔  :: 联系 :: 订阅 订阅  :: 管理

研究 -- 很多时候,就是证伪

Posted on 2018-01-19 10:21  hylas  阅读(534)  评论(0)  编辑  收藏  举报

有个想法:
用期货交易数据中连续 10 个 tick(代码中写作 tik)的信息,去检验它们与 3 分钟后的价格波动是否有关系;或者说,神经网络能否从这 10 个 tick 中识别出这种关系。

结果:
花了两天时间,写代码,提取数据, 写网络模型, 训练。
证明结果:
1. 网络模型不能有效识别 10 个 tick 中潜在的特征,也就无法据此作出有效判断。
2. 或者说,3 分钟后的结果与这 10 个 tick 中的特征是无关的。


代码(数据提取):

# encoding: UTF-8

import pandas as pd
import numpy as np
import  time
import os
import sys
import copy

# Path of a single-day sample tick CSV. In the visible code it is only
# referenced by a commented-out dofile() call in the main section.
file = '/home/hylas/dev/data/ru05_20171208.csv'


# file ='d:/test/ru09_20171219.csv'

# Main purpose of this program: generate the "tik10x3miny" X, y data used for
# network prediction. X, y are extracted from each day's data and saved to an
# .h5 file.
#  Two data sets: one uses 5 ticks, with the dim 3 minutes later as Y
#             (dim >= 10 -> 0, dim <= -10 -> 1, everything else filtered out);
#             the other uses 10 ticks, with the dim 3 minutes later as Y (same rule).
# NOTE(review): the class below only builds the 10-tick variant, and it stores
# the sign of dim (+1 / -1) as the label rather than the 0 / 1 mapping stated
# here -- confirm which mapping is intended.

# tick layout: [index, price, vol, amount, bidprice, bvol, askprice, askvol, openvol, closevol, type1, type2]
# X layout:    tick x 5   or   tick x 10
# Y layout:    dim >= 10 -> 0, dim <= -10 -> 1, everything else filtered out


class tik10x3min():
    """Turn per-day futures tick CSV files into (X, y) samples stored as .h5.

    For every tick whose look-ahead price move is large enough, the
    ``TIK_WINDOW`` ticks preceding it are flattened into one feature row and
    the sign of the move (+1 / -1) becomes the label.

    Tick layout used throughout (12 fields):

        0 index, 1 price, 2 vol, 3 amount, 4 bidprice, 5 bvol,
        6 askprice, 7 askvol, 8 openvol, 9 closevol, 10 type1, 11 type2

    Fields 8-11 start at 0 and are filled in by :meth:`satype`.

    The syntax is kept valid for both Python 2 and Python 3.
    """

    # Number of consecutive ticks flattened into one feature row.
    TIK_WINDOW = 10
    # How many CSV rows ahead the label price is read from (the header
    # comments of this script describe this as "3 minutes later").
    LOOKAHEAD_ROWS = 360
    # Divisor applied to the raw price difference before thresholding.
    # NOTE(review): presumably the contract's minimum price step -- confirm.
    PRICE_STEP = 5
    # A sample is kept only when the scaled move is <= -threshold or >= threshold.
    MOVE_THRESHOLD = 10
    # CSV column positions copied, in order, into tick fields 1..7.
    CSV_COLUMNS = (3, 5, 7, 8, 9, 10, 11)

    # Immutable defaults stay on the class; mutable state lives on the
    # instance (see __init__) so two instances never share a list.
    srcpath = None
    resultpath = './temp/'
    tikcount = 0
    curFile = None
    Xdata = None
    ydata = None

    def __init__(self):
        # List of .h5 files written so far; survives across dofile() calls.
        self.Xy_file = []
        self.initData()

    def initData(self):
        """Reset the per-file working state.

        ``Xy_file`` is deliberately NOT cleared here: dofile() calls this
        method for every input file, and clearing the list would leave the
        index written by dopath() with at most the last file in it.
        """
        self.tikcount = 0
        self.tiklist = []
        self.yDimList = []
        self.Xdata = None
        self.ydata = None
        self.tiklist_df = None
        self.curFile = None

    #   0     1       2    3        4        5     6         7      8         9         10      11
    # [index, price, vol, amount, bidprice, bvol, askprice, askvol, openvol, closevol, type1, type2]
    def satype(self, lasttik, curtik):
        """Fill the derived fields (8-11) of ``curtik`` in place and return it.

        type1 (field 10): 1 when the price is at or above the previous ask,
        -1 when it is at or below the previous bid (the bid test runs last,
        so it wins if both hold); otherwise left unchanged.

        openvol / closevol (fields 8, 9): half the sum / half the difference
        of fields 2 and 3.

        type2 (field 11), later rules overriding earlier ones:
            1 both sides closing (openvol == 0)
            2 both sides opening (closevol == 0)
            3 long open, 4 short close, 5 long turnover    (type1 == 1)
            6 short open, 7 long close, 8 short turnover   (type1 == -1)
        Within each type1 group the choice is openvol >, <, == closevol.

        :param lasttik: previous tick (only bid, field 4, and ask, field 6, are read)
        :param curtik: current tick, modified in place
        :return: the same ``curtik`` list
        """
        price = curtik[1]
        if price >= lasttik[6]:
            curtik[10] = 1
        if price <= lasttik[4]:
            curtik[10] = -1

        # NOTE(review): under Python 2 with integer inputs "/" is floor
        # division here; under Python 3 it is true division.
        openvol = (curtik[2] + curtik[3]) / 2
        closevol = (curtik[2] - curtik[3]) / 2
        curtik[8] = openvol
        curtik[9] = closevol

        if openvol == 0:
            curtik[11] = 1
        if closevol == 0:
            curtik[11] = 2

        direction = curtik[10]
        if direction == 1 or direction == -1:
            base = 3 if direction == 1 else 6
            # The three comparisons are mutually exclusive; if none holds
            # (e.g. NaN volumes) type2 keeps whatever was set above.
            if openvol > closevol:
                curtik[11] = base
            elif openvol < closevol:
                curtik[11] = base + 1
            elif openvol == closevol:
                curtik[11] = base + 2

        return curtik

    def dotik(self, tik):
        """Per-tick hook; intentionally a no-op."""
        pass

    def dotiklist(self):
        """Build ``Xdata`` / ``ydata`` from ``tiklist`` and ``yDimList``.

        For each position ``i`` whose label passes the threshold, the
        ``TIK_WINDOW`` ticks before ``i`` are taken, the price of tick ``i``
        is subtracted from their price / bid / ask fields, and the result is
        flattened into one row. The label is the sign of the move.

        On return ``Xdata`` has shape (n, 12 * TIK_WINDOW) and ``ydata``
        (n, 1), also when n == 1; both stay ``None`` when nothing qualifies.
        """
        data = np.array(self.tiklist)
        print(data.shape)

        print('for tiklist: ')

        rows = []
        labels = []
        for i in range(self.TIK_WINDOW, data.shape[0]):
            y = self.yDimList[i]
            # Written as a positive test so that NaN labels are dropped too.
            if not (y <= -self.MOVE_THRESHOLD or y >= self.MOVE_THRESHOLD):
                continue

            window = np.array(self.tiklist[i - self.TIK_WINDOW:i])
            lastprice = self.tiklist[i][1]
            # Express price, bid and ask relative to the current price.
            offset = np.array(
                [0, lastprice, 0, 0, lastprice, 0, lastprice, 0, 0, 0, 0, 0],
                dtype=int)
            rows.append((window - offset).reshape(-1))
            labels.append(y / abs(y))

        if not rows:
            return

        # Stack once at the end: linear instead of re-copying on every
        # sample, and always 2-D even for a single row.
        self.Xdata = np.vstack(rows)
        self.ydata = np.array(labels).reshape(-1, 1)

        print(self.Xdata.shape)
        print(self.ydata.shape)
        print(self.Xdata[0:10])
        print(self.ydata[0:20])

    def _maketik(self, data, row):
        """Return the 12-field tick for CSV row ``row`` (derived fields zeroed)."""
        return [row] + [data[row, col] for col in self.CSV_COLUMNS] + [0, 0, 0, 0]

    def dofile(self, file):
        """Extract samples from one tick CSV and write them to an .h5 file.

        The output is ``<resultpath>/tik10x3min_X_<source name>.h5`` with the
        features under key 'data' and the labels under key 'label'. Nothing
        is written when the file cannot be read or yields no samples.

        :param file: path of the source CSV (read with encoding 'gbk')
        """
        self.initData()
        try:
            df = pd.read_csv(file, header=0, encoding='gbk')
        except Exception as e:
            # Best effort: skip unreadable files, but say so.
            print('skip %s: %s' % (file, e))
            return
        self.curFile = file
        self.tiklist_df = df

        data = np.array(df)
        print(data.shape)

        print('for file: ')

        # Start at 1 (each tick needs a predecessor) and stop early enough
        # that the look-ahead row exists.
        for i in range(1, data.shape[0] - self.LOOKAHEAD_ROWS):
            lasttik = self._maketik(data, i - 1)
            curtik = self.satype(lasttik, self._maketik(data, i))
            self.tiklist.append(curtik)
            move = data[i + self.LOOKAHEAD_ROWS, 3] - data[i, 3]
            self.yDimList.append(move / self.PRICE_STEP)

        self.dotiklist()
        if self.Xdata is None:
            return

        # Save to file.
        fileflag = os.path.splitext(os.path.basename(file))[0]
        destFile = os.path.join(self.resultpath,
                                'tik10x3min_X_' + fileflag + '.h5')
        print(destFile)

        pd.DataFrame(self.Xdata).to_hdf(destFile, key='data')
        pd.DataFrame(self.ydata).to_hdf(destFile, key='label')
        self.Xy_file.append(destFile)

    def setPath(self, srcpath, resultpath):
        """Remember the source and result directories."""
        self.srcpath = srcpath
        self.resultpath = resultpath

    def dopath(self, srcpath, resultpath):
        """Run dofile() on every regular file in ``srcpath``.

        Afterwards the list of .h5 files produced by this call is written to
        ``<resultpath>/data.txt`` (CSV format).

        :param srcpath: directory holding the source CSV files
        :param resultpath: directory receiving the .h5 files and data.txt
        """
        self.setPath(srcpath, resultpath)
        self.Xy_file = []  # the index covers exactly this run

        names = os.listdir(srcpath)  # all files and directories in the folder
        total = len(names)
        for i, name in enumerate(names):
            print('%d / %d' % (i, total))
            path = os.path.join(srcpath, name)
            if not os.path.isfile(path):
                continue
            print(path)
            self.dofile(path)

        xy_df = pd.DataFrame(self.Xy_file)
        xy_df.to_csv(os.path.join(resultpath, 'data.txt'))

def test():
    """Print a 3x3 array before and after subtracting a row vector.

    A quick visual check that numpy broadcasts the 1-D vector across every
    row, which is what the feature extraction relies on. Uses
    single-argument ``print(...)`` so it prints the same on Python 2 and 3.
    """
    xdata = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    print(xdata)

    a2 = np.array([1, 1, 3], dtype=int)
    xdata = xdata - a2  # a2 is subtracted from each row
    print(xdata)


if __name__ == "__main__":
    # The filesystem encoding is looked up but not used further below.
    sys_code_type = sys.getfilesystemencoding()

    # Numpy broadcasting sanity check before the real run.
    test()

    # Input directory of raw tick CSVs and output directory for the .h5 files.
    # Other inputs that were used at some point:
    #   single file : extractor.dofile( file )
    #   one day     : '/home/hylas/dev/data/ru/20171205/' -> './temp/'
    #   goodmin set : '/home/hylas/dev/data2/futuretik/ru/ru2015/'
    #                 -> '/home/hylas/dev/data2/futuretik/ru/goodmin/ru2015X_goodmin_tik/'
    #   2017 data   : /home/hylas/dev/data2/futuretik/ru/goodmin/ru2017
    #   windows     : 'D:/test/rufile/' -> 'D:/test/result/'
    src_dir = '/home/hylas/dev/data2/futuretik/ru/ru2015/'
    dst_dir = '/home/hylas/dev/data2/futuretik/ru/tik10x3min/'

    extractor = tik10x3min()
    extractor.dopath(src_dir, dst_dir)

 



代码(网络训练):

# encoding: UTF-8

from sklearn.datasets import fetch_mldata
import pandas as pd
import numpy as np
import  time

import sys
sys.path.append("/home/hylas/dev/py/project/lib/hyNN/")
#sys.path.append("../../lib/hyNN/")
#sys.path.append("..")
import tool
from tool.dataxdo import *
from tool.dfdo import *
from tool.datadraw import *
from tool.imgdo import *

from ML.CNN import *
from ML.MLP import *
from ML.LSTMer import *

#sys.path.append("/home/hylas/dev/py/project/lib/")
#import common
#from common.bsMonitor import *

import os
import sys
from sklearn import datasets
from keras.utils import np_utils




# Read the extracted files and assemble X, y.
# Feed X, y into the MLP / LSTM models for classification.


class runTik10x3miny():
    """Load the extracted tick samples and train a classifier on them.

    ``X`` / ``y`` accumulate the rows of every .h5 file passed to
    :meth:`dofile`. The syntax is kept valid for both Python 2 and Python 3.
    """

    X = None
    y = None

    # Defaults used by makedata() when no explicit paths are given.
    DATA_PATH = '/home/hylas/dev/data2/futuretik/ru/tik10x3min/'
    CACHE_FILE = '/home/hylas/dev/data2/futuretik/ru/modeh5data/tik10x3miny.h5'

    def __init__(self):
        print('runTik10x3miny init')

    def dofile(self, path):
        """Append the samples stored in one .h5 file to ``self.X`` / ``self.y``.

        Features are read from key 'data', labels from key 'label'. Labels
        are cast to int and remapped -1 -> 0, 1 -> 1. A file whose feature
        width differs from what is already loaded is reported and skipped.

        :param path: path of the .h5 file
        """
        df_X = pd.read_hdf(path, key='data')
        df_y = pd.read_hdf(path, key='label')

        df_y[0] = df_y[0].astype(int).map({-1: 0, 1: 1})

        npX = np.array(df_X)
        npY = np.array(df_y)

        if self.X is None:
            self.X = npX
            self.y = npY
            return

        if self.X.shape[1] != npX.shape[1]:
            print(' self.X.shape[1] != npX.shape[1]  ')
            print('%s %s' % (self.X.shape[1], npX.shape[1]))
            return

        self.X = np.vstack((self.X, npX))
        self.y = np.vstack((self.y, npY))

    def loadXyFromDiskPath(self, datapath):
        """Load every ``*.h5`` regular file in ``datapath`` via dofile().

        :param datapath: directory to scan (not recursive)
        :return: ``(self.X, self.y)``; both are ``None`` if nothing was loaded
        """
        names = os.listdir(datapath)  # all files and directories in the folder
        total = len(names)
        for i, name in enumerate(names):
            print('%d / %d' % (i, total))
            path = os.path.join(datapath, name)
            if not os.path.isfile(path):
                continue
            if not path.endswith('.h5'):
                continue
            print(path)
            self.dofile(path)
        return self.X, self.y

    def makedata(self, destFile=None, datapath=None):
        """Return the training arrays, building and caching them on first use.

        If ``destFile`` exists, X and y are read back from it. Otherwise the
        samples are loaded from ``datapath``, the labels are one-hot encoded
        (2 classes), the set is passed through ``dfdo().datadengfen`` and the
        result is written to ``destFile`` (keys 'data' and 'label').

        :param destFile: cache file; defaults to ``CACHE_FILE``
        :param datapath: directory of per-day .h5 files; defaults to ``DATA_PATH``
        :return: ``(X, y)``
        :raises ValueError: when no cache exists and ``datapath`` yields no samples
        """
        if destFile is None:
            destFile = self.CACHE_FILE
        if datapath is None:
            datapath = self.DATA_PATH

        if os.path.exists(destFile):
            X = np.array(pd.read_hdf(destFile, key='data'))
            y = np.array(pd.read_hdf(destFile, key='label'))
            return X, y

        X, y = self.loadXyFromDiskPath(datapath)
        if X is None:
            # Fail with a clear message instead of a TypeError on None[0:100].
            raise ValueError('no .h5 samples found in %s' % datapath)

        print(y[0:100])
        y = np_utils.to_categorical(y, num_classes=2)
        # NOTE(review): datadengfen is a project helper not visible here;
        # presumably it balances the classes -- confirm.
        dd = dfdo()
        X, y = dd.datadengfen(X, y)

        pd.DataFrame(X).to_hdf(destFile, key='data')
        pd.DataFrame(y).to_hdf(destFile, key='label')

        return X, y

    def _load_and_preview(self):
        """Return makedata() after printing shapes and the first ten rows."""
        X, y = self.makedata()
        print(X.shape)
        print(y.shape)
        print(X[0:10])
        print(y[0:10])
        return X, y

    def do(self):
        """Train and evaluate the project's MLP model on the data."""
        model = MLP()
        X, y = self._load_and_preview()
        model.simple_result(X, y)

    def do2(self):
        """Train and evaluate the project's LSTM model (10 steps x 12 fields)."""
        model = LSTMer()
        X, y = self._load_and_preview()
        model.simple_result(X, y, timesteps=10, data_dim=12)

if __name__ == "__main__":
    # The filesystem encoding is looked up but not used further below.
    sys_code_type = sys.getfilesystemencoding()

    # Run the MLP experiment.
    trainer = runTik10x3miny()
    trainer.do()

 



训练结果(训练、验证和最终评估的准确率都停在 0.51 左右,loss 约为 0.692 ≈ ln 2,与随机猜测相当):

1263074/1263074 [==============================] - 22s - loss: 0.6923 - acc: 0.5092 - val_loss: 0.6924 - val_acc: 0.5133
Epoch 67/68
1263074/1263074 [==============================] - 22s - loss: 0.6924 - acc: 0.5097 - val_loss: 0.6924 - val_acc: 0.5117
Epoch 68/68
1263074/1263074 [==============================] - 22s - loss: 0.6923 - acc: 0.5093 - val_loss: 0.6921 - val_acc: 0.5102
evaluate acc: 
150144/155936 [===========================>..] - ETA: 0s[0.6920651392649998, 0.50743253642520003]

 


欢迎讨论(QQ群):   375129936