有个想法:
用期货交易数据中连续 10 个 tick 的信息,去检验它们与 3 分钟后的价格波动是否存在关系;或者说,神经网络能否从这 10 个 tick 中识别出这种关系。
结果:
花了两天时间,写代码,提取数据, 写网络模型, 训练。
证明结果:
1.网络模型不能有效识别 10个tik中的潜在的特征,并作出有效判断。
2.或者说 3分钟后的结果,与10个tik中的特征 是无关的。
代码(数据提取):
# encoding: UTF-8
import pandas as pd
import numpy as np
import time
import os
import sys
import copy

file = '/home/hylas/dev/data/ru05_20171208.csv'
# file ='d:/test/ru09_20171219.csv'

# Purpose: build (X, y) samples for a neural network from futures tick data.
# Every daily CSV file is turned into one .h5 file holding X ('data') and
# y ('label').
#
# tick layout used throughout this script (12 fields):
#   0      1      2    3       4         5     6         7
#   index, price, vol, amount, bidprice, bvol, askprice, askvol,
#   8        9         10     11
#   openvol, closevol, type1, type2
#
# X: the 10 ticks preceding a tick, flattened to 10 * 12 = 120 values, with
#    the three price fields expressed relative to that tick's price.
# y: sign of the price move 360 ticks later (the article treats this as
#    "3 minutes later"), measured in units of 5; moves smaller than 10 units
#    in either direction are dropped. +1 means up, -1 means down.


class tik10x3min(object):
    """Turn raw tick CSV files into (X, y) training samples stored as HDF5.

    Typical use is ``dopath(srcpath, resultpath)``, which processes every
    file in ``srcpath`` with ``dofile`` and writes an index of the generated
    files to ``data.txt`` in ``resultpath``.
    """

    srcpath = None
    resultpath = './temp/'
    tikcount = 0
    curFile = None
    Xdata = None
    ydata = None

    # Number of preceding ticks that make up one X sample.
    windowTicks = 10
    # How many ticks ahead the label looks.
    horizonTicks = 360
    # The price difference is divided by this value before thresholding.
    tickSize = 5
    # Samples whose scaled move lies strictly inside (-minMove, minMove)
    # are skipped.
    minMove = 10

    def __init__(self):
        # Per-instance containers: the lists used to live on the class and
        # were therefore shared by every instance until initData() ran.
        self.initData()

    def initData(self):
        """Reset all state, including the list of generated files."""
        self._resetFileState()
        self.Xy_file = []

    def _resetFileState(self):
        """Reset only the state that belongs to the file being processed."""
        self.tikcount = 0
        self.tiklist = []
        self.yDimList = []
        self.Xdata = None
        self.ydata = None
        self.curFile = None

    def satype(self, lasttik, curtik):
        """Fill openvol, closevol, type1 and type2 of ``curtik`` in place.

        :param lasttik: previous tick; only its bid (4) and ask (6) are read.
        :param curtik: current tick; fields 8, 9, 10 and 11 are written.
        :return: the same ``curtik`` list that was passed in.
        """
        # type1: +1 when traded at or above the previous ask, -1 when traded
        # at or below the previous bid. The second test wins if both hold.
        if curtik[1] >= lasttik[6]:
            curtik[10] = 1
        if curtik[1] <= lasttik[4]:
            curtik[10] = -1

        # Float division so the result is the same on Python 2 and 3.
        openvol = (curtik[2] + curtik[3]) / 2.0
        closevol = (curtik[2] - curtik[3]) / 2.0
        curtik[8] = openvol
        curtik[9] = closevol

        # NOTE(review): codes 1 and 2 only survive when type1 is 0, because
        # the directional codes below overwrite them. That is the original
        # behaviour and is kept as is - confirm it is intended.
        if openvol == 0:        # both sides closing
            curtik[11] = 1
        if closevol == 0:       # both sides opening
            curtik[11] = 2

        if curtik[10] == 1:
            if openvol > closevol:      # long open
                curtik[11] = 3
            elif openvol < closevol:    # short close
                curtik[11] = 4
            elif openvol == closevol:   # long swap
                curtik[11] = 5
        elif curtik[10] == -1:
            if openvol > closevol:      # short open
                curtik[11] = 6
            elif openvol < closevol:    # long close
                curtik[11] = 7
            elif openvol == closevol:   # short swap
                curtik[11] = 8
        return curtik

    def dotik(self, tik):
        """Placeholder hook; does nothing."""
        pass

    def dotiklist(self):
        """Build ``self.Xdata`` / ``self.ydata`` from the collected ticks.

        For every tick ``i`` whose scaled future move is at least
        ``minMove`` in absolute value, the ``windowTicks`` ticks before it
        are flattened into one X row and the sign of the move becomes y.
        New rows are appended to any rows already held in ``self.Xdata``.
        The result is always 2-D, even when only one sample qualifies.
        """
        data = np.array(self.tiklist)
        print(data.shape)
        print('for tiklist: ')

        window = self.windowTicks
        x_rows = []
        y_rows = []
        for i in range(window, data.shape[0]):
            y = self.yDimList[i]
            if -self.minMove < y < self.minMove:
                continue
            y = y / abs(y)      # keep only the direction: +1 or -1

            # Express price, bid and ask relative to the price of tick i.
            lastprice = self.tiklist[i][1]
            offset = np.array(
                [0, lastprice, 0, 0, lastprice, 0, lastprice, 0, 0, 0, 0, 0],
                dtype=int)
            relative = np.array(self.tiklist[i - window:i]) - offset
            x_rows.append(relative.reshape(-1))
            y_rows.append(np.array([y]))

        if x_rows:
            # Stack once after the loop instead of once per sample.
            new_x = np.vstack(x_rows)
            new_y = np.vstack(y_rows)
            if self.Xdata is None:
                self.Xdata = new_x
                self.ydata = new_y
            else:
                self.Xdata = np.vstack((self.Xdata, new_x))
                self.ydata = np.vstack((self.ydata, new_y))

        if self.Xdata is None:
            return
        print(self.Xdata.shape)
        print(self.ydata.shape)
        print(self.Xdata[0:10])
        print(self.ydata[0:20])

    def _rowToTik(self, index, row):
        """Map one CSV row to the 12-field tick layout (derived fields 0)."""
        return [index, row[3], row[5], row[7], row[8], row[9], row[10],
                row[11], 0, 0, 0, 0]

    def dofile(self, file):
        """Extract samples from one CSV file and save them as HDF5.

        The output file name is derived from the input file name and is
        appended to ``self.Xy_file``. A file that cannot be read is
        reported and skipped; a file that yields no samples writes nothing.

        :param file: path of a tick CSV file (read with gbk encoding).
        """
        # Only per-file state is cleared here; clearing Xy_file as well
        # would make dopath() forget every file but the last one.
        self._resetFileState()
        try:
            df = pd.read_csv(file, header=0, encoding='gbk')
        except Exception as e:
            # Best effort: one bad file must not abort the whole directory.
            print('skip %s: %s' % (file, e))
            return
        self.curFile = file
        self.tiklist_df = df
        data = np.array(df)
        print(data.shape)
        print('for file: ')

        horizon = self.horizonTicks
        scale = float(self.tickSize)
        for i in range(1, data.shape[0] - horizon):
            lasttik = self._rowToTik(i - 1, data[i - 1])
            curtik = self.satype(lasttik, self._rowToTik(i, data[i]))
            self.tiklist.append(curtik)
            # Float division: integer floor division would turn e.g. -49/5
            # into -10 on Python 2 and let it pass the threshold.
            self.yDimList.append((data[i + horizon, 3] - data[i, 3]) / scale)

        self.dotiklist()
        if self.Xdata is None:
            return

        # Save to file.
        fileflag = os.path.splitext(os.path.basename(file))[0]
        if self.resultpath and not os.path.isdir(self.resultpath):
            os.makedirs(self.resultpath)
        destFile = os.path.join(self.resultpath,
                                'tik10x3min_X_' + fileflag + '.h5')
        print(destFile)
        pd.DataFrame(self.Xdata).to_hdf(destFile, key='data')
        pd.DataFrame(self.ydata).to_hdf(destFile, key='label')
        self.Xy_file.append(destFile)

    def setPath(self, srcpath, resultpath):
        """Remember the input and output directories."""
        self.srcpath = srcpath
        self.resultpath = resultpath

    def dopath(self, srcpath, resultpath):
        """Process every regular file in ``srcpath``.

        Writes one .h5 file per input file that yields samples, plus
        ``data.txt`` in ``resultpath`` listing all generated files.
        """
        self.setPath(srcpath, resultpath)
        self.Xy_file = []
        # Sorted so that the processing order is reproducible.
        names = sorted(os.listdir(srcpath))
        total = len(names)
        for i, name in enumerate(names):
            print('%d / %d' % (i, total))
            path = os.path.join(srcpath, name)
            if not os.path.isfile(path):
                continue
            print(path)
            self.dofile(path)
        xy_df = pd.DataFrame(self.Xy_file)
        xy_df.to_csv(os.path.join(resultpath, 'data.txt'))


def test():
    """Print a small demo of subtracting a row vector from a 2-D array."""
    data = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
    xdata = np.array(data)
    print(xdata)
    a2 = np.array([1, 1, 3], dtype=int)
    xdata = xdata - a2
    print(xdata)


if __name__ == "__main__":
    test()
    model = tik10x3min()
    # Other inputs that were used during development:
    #   model.dofile(file)
    #   model.dopath('/home/hylas/dev/data/ru/20171205/', './temp/')
    #   model.dopath('D:/test/rufile/', 'D:/test/result/')
    model.dopath('/home/hylas/dev/data2/futuretik/ru/ru2015/',
                 '/home/hylas/dev/data2/futuretik/ru/tik10x3min/')
代码(网络训练):
# encoding: UTF-8
# NOTE(review): fetch_mldata is not used below and, as far as I know, is no
# longer shipped by recent scikit-learn releases - confirm and drop if the
# import fails.
from sklearn.datasets import fetch_mldata
import pandas as pd
import numpy as np
import time
import sys

# The project-local packages imported below are looked up on this path.
sys.path.append("/home/hylas/dev/py/project/lib/hyNN/")

import tool
from tool.dataxdo import *
from tool.dfdo import *
from tool.datadraw import *
from tool.imgdo import *
from ML.CNN import *
from ML.MLP import *
from ML.LSTMer import *

import os
import sys
from sklearn import datasets
from keras.utils import np_utils


# Read the per-day .h5 files into X, y and feed them to an MLP / LSTM model.
class runTik10x3miny(object):
    """Load the tik10x3min samples and train a model on them."""

    X = None
    y = None

    # Directory holding the per-day .h5 files to load.
    dataPath = '/home/hylas/dev/data2/futuretik/ru/tik10x3min/'
    # Cache file holding the merged, prepared X / y.
    cacheFile = '/home/hylas/dev/data2/futuretik/ru/modeh5data/tik10x3miny.h5'

    def __init__(self):
        print('runTik10x3miny init')

    def dofile(self, path):
        """Append the samples of one .h5 file to ``self.X`` / ``self.y``.

        Labels are cast to int and mapped -1 -> 0, 1 -> 1. A file whose
        column count differs from the data loaded so far is reported and
        skipped.
        """
        df_X = pd.read_hdf(path, 'data')
        df_y = pd.read_hdf(path, 'label')
        df_y[[0]] = df_y[[0]].astype(int)
        df_y[0] = df_y[0].map({-1: 0, 1: 1})
        npX = np.array(df_X)
        npY = np.array(df_y)

        if self.X is None:
            self.X = npX
            self.y = npY
            return
        if self.X.shape[1] != npX.shape[1]:
            print(' self.X.shape[1] != npX.shape[1] ')
            print('%s %s' % (self.X.shape[1], npX.shape[1]))
            return
        self.X = np.vstack((self.X, npX))
        self.y = np.vstack((self.y, npY))

    def loadXyFromDiskPath(self, datapath):
        """Load every ``.h5`` file found directly in ``datapath``.

        :return: ``(self.X, self.y)``; both are still ``None`` when no
            file was loaded.
        """
        # Sorted so that the row order of X / y is reproducible.
        names = sorted(os.listdir(datapath))
        total = len(names)
        for i, name in enumerate(names):
            print('%d / %d' % (i, total))
            path = os.path.join(datapath, name)
            if not os.path.isfile(path):
                continue
            if not path.endswith('.h5'):
                continue
            print(path)
            self.dofile(path)
        return self.X, self.y

    def makedata(self):
        """Return ``(X, y)`` ready for training.

        Reads the cache file when it exists. Otherwise the per-day files
        are loaded, y is one-hot encoded into two classes, the data goes
        through ``dfdo.datadengfen`` and the result is written to the cache.

        :raises ValueError: when no input file could be loaded.
        """
        destFile = self.cacheFile
        if os.path.exists(destFile):
            X = np.array(pd.read_hdf(destFile, 'data'))
            y = np.array(pd.read_hdf(destFile, 'label'))
            return X, y

        dd = dfdo()
        X, y = self.loadXyFromDiskPath(self.dataPath)
        if X is None or y is None:
            # Fail with a clear message instead of a TypeError on None.
            raise ValueError('no .h5 sample files found in ' + self.dataPath)
        print(y[0:100])
        y = np_utils.to_categorical(y, num_classes=2)
        # NOTE(review): datadengfen is a project helper not shown here;
        # presumably it balances or splits the classes - confirm.
        X, y = dd.datadengfen(X, y)
        pd.DataFrame(X).to_hdf(destFile, key='data')
        pd.DataFrame(y).to_hdf(destFile, key='label')
        return X, y

    def _showData(self, X, y):
        """Print the shapes and the first rows of X and y."""
        print(X.shape)
        print(y.shape)
        print(X[0:10])
        print(y[0:10])

    def do(self):
        """Train and evaluate the MLP model on the prepared data."""
        model = MLP()
        X, y = self.makedata()
        self._showData(X, y)
        model.simple_result(X, y)

    def do2(self):
        """Train and evaluate the LSTM model (10 timesteps of 12 values)."""
        model = LSTMer()
        X, y = self.makedata()
        self._showData(X, y)
        model.simple_result(X, y, timesteps=10, data_dim=12)


if __name__ == "__main__":
    run = runTik10x3miny()
    run.do()
训练结果:
1263074/1263074 [==============================] - 22s - loss: 0.6923 - acc: 0.5092 - val_loss: 0.6924 - val_acc: 0.5133 Epoch 67/68 1263074/1263074 [==============================] - 22s - loss: 0.6924 - acc: 0.5097 - val_loss: 0.6924 - val_acc: 0.5117 Epoch 68/68 1263074/1263074 [==============================] - 22s - loss: 0.6923 - acc: 0.5093 - val_loss: 0.6921 - val_acc: 0.5102 evaluate acc: 150144/155936 [===========================>..] - ETA: 0s[0.6920651392649998, 0.50743253642520003]
欢迎讨论(QQ群): 375129936
————————————————————————
让科技和智能使人更便捷 --- 从我做起
————————————————————————