之前写过一段读取 TDX(通达信)5 分钟线数据文件的代码,如下:
def stock_lc5(self, filepath, name):
    """Convert the tail of a TDX 5-minute (.lc5) data file to CSV.

    At most the last 16000 bytes (500 records of 32 bytes each) of the file
    are decoded record by record.  Every record keeps its raw fields and
    gains an ``eob`` column holding whatever ``self.get_date_str`` returns
    for the packed date/time pair.  The table is printed and written next
    to the input as ``<file>.csv``.

    Args:
        filepath: Directory that contains the data file.
        name: File name of the data file inside ``filepath``.

    Raises:
        OSError: If the file cannot be stat'ed, opened or read.
    """
    # Local import: this block builds the DataFrame itself.
    import pandas as pd

    # "<" forces little-endian standard sizes so the record is 32 bytes on
    # every platform; native "l" is 8 bytes on 64-bit Linux/macOS.
    record = struct.Struct("<HHffffiif")
    tail_bytes = 16000
    columns = [
        "date_int", "time_int", "open", "high", "low", "close",
        "amount", "volume", "other", "eob",
    ]

    file_path = os.path.join(filepath, name)
    file_size = os.path.getsize(file_path)

    # Start at most ``tail_bytes`` before the end, snapped down to a record
    # boundary so a file with stray trailing bytes is still decoded in phase.
    start = max(0, file_size - tail_bytes)
    start -= start % record.size

    with open(file_path, "rb") as f:
        f.seek(start, os.SEEK_SET)
        raw = f.read()

    # Drop an incomplete trailing record instead of failing on it.
    usable = len(raw) - len(raw) % record.size

    # Collect plain rows first and build the frame once; growing a DataFrame
    # row by row through ``df.loc`` is what made this conversion so slow.
    rows = [
        [*fields, self.get_date_str(fields[0], fields[1])]
        for fields in record.iter_unpack(raw[:usable])
    ]
    df = pd.DataFrame(rows, columns=columns)

    print(df)
    df.to_csv(file_path + ".csv")
    print(name, " convert is done\n")
我去,这个版本逐条记录解析,速度慢得酸爽;想想还是用 NumPy 的结构化 dtype 一次性读取比较快:
def stock_lc5(self, filepath, name):
    """Convert the tail of a TDX 5-minute (.lc5) data file to CSV with NumPy.

    At most the last 16000 bytes (500 records of 32 bytes each) of the file
    are loaded in one call through a structured dtype.  An ``eob`` column is
    added holding whatever ``self.get_date_str`` returns for each record's
    ``date_int``/``time_int`` pair, and the table is written next to the
    input as ``<file>.csv``.

    Args:
        filepath: Directory that contains the data file.
        name: File name of the data file inside ``filepath``.

    Raises:
        OSError: If the file cannot be stat'ed, opened or read.
    """
    # Explicit little-endian field types keep the 32-byte layout independent
    # of the host byte order.
    record_dtype = np.dtype([
        ("date_int", "<u2"),
        ("time_int", "<u2"),
        ("open", "<f4"),
        ("high", "<f4"),
        ("low", "<f4"),
        ("close", "<f4"),
        ("amount", "<i4"),
        ("volume", "<i4"),
        ("other", "<f4"),
    ])
    tail_bytes = 16000

    file_path = os.path.join(filepath, name)
    file_size = os.path.getsize(file_path)

    # Start at most ``tail_bytes`` before the end, snapped down to a record
    # boundary so a file with stray trailing bytes is still decoded in phase.
    start = max(0, file_size - tail_bytes)
    start -= start % record_dtype.itemsize

    with open(file_path, "rb") as f:
        f.seek(start, os.SEEK_SET)
        data = np.fromfile(f, dtype=record_dtype)

    df = pd.DataFrame(data, columns=list(record_dtype.names))

    # Row-wise ``DataFrame.apply`` upcasts every row to float64 and returns a
    # DataFrame (not a Series) for an empty frame, which breaks the column
    # assignment.  Iterating the two integer columns hands plain Python ints
    # to the helper and also works when there are no rows.
    df["eob"] = [
        self.get_date_str(date_int, time_int)
        for date_int, time_int in zip(
            df["date_int"].tolist(), df["time_int"].tolist()
        )
    ]

    df.to_csv(file_path + ".csv")
    print(name, " convert is done\n")
这速度,真的爽爆了