# do_sings_user_threading.py
#!/usr/bin/env python
"""Append per-stock signal values from daily CSV exports to binary signal files.

Every ``.csv`` file in the source directory is loaded into one table.  For
each distinct stock name and each signal column listed in ``dir_dict``, the
(date, value) pairs of that stock are appended to the file
``<output dir>/<signal folder>/<stock name>`` as fixed 8-byte records.

The work is split into batches of stock names; each batch is handled by its
own thread, and a semaphore caps how many batches run at the same time.
"""
import os
import pathlib
import re
import threading
import time
from struct import Struct

import numpy as np
import pandas as pd

# NOTE: ``np`` and ``re`` are no longer used below; the imports are kept so
# that nothing relying on them being importable from this module breaks.

# Directory holding the newly exported CSV files.
# (An older location was "G:\\datas of status\\python codes\\data of every months".)
NEW_DATA_DIR = "G:\\datas of status\\python codes\\everyday_data"

# Layout of one record in a signal file: an unsigned 32-bit date followed by
# a 32-bit float, native byte order, no padding (8 bytes in total).
_RECORD = Struct("=If")
_RECORD_COLUMNS = ["date", "content"]


def get_new_data(pathdir=NEW_DATA_DIR):
    """Load every ``.csv`` file in *pathdir* into a single DataFrame.

    The ``name`` column is rewritten into a signal file name: ``SZ`` becomes
    ``0_``, ``SH`` becomes ``1_`` and ``.dat`` is appended.  A ``begin``
    column holding ``name`` + ``date`` (both as text) is added.

    Args:
        pathdir: Directory to scan.  Defaults to ``NEW_DATA_DIR``.

    Returns:
        The concatenated rows of all CSV files, indexed 0..n-1.

    Raises:
        FileNotFoundError: If *pathdir* contains no ``.csv`` file.
    """
    # Sorted so the load order is deterministic: later rows win when callers
    # de-duplicate with keep="last".  endswith() replaces the old pattern
    # '.csv$', whose unescaped dot also matched names such as "notcsv".
    csv_paths = [
        os.path.join(pathdir, entry)
        for entry in sorted(os.listdir(pathdir))
        if entry.endswith(".csv")
    ]
    if not csv_paths:
        raise FileNotFoundError("no .csv files found in {}".format(pathdir))

    frames = []
    for csv_path in csv_paths:
        print(csv_path)
        frames.append(pd.read_csv(csv_path))
    # A single concat instead of repeated DataFrame.append, which copied the
    # whole table on every iteration and no longer exists in pandas 2.x.
    df_all = pd.concat(frames, ignore_index=True)

    renamed = (
        df_all["name"]
        .str.replace("SZ", "0_", regex=False)
        .str.replace("SH", "1_", regex=False)
    )
    df_all["name"] = renamed + ".dat"
    df_all["begin"] = df_all["name"].map(str) + df_all["date"].map(str)
    return df_all


def read_file_out(file):
    """Read a binary signal file back into a DataFrame.

    Args:
        file: Path of a file made of 8-byte (date, value) records.

    Returns:
        A DataFrame with the columns ``date`` and ``content``, one row per
        record.  An empty file yields an empty frame with those columns.
        Trailing bytes that do not form a complete record are ignored.
    """
    with open(file, mode="rb") as handle:
        buf = handle.read()
    # iter_unpack needs a whole number of records, so drop any partial tail.
    usable = len(buf) - len(buf) % _RECORD.size
    records = list(_RECORD.iter_unpack(buf[:usable]))
    return pd.DataFrame(records, columns=_RECORD_COLUMNS)


def chang_to_bin(filew, df_all):
    """Append the rows of *df_all* to the binary signal file *filew*.

    Args:
        filew: Target path; the file is created when missing and is always
            opened in append mode, so existing records are kept.
        df_all: Frame with a ``date`` column (converted with ``int``) and a
            ``content`` column (converted with ``float``).

    Raises:
        struct.error: If a date does not fit an unsigned 32-bit integer.
    """
    # Rows are walked positionally, so the frame's index labels are
    # irrelevant.  The payload is built before the file is opened, so a bad
    # value cannot leave a half-written batch behind.
    payload = b"".join(
        _RECORD.pack(int(date), float(value))
        for date, value in zip(df_all["date"], df_all["content"])
    )
    with open(filew, mode="ab") as handle:
        handle.write(payload)


# Signal column -> output folder, for the columns that are actually exported.
dir_dict = {
    "z_da": "signals_user_43",
    "z_te_da": "signals_user_44",
    "f_da": "signals_user_47",
    "f_te_da": "signals_user_48",
}

# Full column -> folder table; not referenced by the code in this file.
dir_dict2 = {
    "z_xiao": "signals_user_41",
    "z_zhong": "signals_user_42",
    "z_da": "signals_user_43",
    "z_te_da": "signals_user_44",
    "f_xiao": "signals_user_45",
    "f_zhong": "signals_user_46",
    "f_da": "signals_user_47",
    "f_te_da": "signals_user_48",
}


def dowork(df_only_name, new_data=None, out_dir=None):
    """Append the signal values of the given stocks to their signal files.

    For every name and every column in ``dir_dict``, the rows of that stock
    are reduced to one (date, value) pair per date (the last one wins) and
    appended to ``<out_dir>/<folder>/<name>``.  Existing file content is not
    merged or de-duplicated, so running the same data twice stores it twice.

    Args:
        df_only_name: Iterable of stock file names (values of ``name``).
        new_data: Table as returned by ``get_new_data``.  Defaults to the
            module-level ``df_new_data`` set by the script entry point.
        out_dir: Root output directory.  Defaults to the module-level
            ``pathdir`` set by the script entry point.

    Raises:
        KeyError: If *new_data* lacks ``name``, ``date`` or a signal column.
    """
    if new_data is None:
        new_data = df_new_data
    if out_dir is None:
        out_dir = pathdir

    for name in df_only_name:
        stock_rows = new_data[new_data["name"] == name]
        for key, folder in dir_dict.items():
            target_dir = pathlib.Path(out_dir, folder)
            # Create the signal folder on demand instead of crashing on the
            # first write when it does not exist yet.
            target_dir.mkdir(parents=True, exist_ok=True)
            records = (
                stock_rows[["date", key]]
                .rename(columns={key: "content"})
                .drop_duplicates(subset=["date"], keep="last")
                .reset_index(drop=True)
            )
            # Append mode creates the file when it is missing, even if there
            # are no records to write.
            chang_to_bin(str(target_dir / name), records)


def run(df_only_name1, semaphore, new_data=None, out_dir=None):
    """Thread entry point: process one batch while holding a semaphore slot.

    Args:
        df_only_name1: Batch of stock file names, forwarded to ``dowork``.
        semaphore: Semaphore limiting how many batches run concurrently.
        new_data: Forwarded to ``dowork``.
        out_dir: Forwarded to ``dowork``.
    """
    # The with-block releases the slot even when dowork raises; the earlier
    # acquire()/release() pair leaked the slot on any error.
    with semaphore:
        dowork(df_only_name1, new_data=new_data, out_dir=out_dir)


def iter_batches(items, batch_size):
    """Yield ``(begin, end, items[begin:end])`` for consecutive batches.

    Every item is covered exactly once; the final batch may be shorter and
    no empty batch is produced.

    Args:
        items: A sliceable sequence with a length.
        batch_size: Maximum number of items per batch; must be positive.

    Raises:
        ValueError: If *batch_size* is not positive.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be positive")
    total = len(items)
    for begin in range(0, total, batch_size):
        end = min(begin + batch_size, total)
        yield begin, end, items[begin:end]


if __name__ == '__main__':
    # Directory receiving the updated signal files.
    # (A TDX client's "T0002\signals" directory was used as an alternative.)
    pathdir = "F:\\python\\untitled1\\core\\do_sings_user"

    df_new_data = get_new_data()
    df_only_name = df_new_data["name"].drop_duplicates(keep='last').tolist()
    all_nums = len(df_only_name)
    print(all_nums)

    every_batch = 13     # stock names handled by one thread
    num_of_thread = 303  # upper bound on batches running at the same time

    # The previous slicing set end = all_nums - 1 for the last batch, which
    # silently skipped the final stock; iter_batches covers every name.
    batches = list(iter_batches(df_only_name, every_batch))
    print(len(batches))

    semaphore = threading.BoundedSemaphore(num_of_thread)
    print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))

    workers = []
    for i, (begin, end, batch_names) in enumerate(batches):
        worker = threading.Thread(
            target=run,
            args=(batch_names, semaphore),
            kwargs={"new_data": df_new_data, "out_dir": pathdir},
        )
        worker.start()
        workers.append(worker)
        print(i, begin, end)
    print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))

    # Join the workers instead of polling threading.active_count() every ten
    # seconds: returns as soon as the work is done and is not confused by
    # unrelated threads.
    for worker in workers:
        worker.join()
    print('-----all threads done-----')
    print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))