懵懂的菜鸟

Stay hungry,Stay foolish.

导航

joint python文件拼接

 1 # -*- coding:utf-8 -*-
 2 import os
 3 import re
 4 p1=r"([0-9][0-9][AB])\.\w{3}$"
 5 
 6 p2=r"^.+\,(\d{4}).+"
 7 
 8 pattern1=re.compile(p1)
 9 pattern2=re.compile(p2)
def get_dir(zz):
    """Return the names of all entries found in the directory ``./<zz>``.

    The names are returned as a new list, in whatever order ``os.listdir``
    reports them.
    """
    return list(os.listdir('./%s' % zz))
18 
def joint_b0(listdir,zz):
    """Concatenate files from ``./<zz>`` into ``./new/<zz>.csv`` with row ids.

    For every file name in ``listdir`` the tag captured by ``pattern1`` is
    taken from the name. The first line of the file is skipped; each later
    line is appended to the output as ``<tag>_<n>,<line>``, where ``n``
    counts from 1 within that file. File names that ``pattern1`` does not
    match contribute nothing to the output.

    The output file is opened in append mode, so repeated runs keep adding
    rows to the same file. The ``./new/`` directory is created if missing.

    :param listdir: names of the files located inside ``./<zz>``.
    :param zz: source directory name; also used as the output file stem.
    """
    if not os.path.exists('./new/'):
        os.makedirs('./new/')
    with open('./new/%s.csv' % zz, 'a') as fw:
        for name in listdir:
            tags = re.findall(pattern1, name)
            if not tags:
                # No tag can be built for this name, so no row can be written.
                # Skipping explicitly replaces a blanket try/except that hid
                # the resulting IndexError (and any other error) on each line.
                continue
            # The context manager closes every input file, not just the last.
            with open('./%s/%s' % (zz, name)) as fr:
                for row_no, line in enumerate(fr):
                    if row_no == 0:
                        # The first line of each file is never copied.
                        continue
                    fw.write('%s_%s,%s\n' % (tags[0], row_no, line.strip('\n')))
45 
def joint_b1(listdir,zz):
    """Concatenate files from ``./<zz>`` into ``./new/<zz>.csv`` without ids.

    Lines are copied verbatim, file by file in the order given by
    ``listdir``, except that every line whose length is exactly 4 characters
    (line terminator included) is dropped.

    The output file is opened in append mode, so repeated runs keep adding
    lines to the same file. The ``./new/`` directory is created if missing.

    :param listdir: names of the files located inside ``./<zz>``.
    :param zz: source directory name; also used as the output file stem.
    """
    if not os.path.exists('./new/'):
        os.makedirs('./new/')
    with open('./new/%s.csv' % zz, 'a') as fw:
        for name in listdir:
            # The context manager closes every input file, not just the last,
            # and an empty ``listdir`` no longer touches an unbound handle.
            with open('./%s/%s' % (zz, name)) as fr:
                # NOTE(review): the 4-character rule presumably filters a
                # short header or separator row -- confirm against the data.
                fw.writelines(line for line in fr if len(line) != 4)
68 
69 
def sort_joint():
    """Number and concatenate the files of every configured directory.

    Each directory named in ``dir_list`` is listed with ``get_dir`` and
    handed to ``joint_b0``, which writes the numbered rows to a file under
    ``./new/``.
    """
    # Directory names left over from a commented-out version of this list:
    # 'b0', 'b3', 'b4', 'b2_idsd', 'b3_idcd'.
    dir_list = ['b1']
    for folder in dir_list:
        joint_b0(get_dir(folder), folder)
79 
def joint_only():
    """Concatenate the files of every configured directory without numbering.

    Each directory named in ``dir_list`` is listed with ``get_dir`` and
    handed to ``joint_b1``, which writes the combined file under ``./new/``.
    """
    # Put the files to be joined into this directory; the combined file is
    # created under ./new/ and can be renamed afterwards as needed.
    dir_list = ['stock2']
    for folder in dir_list:
        joint_b1(get_dir(folder), folder)
86 
def updata(zz):
    """Append the lines of ``./new/<zz>`` matched by ``pattern2`` to ``./new/new.csv``.

    Every matching line is written as ``<captured digits>,<original line>``;
    lines that ``pattern2`` does not match are dropped. The output file is
    opened in append mode.

    :param zz: name of the input file inside ``./new/``.
    """
    # Both handles are closed on exit, so buffered output is always flushed.
    with open('./new/%s' % zz) as fr, open('./new/new.csv', 'a') as fw:
        # Read the whole input before writing: input and output live in the
        # same directory, so the input must not be re-read while it grows.
        for line in fr.readlines():
            found = re.findall(pattern2, line)
            if found:
                fw.write(found[0] + ',' + line)
94             
95 
if __name__ == "__main__":
    # Default action when run as a script: number and concatenate.
    sort_joint()
    # Alternative entry points, switched on by hand when needed:
    # joint_only()
    # updata('b4.csv')

 

posted on 2017-08-30 17:32  懵懂的菜鸟  阅读(464)  评论(0)  编辑  收藏  举报