懵懂的菜鸟

Stay hungry, stay foolish.

导航

Python文件操作,读取,修改,合并

  1 # -*- coding:utf-8 -*-
  2 '''
  3 从11c开始提取
  4 '''
  5 import re
  6 import numpy as np
  7 import os
  8 year = '17A'
  9 ss="./data/edmd/"
 10 # filename=ss+"/EDMDI1.17A"
 11 try:
 12     os.rename(ss+"/EDMDI1.17A",ss+"/EDMDI1.txt")
 13 except:
 14     pass
 15 f1=open(ss+"/EDMDI1.txt")
 16 p1=re.compile(r"^(?:\s{3}|X\s{2}|\W\s{2})([A-Z]{6})\s.+\n")
 17 list_tag=list()
 18 for line in f1.readlines():
 19     # print(line)
 20     match1=re.findall(p1,line)
 21     # print(match1)
 22     if match1:
 23         for j in match1:
 24             list_tag.append(j)
 25 # filename_w1= ss+'%s'%list_tag[i]
 26 print(list_tag)
 27 for i in range(len(list_tag)):
 28     try:
 29         os.rename(ss+'%s_D.17A'%list_tag[i],ss+'%s.txt'%list_tag[i])
 30     except:
 31         break
 32 
 33     filename_w= ss+'/new/%s_w.txt'%list_tag[i]
 34     if os.path.exists(filename_w):
 35         os.remove(filename_w)
 36     # import os
 37 
 38     # os.rename('./data/CODECO_D.02A','./data/CODECO_D.txt')
 39     filename_r = ss+'%s.txt'%list_tag[i]  # txt文件和当前脚本在同一目录下,所以不用写具体路径
 40     #00010   UNH Message header      M   1
 41     pattern1   =  re.compile(r"(^\d{5})\s{3}[A-Z]{3}.+[CM]\s{3}\d*\s{1,}\|{0,}\n")#00010
 42     pattern1_2 =  re.compile(r"^\d{5}\s{3}([A-Z]{3}).+[CM]\s{3}\d*\s{1,}\|{0,}\n")#UNH
 43     pattern1_3 =  re.compile(r"^\d{5}\s{3}[A-Z]{3}(.+)[CM]\s{3}\d*\s{1,}\|{0,}\n")#Message header
 44     pattern1_4 =  re.compile(r"^\d{5}\s{3}[A-Z]{3}.+([CM])\s{3}\d*\s{1,}\|{0,}\n")#C
 45     pattern1_5 =  re.compile(r"^\d{5}\s{3}[A-Z]{3}.+[CM]\s{3}(\d*)\s{1,}\|{0,}\n")#1
 46     #pattern2 = re.compile(r"^\d{5}.+Segment\sgroup\s(\d)*.+[CM]\s{3}\d*\-+\+\n" )#+结尾
 47     #00050       ---- Segment group 1  ------------------ C   9----------------+
 48     pattern4_1 = re.compile(r"(^\d{5}).+Segment\sgroup\s\d*.+[CM]\s{3}\d*.+\n")
 49     pattern4_2 = re.compile(r"^\d{5}.+Segment\sgroup\s(\d*).+[CM]\s{3}\d*.+\n")
 50     pattern4_3 = re.compile(r"^\d{5}.+Segment\sgroup\s\d*.+([CM])\s{3}\d*.+\n")
 51     pattern4_4 = re.compile(r"^\d{5}.+Segment\sgroup\s\d*.+[CM]\s{3}(\d*).+\n")
 52     #匹配每组的单独结尾的一行即没有Segment group的以+、+|、+||、+|||……结尾的的每个字段
 53     #如00280   RNG Range details                            C   1---------------+|
 54     pattern5_1 = re.compile(r"(^\d{5})\s{3}[A-Z]{3}.+[CM]\s{3}\d*\-+\+{1,10}\|{0,20}\n" )
 55     pattern5_2 = re.compile(r"^\d{5}\s{3}([A-Z]{3}).+[CM]\s{3}\d*\-+\+{1,10}\|{0,20}\n" )
 56     pattern5_3 = re.compile(r"^\d{5}\s{3}[A-Z]{3}.+([CM])\s{3}\d*\-+\+{1,10}\|{0,20}\n" )
 57     pattern5_4 = re.compile(r"^\d{5}\s{3}[A-Z]{3}.+[CM]\s{3}(\d*)\-+\+{1,10}\|{0,20}\n" )
 58     #以下是确定层级关系
 59     #匹配每组的单独结尾的一行即没有Segment group的以+、+|、+||、+|||……结尾的
 60     pattern5 = re.compile(r"^\d{5}\s{3}[A-Z]{3}.+[CM]\s{3}\d*\-+\+\|{0,10}\n" )
 61     #匹配每组的开头一行即有Segment group的以+、+|、+||、+|||……结尾的
 62     pattern2_1 = re.compile(r"^\d{5}.+Segment\sgroup\s(\d*).+[CM]\s{3}\d*\-+\+\n" )#+结尾
 63     pattern2_2 = re.compile(r"^\d{5}.+Segment\sgroup\s(\d*).+[CM]\s{3}\d*\-+\+\|\n" )#+|结尾
 64     pattern2_3 = re.compile(r"^\d{5}.+Segment\sgroup\s(\d*).+[CM]\s{3}\d*\-+\+\|\|\n" )#+||结尾
 65     pattern2_4 = re.compile(r"^\d{5}.+Segment\sgroup\s(\d*).+[CM]\s{3}\d*\-+\+\|\|\|\n" )
 66     pattern2_5 = re.compile(r"^\d{5}.+Segment\sgroup\s(\d*).+[CM]\s{3}\d*\-+\+\|\|\|\|\n" )
 67     pattern2_6 = re.compile(r"^\d{5}.+Segment\sgroup\s(\d*).+[CM]\s{3}\d*\-+\+\|\|\|\|\|\n" )
 68     pattern2_7 = re.compile(r"^\d{5}.+Segment\sgroup\s(\d*).+[CM]\s{3}\d*\-+\+\|\|\|\|\|\|\n" )
 69     #匹配有同时多个组同时结束的情况,即以++、++|、++||……++、++|、++||……等结尾的
 70     pattern3_1 = re.compile(r"^\d{5}.+[CM]\s{3}\d*\-+\+{2}\|{0,20}\n")# 匹配++、++|、++||……等结尾
 71     pattern3_2 = re.compile(r"^\d{5}.+[CM]\s{3}\d*\-+\+{3}\|{0,20}\n")# 匹配+++、+++|、+++||……等结尾
 72     pattern3_3 = re.compile(r"^\d{5}.+[CM]\s{3}\d*\-+\+{4}\|{0,20}\n")
 73     pattern3_4 = re.compile(r"^\d{5}.+[CM]\s{3}\d*\-+\+{5}\|{0,20}\n")
 74     pattern3_5 = re.compile(r"^\d{5}.+[CM]\s{3}\d*\-+\+{6}\|{0,20}\n")
 75     pattern3_6 = re.compile(r"^\d{5}.+[CM]\s{3}\d*\-+\+{7}\|{0,20}\n")
 76 
 77 
 78     flag = 0
 79     #listgr中第一个不为0的点
 80     pos = -1
 81     listgr =[0,0,0,0,0,0,0,0,0,0]
 82 
 83     fr = open(filename_r)
 84     w2 = open(filename_w,'a')#a代表追加 w代表重写
 85     for line in fr.readlines():
 86         matcher1 = re.findall(pattern1,line)
 87         matcher1_2 = re.findall(pattern1_2,line)
 88         matcher1_3 = re.findall(pattern1_3,line)
 89         matcher1_4 = re.findall(pattern1_4,line)
 90         matcher1_5 = re.findall(pattern1_5,line)
 91         matcher2_1 = re.findall(pattern2_1,line)
 92         matcher2_2 = re.findall(pattern2_2,line)
 93         matcher2_3 = re.findall(pattern2_3,line)
 94         matcher2_4 = re.findall(pattern2_4,line)
 95         matcher2_5 = re.findall(pattern2_5,line)
 96         matcher2_6 = re.findall(pattern2_6,line)
 97         matcher2_7 = re.findall(pattern2_7,line)
 98         matcher3_1 = re.findall(pattern3_1,line)
 99         matcher3_2 = re.findall(pattern3_2,line)
100         matcher3_3 = re.findall(pattern3_3,line)
101         matcher3_4 = re.findall(pattern3_4,line)
102         matcher3_5 = re.findall(pattern3_5,line)
103         matcher3_6 = re.findall(pattern3_6,line)
104         matcher4_1 = re.findall(pattern4_1,line)
105         matcher4_2 = re.findall(pattern4_2,line)
106         matcher4_3 = re.findall(pattern4_3,line)
107         matcher4_4 = re.findall(pattern4_4,line)
108         matcher5   = re.findall(pattern5,line)
109         matcher5_1 = re.findall(pattern5_1,line)
110         matcher5_2 = re.findall(pattern5_2,line)
111         matcher5_3 = re.findall(pattern5_3,line)
112         matcher5_4 = re.findall(pattern5_4,line)
113 
114         if matcher4_1!=[]:
115             w2.write("\n")
116             for j in matcher4_1:
117                 for k in j:
118                     w2.write(k)
119         if matcher4_2!=[]:
120             w2.write(",")
121             #写入parent列
122             if pos!= -1:
123                 numgr =listgr[pos]
124             else:
125                 numgr = 0
126             w2.write("SG"+str(numgr)+",")
127             for j in matcher4_2:
128                 for k in j:
129                     w2.write(k)
130         if matcher4_3!=[]:
131             flag = 3
132             w2.write(",")
133             for j in matcher4_3:
134                 for k in j:
135                     w2.write(k)
136         if matcher4_4!=[]:
137             w2.write(",")
138             for j in matcher4_4:
139                 for k in j:
140                     w2.write(k)
141         if matcher5_1!=[]:
142             w2.write("\n")
143             for j in matcher5_1:
144                 for k in j:
145                     w2.write(k)
146         if matcher5_2!=[]:
147             w2.write(",")
148             #写入parent列
149             if pos!= -1:
150                 numgr =listgr[pos]
151             else:
152                 numgr = 0
153             w2.write("SG"+str(numgr)+",")
154             for j in matcher5_2:
155                 for k in j:
156                     w2.write(k)
157         if matcher5_3!=[]:
158             flag = 3
159             w2.write(",")
160             for j in matcher5_3:
161                 for k in j:
162                     w2.write(k)
163         if matcher5_4!=[]:
164             w2.write(",")
165             for j in matcher5_4:
166                 for k in j:
167                     w2.write(k)
168         #确定层级关系,也就是确定listgr
169         if(matcher5!=[]):
170             for i in listgr:
171                 if i==0:
172                     pos = listgr.index(i)-1
173                     break
174             listgr[pos]=0
175         if (matcher2_1!=[]):
176             # print "2_1"
177             for j in matcher2_1:
178                 # print j
179                 if(listgr[0]==0):
180                     listgr[0]=j
181                 else:
182                     listgr[0]=0
183             # print listgr
184         if (matcher2_2!=[]):
185             for j in matcher2_2:
186                 #numgr_d = j
187                 if(listgr[1]==0):
188                     listgr[1]=j
189                 else:
190                     listgr[1]=0
191         if (matcher2_3!=[]):
192             for j in matcher2_3:
193                 if(listgr[2]==0):
194                     listgr[2]=j
195                 else:
196                     listgr[2]=0
197         if (matcher2_4!=[]):
198             for j in matcher2_4:
199                 if(listgr[3]==0):
200                     listgr[3]=j
201                 else:
202                     listgr[3]=0
203         if (matcher2_5!=[]):
204             for j in matcher2_5:
205                 if(listgr[4]==0):
206                     listgr[4]=j
207                 else:
208                     listgr[4]=0
209         if (matcher2_6!=[]):
210             for j in matcher2_6:
211                 if(listgr[5]==0):
212                     listgr[5]=j
213                 else:
214                     listgr[5]=0
215         if (matcher2_7!=[]):
216             for j in matcher2_7:
217                 if(listgr[6]==0):
218                     listgr[6]=j
219                 else:
220                     listgr[6]=0
221         if (matcher3_1!=[]):
222             for i in listgr:
223                 if i==0:
224                     pos = listgr.index(i)-1
225                     break
226             listgr[pos]=0
227             listgr[pos-1]=0
228         if (matcher3_2!=[]):
229             for i in listgr:
230                 if i==0:
231                     pos = listgr.index(i)-1
232                     break
233             for k in range((pos-2),(pos+1)):
234                 listgr[k]=0
235         if (matcher3_3!=[]):
236             for i in listgr:
237                 if i==0:
238                     pos = listgr.index(i)-1
239                     break
240             for k in range((pos-3),(pos+1)):
241                 listgr[k]=0
242         if (matcher3_4!=[]):
243             for i in listgr:
244                 if i==0:
245                     pos = listgr.index(i)-1
246                     break
247             for k in range(pos-4,pos+1):
248                 listgr[k]=0
249         if (matcher3_5!=[]):
250             for i in listgr:
251                 if i==0:
252                     pos = listgr.index(i)-1
253                     break
254             for k in range(pos-5,pos+1):
255                 listgr[k]=0
256         if (matcher3_6!=[]):
257             for i in listgr:
258                 if i==0:
259                     pos = listgr.index(i)-1
260                     break
261             for k in range(pos-6,pos+1):
262                 listgr[k]=0
263          #确定层级关系结束
264         if (matcher1!=[]):
265             flag = 1
266             w2.write("\n")
267             for j in matcher1:
268                 for k in j:
269                     w2.write(k)
270         #print listgr
271         #判断当前lit不为0的位置
272         for i in listgr:
273             if i==0:
274                 pos = listgr.index(i)-1
275                 break
276         if matcher1_2!=[]:
277             flag = 2
278             w2.write(",")
279             #写入parent列
280             if pos!= -1:
281                 numgr =listgr[pos]
282             else:
283                 numgr = 0
284             w2.write("SG"+str(numgr)+",")
285             for j in matcher1_2:
286                 for k in j:
287                     w2.write(k)
288         if matcher1_3!=[]:
289             flag = 3
290             w2.write(",")
291             for j in matcher1_3:
292                 for k in j:
293                     w2.write(k)
294         if matcher1_4!=[]:
295             flag = 4
296             w2.write(",")
297             for j in matcher1_4:
298                 for k in j:
299                     w2.write(k)
300         if ((matcher1_5!=[])and(flag ==4)):
301             flag = 5
302             w2.write(",")
303             for j in matcher1_5:
304                 for k in j:
305                     w2.write(k)
306 
307     w2.close( )
308     fr.close()
309 
# Merge every per-tag result file into one file for this release, putting
# the release name in front of each record.
#
# The per-tag files separate records with a LEADING newline: they begin with
# an empty line and their last record has no line ending. Copying the lines
# verbatim would emit a record-less "<year>," row per file and glue the last
# record of one file to the first row of the next. Records are therefore
# stripped of their line ending, empty lines are dropped, and exactly one
# newline is written after each record.
#
# The merged file is opened with 'w' so that running the script again
# replaces the result instead of appending a second copy of every row.
f2_w = open(ss + '/new/%s.txt' % year, 'w')
try:
    for i in range(len(list_tag)):
        part_name = ss + '/new/%s_w.txt' % list_tag[i]
        if not os.path.exists(part_name):
            # A tag whose source file could not be converted has no result
            # file; report it and keep merging the others.
            print("--%i--is missing: %s" % (i, part_name))
            continue
        f2_r = open(part_name)
        try:
            for line in f2_r:
                record = line.rstrip('\r\n')
                if not record:
                    continue
                f2_w.write(year + ',' + record + '\n')
        finally:
            f2_r.close()
        print("--%i--is ok" % i)
finally:
    f2_w.close()
320 
321 # if __name__ == '__main__':
322     
323 
324 """
325     特殊情况
326 
327 
328 
329     """

 

posted on 2017-08-21 16:45  懵懂的菜鸟  阅读(673)  评论(0)  编辑  收藏  举报