# -*- coding:utf-8 -*-
"""b2_trsd_EDSD_new: convert an EDSD text file into CSV rows.

Reads ``./data/EDSD<year>.txt`` line by line and appends the extracted rows
to ``./data/b2_<year>.csv``.  Adapted to the new version of the input.

Every output row starts with a newline followed by the most recent
three-letter tag captured by ``pattern1``.  Two row shapes are produced:

* composite line  (``pattern2``..``pattern5``):
  ``tag,position,Cnnn,0000,status,year,count, ``
* standalone line (``pattern6``..``pattern10``):
  ``tag,position,C000,code,status,year,count,format``
  The status/count/format columns may come from a following continuation
  line that starts with twelve whitespace characters.

The conversion only runs when the file is executed as a script; importing
the module just defines the patterns and helpers.
"""
import re


ss = "./data/"
year = '17A'
filename = ss + 'EDSD%s.txt' % year

# Header line: captures the three-letter tag (e.g. TC).
p1 = r"^\s{4}(?:X|\W)\s{2}([A-Z]{3})\s\s.+\n"
# Composite line: captures the position (e.g. 010).
p2 = r"(^\d{3})\s{4}C\d{3}.+[CM]\s+\d\n"
# Composite line: captures the composite code (e.g. C552).
p3 = r"^\d{3}\s{4}(C\d{3}).+[CM]\s+\d\n"
# Composite line: captures the status letter (e.g. M).
p4 = r"^\d{3}\s{4}C\d{3}.+([CM])\s+\d\n"
# Composite line: captures the trailing digit (e.g. 1).
p5 = r"^\d{3}\s{4}C\d{3}.+[CM]\s+(\d)\n"
# Standalone line: captures the position (e.g. 030).
p6 = r"(^\d{3})\s{4}\d{4}.+[CM]\s{4}\d\s.*\.\.\d+\n|(^\d{3})\s{4}\w\d{3}\s.+[^\d]$\n"
# Standalone line: captures the code (e.g. 3286).
p7 = r"^\d{3}\s{4}(\d{4}).+[CM]\s{4}\d\s.*\.\.\d+\n|^\d{3}\s{4}(\w\d{3})\s.+[^\d]$\n"
# Standalone line or its continuation line: captures the status letter (e.g. M).
p8 = r"^\d{3}\s{4}\d{4}.+([CM])\s{4}\d\s.*\.\.\d+\n|^\s{12}[A-Z].+([CM])\s{4}\d\s.*\.\.\d+\n"
# Standalone line or its continuation line: captures the digit (e.g. 1).
p9 = r"^\d{3}\s{4}\d{4}.+[CM]\s{4}(\d)\s.*\.\.\d+\n|^\s{12}[A-Z].+[CM]\s{4}(\d)\s.*\.\.\d+\n"
# Standalone line or its continuation line: captures the format (e.g. an..35).
p10 = r"^\d{3}\s{4}\d{4}.+[CM]\s{4}\d\s(.*\.\.\d+)\n|^\s{12}[A-Z].+[CM]\s{4}\d\s(.*\.\.\d+)\n"

pattern1 = re.compile(p1)
pattern2 = re.compile(p2)
pattern3 = re.compile(p3)
pattern4 = re.compile(p4)
pattern5 = re.compile(p5)
pattern6 = re.compile(p6)
pattern7 = re.compile(p7)
pattern8 = re.compile(p8)
pattern9 = re.compile(p9)
pattern10 = re.compile(p10)


def _joined(matches):
    """Concatenate every captured group of every ``findall`` match.

    Works for one-group patterns (list of strings) and two-group patterns
    (list of tuples in which the alternative that did not match is '').
    """
    return "".join("".join(groups) for groups in matches)


def _run_steps(line, flag, first_flag, steps, out):
    """Append the follow-up columns of a row and return the updated flag.

    ``steps`` is a sequence of ``(pattern, suffix)``.  Step ``i`` fires only
    when its pattern matches ``line`` and ``flag == first_flag + i``; it then
    appends ``"," + captured text + suffix`` to ``out`` and advances the flag.
    """
    for offset, (pattern, suffix) in enumerate(steps):
        found = pattern.findall(line)
        if found and flag == first_flag + offset:
            flag += 1
            out.append("," + _joined(found) + suffix)
    return flag


def convert_lines(lines, year_label=None):
    """Return the CSV text produced from ``lines``.

    ``lines`` is an iterable of input lines, each ending in a newline.
    ``year_label`` is the value of the fixed year column; it defaults to the
    module-level ``year``.  Data lines seen before the first header line are
    skipped because there is no tag to attribute them to.
    """
    if year_label is None:
        year_label = year

    composite_steps = (
        (pattern3, ",0000"),            # composite rows default to 0000
        (pattern4, "," + year_label),   # fixed year column
        (pattern5, ", "),
    )
    standalone_steps = (
        (pattern7, ""),
        (pattern8, "," + year_label),   # fixed year column
        (pattern9, ""),
        (pattern10, ""),
    )

    tag = None   # tag of the most recent header line
    flag = 0     # progress marker: 1 header, 2-5 composite, 6-10 standalone
    out = []

    for line in lines:
        headers = pattern1.findall(line)
        if headers:
            flag = 1
            tag = headers[-1]

        # The original test here was "(flag == 1 or 2)", which is always
        # true.  Limiting it to flags 1/2 would drop every row after the
        # first of a segment (flag ends at 5 or 10 after each row), so the
        # working guard is simply "a header has been seen".
        positions = pattern2.findall(line)
        if positions and tag is not None:
            flag = 2
            out.append("\n" + tag + "," + _joined(positions))
        flag = _run_steps(line, flag, 2, composite_steps, out)

        positions = pattern6.findall(line)
        if len(positions) == 1 and tag is not None:
            flag = 6
            # standalone rows default to C000
            out.append("\n" + tag + "," + _joined(positions) + ",C000")
        # flag stays at 7 when the remaining columns are on a continuation
        # line, so a later line can still complete the row.
        flag = _run_steps(line, flag, 6, standalone_steps, out)

    return "".join(out)


def main():
    """Convert ``filename`` and append the rows to ``b2_<year>.csv``."""
    with open(filename) as source:
        rows = convert_lines(source.readlines(), year)
    # 'a' appends to an existing file; 'w' would overwrite it.
    with open(ss + 'b2_%s.csv' % year, 'a') as target:
        target.write(rows)


if __name__ == "__main__":
    main()

# Special cases: none noted.