分享一个统计文档中不同key的个数(以及每个key出现次数)的Python脚本
前提:
原统计文档中的条目以空格分隔,或者只有一列(这样方便用awk筛选出某一列)。
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""Count how many distinct keys a text file contains and how often each occurs.

The script extracts column 10 of ``count.txt`` with awk into
``awkCountitem.txt`` (one key per line), then prints the number of samples,
the number of distinct keys, and the occurrence count of every key.
"""
import sys
import os
from collections import Counter

# ANSI escape sequences used to colour the terminal output.
PRINTRED = "\033[1;31m"
PRINTGREEN = "\033[0;32;47m"  # green text on a white background
PRINTGREEN_SIM = "\033[1;32m"  # green text, no background colour
PRINTBLUE = "\033[1;34m"
PRINTCOLOR_END = "\033[0m"  # reset all attributes


def getAllItemList(fileName):
    """Return the non-blank lines of *fileName*, stripped of whitespace.

    Args:
        fileName: Path of the text file to read, one item per line.

    Returns:
        A list with one stripped string per line that contains at least one
        non-whitespace character, in file order.

    Raises:
        SystemExit: With status 1 if the file cannot be opened or read
            (an error message is printed first).
    """
    try:
        # ``with`` guarantees the handle is closed even if reading fails.
        with open(fileName, 'r') as fp:
            fLines = fp.readlines()
    except IOError:
        print("error: file not found, please check it !!!")
        # Non-zero status so shells and callers can detect the failure.
        sys.exit(1)

    stripped = (line.strip() for line in fLines)
    # Whitespace-only lines strip down to "" and are dropped here.
    return [item for item in stripped if item]


def calSameItemCount(itemList):
    """Print the number of distinct items and the count of each item.

    Args:
        itemList: Iterable of hashable items (the keys to be counted).

    Returns:
        None. The report is written to standard output.
    """
    # Counter replaces the dict.has_key() bookkeeping, which does not exist
    # on Python 3 dictionaries.
    sameCountDict = Counter(itemList)

    print("不重复key个数:{cstart}{count:d}{cend}".format(
        cstart=PRINTRED, count=len(sameCountDict), cend=PRINTCOLOR_END))
    print("不重复的key如下:")
    for key, value in sameCountDict.items():
        print("{key} 有[{cstart}{count:d}{cend}]个!".format(
            key=key, cstart=PRINTRED, count=value, cend=PRINTCOLOR_END))


def main():
    """Extract column 10 of count.txt with awk and report its key counts."""
    # The key to count is in column 10; the input file name is fixed to
    # count.txt. awk writes that column to an intermediate file.
    status = os.system("awk '{print $10}' count.txt > awkCountitem.txt")
    if status != 0:
        # Without this check a failed awk run leaves an empty intermediate
        # file and the script would silently report zero samples.
        print("error: awk could not extract column 10 from count.txt !!!")
        sys.exit(1)

    fileName = "awkCountitem.txt"
    itemList = getAllItemList(fileName)
    itemCount = len(itemList)
    print("{name}:样本数据[{cstart}{count:d}{cend}] 个!!!".format(
        name=fileName, cstart=PRINTRED, count=itemCount, cend=PRINTCOLOR_END))
    calSameItemCount(itemList)


if __name__ == "__main__":
    main()
我是一块砖,哪里需要往哪搬。