Python处理utf-8 添加和删除BOM头
以下代码只处理 ASCII 和 UTF-8 编码的文件;为保险起见,其它编码的文件暂不支持(检测到后只输出提示并跳过)。
参数
exts 需要处理文件的扩展名
folders 需要处理的文件夹及子目录
处理的根目录为脚本所在目录(folders 中的路径都相对于该目录)
运行:
添加bom头
python proc_bom.py
删除bom头
python proc_bom.py -r
如果运行时报错提示缺少 chardet 模块,可按以下方法安装:
方法1:在线安装
pip install chardet
方法2:离线安装
https://github.com/chardet/chardet
https://pypi.python.org/pypi/chardet#downloads
去下载chardet包
进入到chardet 3.0.4 的解压包里,执行:python setup.py install 就可以完成安装了。
1 #!/usr/bin/python 2 # -*- coding: UTF-8 -*- 3 4 import os; 5 import sys; 6 import codecs; 7 import chardet; 8 9 #获取脚本文件的当前路径 10 def cur_file_dir(): 11 #获取脚本路径 12 path = sys.path[0] 13 #判断为脚本文件还是py2exe编译后的文件,如果是脚本文件,则返回的是脚本的目录,如果是py2exe编译后的文件,则返回的是编译后的文件路径 14 if os.path.isdir(path): 15 return path 16 elif os.path.isfile(path): 17 return os.path.dirname(path) 18 #打印结果 19 20 21 #pip install chardet 安装相应插件 22 def procBOM(strPath,curLen, bAdd): 23 newcontent = ''; 24 f = open(strPath, "rb"); 25 fcontent = f.read(); 26 f.close(); 27 printBuffer = strPath[curLen:] 28 codeType = chardet.detect(fcontent)["encoding"] #检测编码方式 29 printBuffer = printBuffer + " "+str(codeType) 30 31 if codeType.lower().find('utf-8') == -1 and codeType.lower().find('ascii') == -1 : 32 #非utf8文件保险起见先退出,并输出错误提示,todo后续再添加其它转码到utf8 33 print printBuffer + " error OK" 34 return 35 36 #不需要转换,已经添加bom头 37 38 if bAdd and fcontent[:3] != codecs.BOM_UTF8: 39 print printBuffer+" add bom", 40 newcontent = codecs.BOM_UTF8; 41 newcontent += fcontent; 42 elif not bAdd and fcontent[:3] == codecs.BOM_UTF8: 43 newcontent = fcontent[3:]; 44 print printBuffer+" del bom", 45 else: 46 return; 47 fnew = open(strPath, "wb+") 48 fnew.write(newcontent); 49 fnew.close(); 50 print "done" 51 return 52 53 if __name__ == "__main__": 54 55 bAdd = True; 56 exts = ['.h', '.c', '.cpp']; 57 folders = ["GNaviInterface/search","src","tester"] 58 bAdd = True; 59 if(len(sys.argv) > 1 and sys.argv[1] == '-r'): 60 bAdd = False; 61 curLen = len(cur_file_dir()) 62 for folderName in folders: 63 folderPath = cur_file_dir()+"/"+folderName+"/" 64 #print "procBOM:folder path = "+folderPath+",add = "+str(bAdd) 65 for parent,dirnames,filenames in os.walk(folderPath): 66 for f in filenames: 67 bTargetFile = False; 68 for e in exts: 69 if(f.endswith(e)): 70 bTargetFile = True; 71 if(bTargetFile): 72 procBOM(os.path.join(parent,f),curLen, bAdd); 73 #print 'file:%s add:%s' % (os.path.join(parent, f), bAdd);
posted on 2017-12-08 14:59 kenny.wmh 阅读(11194) 评论(1) 编辑 收藏 举报