Change to UTF-8
# Convert ANSI-encoded files to UTF-8, plus helpers for detecting and
# converting the encoding of individual files.
import codecs
import os

try:
    import chardet
except ImportError:
    # chardet is only needed by strJudgeCode/converCode; the __main__ path
    # below uses codecs alone, so a missing package must not stop the script.
    chardet = None


def strJudgeCode(str):
    """Guess the encoding of a byte string with chardet.

    Args:
        str: Raw bytes to inspect. The name shadows the builtin but is kept
            so existing keyword callers continue to work.

    Returns:
        Whatever ``chardet.detect`` returns; ``converCode`` reads its
        ``'encoding'`` entry.

    Raises:
        ImportError: If the chardet package is not installed.
    """
    if chardet is None:
        raise ImportError("chardet is required for encoding detection")
    return chardet.detect(str)


def readFile(path):
    """Return the raw, undecoded bytes stored in the file at *path*.

    The file is opened in binary mode because the caller passes the content
    to ``chardet.detect`` and then calls ``.decode`` on it, both of which
    need bytes rather than text.
    """
    with open(path, 'rb') as f:
        return f.read()


def WriteFile(str, path):
    """Write *str* to *path*, replacing any existing content.

    Args:
        str: Content to write. Bytes are written in binary mode; text is
            written in text mode.
        path: Destination file path.
    """
    mode = 'wb' if isinstance(str, (bytes, bytearray)) else 'w'
    # The context manager closes the handle on every path and lets a failing
    # open() surface its own error.
    with open(path, mode) as f:
        f.write(str)


def converCode(path):
    """Re-encode the file at *path* from UTF-8 to GBK, in place.

    The file is rewritten only when the detected encoding is UTF-8; it is
    left untouched otherwise, including when no encoding is detected.

    Raises:
        UnicodeEncodeError: If the content has characters GBK cannot encode.
    """
    file_con = readFile(path)
    encoding = strJudgeCode(file_con)['encoding']
    if encoding is None or encoding.lower() != 'utf-8':
        return
    WriteFile(file_con.decode('utf-8').encode('gbk'), path)


def listDirFile(dir):
    """Run ``converCode`` on every regular file directly inside *dir*.

    Each entry name and its joined path are printed. Entries that are not
    regular files (for example sub-directories) are skipped, because
    ``converCode`` can only open files.
    """
    for name in os.listdir(dir):
        print(name)
        # os.path.join inserts the separator only when *dir* lacks one.
        filepath = os.path.join(dir, name)
        print(filepath)
        if os.path.isfile(filepath):
            converCode(filepath)


if __name__ == '__main__':
    # Directory containing the files to convert.
    # NOTE(review): this is a raw string, so each "\\" is two literal
    # backslashes; confirm the doubled separators are intended.
    file_path = r"C:\\Users\\Lenovo\\Desktop\\数据库设计\\爬虫脚本\\TRMD\\test"

    for file in os.listdir(file_path):
        file_name = os.path.join(file_path, file)
        # Read the whole file and close it before reopening the same path
        # for writing.
        with codecs.open(file_name, 'r', 'cp852') as f:
            ff = f.read()
        with codecs.open(file_name, 'w', 'utf-8') as file_object:
            file_object.write(ff)