Python 中文乱码问题
1.设置编码格式
import sys

# NOTE: Python 2 only. reload(sys) is called first so that
# sys.setdefaultencoding is available again on the sys module.
reload(sys)
sys.setdefaultencoding('utf8')  # set the default encoding to 'utf-8'
2.字典中写入中文,并读取
# -*- coding: utf-8 -*-
# Python 2 example: store Chinese text in a dict and read it back.
# The coding line above must be the first or second line of the script,
# otherwise Python 2 rejects the non-ASCII string literals below.
import json

a = "测试"
b = "200001"
dic = {a: b}

# Printing the dict shows the repr of its key, i.e. the escaped bytes.
# Single-argument print(...) behaves like the print statement on Python 2.
print(dic)

# ensure_ascii=False keeps the Chinese characters readable in the JSON text
# instead of escaping them; encoding names the charset of the byte strings.
print(json.dumps(dic, encoding='UTF-8', ensure_ascii=False))
输出:
{'\xe6\xb5\x8b\xe8\xaf\x95': '200001'}
{"测试": "200001"}
3.文件写入中文
# -*- coding: utf-8 -*-
# Python 2 example: write a dict containing Chinese text to a file as JSON.
# The coding line above must be the first or second line of the script,
# otherwise Python 2 rejects the non-ASCII string literals below.
import codecs
import json

cc = u"你好"
aa = ["测试1", "测试2"]
dic = {cc: aa}
print(dic)

# encoding tells json which charset the byte strings in the dict use;
# ensure_ascii=False keeps the Chinese characters unescaped in the output.
text = json.dumps(dic, encoding="UTF-8", ensure_ascii=False)
print(text)

# Mode 'w' truncates any existing 'zipcode' file, so no separate
# exists()/remove() step is needed.
with codecs.open('zipcode', 'w', 'utf8') as f:
    # The codecs writer encodes to UTF-8 itself, so it is given the text
    # as-is. Passing pre-encoded bytes would make it decode them first with
    # the interpreter's default encoding, which fails for non-ASCII data
    # unless that default has been changed.
    f.write(text)
输出:
{u'\u4f60\u597d': ['\xe6\xb5\x8b\xe8\xaf\x951', '\xe6\xb5\x8b\xe8\xaf\x952']}
{"你好": ["测试1", "测试2"]}
4.示例
get_zipcode.py:
import codecs
import json
import os
import re

# A line is kept only when it starts with at least one digit.
_LEADING_DIGITS = re.compile(r'^\d+')

# Results collected by get_zipcode(): area file name -> list of kept lines.
# Defined at module level so the function also works when imported.
zipcode = {}


def get_zipcode(area):
    """Collect the lines of the file ``areas/<area>`` that start with digits.

    Each kept line has its newline characters stripped. The resulting list
    is stored in the module-level ``zipcode`` dict under the key ``area``
    and is also returned.
    """
    path = os.path.join("areas", area)
    with open(path, 'r') as f:
        codes = [line.strip("\n") for line in f
                 if _LEADING_DIGITS.match(line) is not None]
    zipcode[area] = codes
    return codes


if __name__ == '__main__':
    # The "areas" folder holds files with Chinese names; passing a unicode
    # path makes os.listdir return unicode file names on Python 2.
    upath = u"areas"
    areas = os.listdir(upath)
    # Single-argument print(...) behaves like the print statement on Python 2.
    print(areas)
    for area in areas:
        print(area)
        get_zipcode(area)

    # encoding tells json which charset the byte strings read from the files
    # use; ensure_ascii=False keeps Chinese characters unescaped.
    text = json.dumps(zipcode, encoding='UTF-8', ensure_ascii=False)
    print(text)

    # Mode 'w' truncates any previous 'zipcode' file. The codecs writer
    # encodes to UTF-8 itself, so the text is written without an extra
    # .encode() call.
    with codecs.open('zipcode', 'w', 'utf8') as f:
        f.write(text)