Python 2 中文乱码问题

1.设置编码格式
import sys
# Reload the sys module, then switch the interpreter-wide default string
# encoding to UTF-8.
# NOTE(review): the builtin `reload` and `sys.setdefaultencoding` look
# Python 2 specific -- confirm the target interpreter before reusing this.
reload(sys)
sys.setdefaultencoding('utf8') # set the default encoding to 'utf-8'
 
2.字典中写入中文,并读取
a="测试"
b="200001"
dic={}
dic.update({a:b})
print dic
print json.dumps(dic, encoding='UTF-8', ensure_ascii=False)
输出:
{'\xe6\xb5\x8b\xe8\xaf\x95': '200001'}
{"测试": "200001"}
3.文件写入中文
import codecs
cc=u"你好"
aa=["测试1", "测试2"]
dic={}
if os.path.exists("zipcode"): #判断zipcode文件是否存在,存在时删除
    os.remove("zipcode")
with codecs.open('zipcode','ab','utf8') as f:
    dic.update({cc: aa})
    print dic
    dic=json.dumps(dic, encoding="UTF-8", ensure_ascii=False)
    print dic
    f.write(dic.encode('utf-8'))  #文件中写入中文(写入字典dic)
输出:
{u'\u4f60\u597d': ['\xe6\xb5\x8b\xe8\xaf\x951', '\xe6\xb5\x8b\xe8\xaf\x952']}
{"你好": ["测试1", "测试2"]}
4.示例

get_zipcode.py:

def get_zipcode(area):
    """Record the digit-led lines of ``areas/<area>`` in ``zipcode``.

    Opens the file named *area* inside the ``areas`` directory, keeps every
    line that begins with one or more digits (with newline characters
    stripped from both ends) and stores the resulting list in the
    module-level ``zipcode`` dict under the key *area*.

    Args:
        area: Name of a file inside the ``areas`` directory; also used as the
            key in ``zipcode``.

    Returns:
        None. The result is written into the global ``zipcode`` mapping,
        which must exist before this function is called.
    """
    path = os.path.join("areas", area)
    # Raw string avoids the invalid "\d" escape; compiled once, used per line.
    starts_with_digits = re.compile(r'^\d+')
    with open(path, 'r') as f:
        # Iterate the file lazily instead of loading every line up front.
        codes = [
            line.strip("\n")
            for line in f
            if starts_with_digits.match(line) is not None
        ]
    zipcode[area] = codes

if __name__=='__main__':
    zipcode = {}
    if os.path.exists("zipcode"):
        os.remove("zipcode")
    upath = unicode("areas", 'utf-8')
    print os.listdir(upath)
    with codecs.open('zipcode','ab','utf8') as f:
        for area in os.listdir(upath):  #文件夹areas存在多个中文名文件,对文件路径进行unicode编码
            print area
            get_zipcode(area)
        zipcode=json.dumps(zipcode, encoding='UTF-8', ensure_ascii=False)
        print zipcode
        f.write(zipcode.encode('utf-8'))

  

 

 

  

posted @ 2019-07-17 18:32  Sunmer09  阅读(1003)  评论(0)  编辑  收藏  举报