Python 查看与改变文件的编码格式

查看文件的编码格式

import chardet

# Print the encoding chardet detects for the file's raw bytes.
# The file is only read, so it is opened read-only ('rb'); 'rb+' would
# needlessly require write permission on the file.
with open(r"C:\Users\Administrator\Desktop\111\2.1 (1)smile.txt", 'rb') as fp:
    content = fp.read()
    print(chardet.detect(content))  # e.g. {'encoding': 'UTF-8-SIG', 'confidence': 1.0, 'language': ''}

修改文件的编码格式

import chardet

# Re-encode the file in place as UTF-8, using the encoding chardet detects
# for its current bytes.
with open(r"C:\Users\Administrator\Desktop\111\2.1 (1)smile.txt", 'rb+') as fp:
    content = fp.read()
    encoding = chardet.detect(content)['encoding']
    # chardet reports encoding None when it cannot guess (e.g. an empty
    # file); leave the file untouched in that case instead of crashing.
    if encoding is not None:
        content = content.decode(encoding).encode('utf8')  # convert to UTF-8
        fp.seek(0)
        fp.write(content)
        # Drop leftover bytes when the new content is shorter than the old
        # (e.g. UTF-8-SIG -> UTF-8 removes the 3-byte BOM).
        fp.truncate()
    print(chardet.detect(content))  # e.g. {'encoding': 'utf-8', 'confidence': 0.99, 'language': ''}

案例:修改编码格式并且把所有的txt都整合成一个txt

import os
import chardet

def saveFile(filename, content):
    """Write ``content`` to ``filename`` as UTF-8 text.

    The file is opened in ``"w"`` mode, so an existing file is overwritten.

    Args:
        filename: Path of the file to write.
        content: Text to store in the file.
    """
    # The context manager closes (and flushes) the handle even if write() fails.
    with open(filename, "w", encoding="utf-8") as f:
        f.write(content)
def readFile(filename):
    """Return the whole content of ``filename`` decoded as UTF-8 text.

    Args:
        filename: Path of the file to read.

    Returns:
        The file's text content as a single string.
    """
    # The context manager closes the handle as soon as the read finishes.
    with open(filename, "r", encoding="utf-8") as f:
        return f.read()
def file_extension(path):
    """Return the extension part of ``path`` as split off by ``os.path.splitext``.

    The result includes the leading dot (``".txt"``) and is an empty string
    when ``splitext`` finds no extension.
    """
    _stem, suffix = os.path.splitext(path)
    return suffix
def combinetxt(rootdir, filename):
    """Concatenate the ``.txt`` files directly inside ``rootdir`` into ``filename``.

    Only regular files whose extension is exactly ``".txt"`` are merged;
    subdirectories are not descended into. Each input is read through
    ``readFile`` and written to the output as UTF-8, with nothing inserted
    between files. Inputs are merged in sorted name order so the result is
    reproducible.

    Args:
        rootdir: Directory whose ``.txt`` files are merged.
        filename: Path of the combined output file (overwritten if present).
    """
    print(rootdir,filename)
    # Resolve the output path once so it can be excluded from the inputs:
    # if it lives inside rootdir it must not be merged into itself.
    target = os.path.normcase(os.path.abspath(filename))
    sources = []
    for name in sorted(os.listdir(rootdir)):
        path = os.path.join(rootdir, name)
        if not os.path.isfile(path) or file_extension(path) != ".txt":
            continue
        if os.path.normcase(os.path.abspath(path)) == target:
            continue
        sources.append(path)
    # The context manager closes the output even if reading an input fails.
    with open(filename, "w", encoding="utf-8") as out:
        for path in sources:
            out.write(readFile(path))


if __name__ == '__main__':
    # Directory whose .txt files are converted to UTF-8 and then merged.
    path = r"C:\Users\Administrator\Desktop\111\弄好的"
    for file in os.listdir(path):
        filepath = os.path.join(path, file)
        # Only regular .txt files are re-encoded: subdirectories cannot be
        # opened as files, and other file types are not merged afterwards,
        # so rewriting them in place would only risk corrupting them.
        if not os.path.isfile(filepath) or file_extension(filepath) != ".txt":
            continue
        with open(filepath, 'rb+') as fp:
            content = fp.read()
            encoding = chardet.detect(content)['encoding']
            # chardet reports encoding None when it cannot guess (e.g. an
            # empty file); leave such files untouched.
            if encoding is None:
                continue
            content = content.decode(encoding).encode('utf8')
            fp.seek(0)
            fp.write(content)
            # Drop leftover bytes when the UTF-8 form is shorter than the
            # original content.
            fp.truncate()
    # Merge the converted files; the relative name resolves against the
    # current working directory.
    outfile = "rest.txt"
    combinetxt(path, outfile)
posted @ 2021-02-05 14:52  zhw_sylvia  阅读(934)  评论(0)  编辑  收藏  举报