import os
import chardet
from tkinter import filedialog
from concurrent.futures import ThreadPoolExecutor
#获取文件的编码
def get_all_chardet(filename, max_bytes=1048576):  # reads at most 1 MiB by default
    """Detect and print the encoding of a single file.

    Args:
        filename: Path of the file to inspect.
        max_bytes: Upper bound on the number of bytes read from the start
            of the file and handed to ``chardet.detect``.

    Returns:
        None. The outcome (encoding, empty file, read error or detection
        failure) is reported with ``print``.
    """
    try:
        # Binary mode: chardet needs the raw bytes, not decoded text.
        with open(file=filename, mode='rb') as f3:
            data = f3.read(max_bytes)
    except Exception as e:
        # Kept broad on purpose: this function is submitted to a thread pool
        # whose futures are discarded, so anything not reported here would be
        # lost silently.
        print(f"无法读取文件 {filename},错误信息:{e}")
        return
    if not data:
        print(f"文件 {filename} 内容为空")
        return
    # Read the encoding by key rather than by position in the result dict,
    # so the output does not depend on the dict's ordering.
    encoding = chardet.detect(data)['encoding']
    if not encoding:
        print(f"文件 {filename} 的编码检测失败")
        return
    print(f"{filename} 编码为: {encoding}")
#检测所有文件,使用多线程优化
def all_chardet_files(Folderpath):
    """Report the encoding of every file found under ``Folderpath``.

    The directory tree is walked recursively and each file is handed to
    ``get_all_chardet`` as a thread-pool task. Leaving the ``with`` block
    waits for all submitted tasks to finish.

    Args:
        Folderpath: Root directory to scan.
    """
    with ThreadPoolExecutor() as pool:
        for root, _subdirs, names in os.walk(Folderpath):
            # Build the full paths lazily, then queue one task per file.
            for path in (os.path.join(root, name) for name in names):
                pool.submit(get_all_chardet, path)
#输出指定文件类型的编码
def by_filetype(Folderpath):
    """Report the encoding of files under ``Folderpath`` with a given extension.

    The extension is read interactively and compared exactly against the
    result of ``os.path.splitext`` (so it includes the leading dot, e.g.
    ``.xml``). Matching files are handed to ``get_all_chardet`` as
    thread-pool tasks.

    Args:
        Folderpath: Root directory to scan.
    """
    wanted_ext = input('输入指定文件类型,例如.xml: ')
    with ThreadPoolExecutor() as pool:
        for root, _subdirs, names in os.walk(Folderpath):
            for name in names:
                _stem, ext = os.path.splitext(name)
                if ext != wanted_ext:
                    continue  # not the requested file type
                pool.submit(get_all_chardet, os.path.join(root, name))
#获取指定编码的文件
def get_specified_chardet(filename, b, max_bytes=1048576):  # reads at most 1 MiB by default
    """Print the file's path when its detected encoding matches ``b``.

    Args:
        filename: Path of the file to inspect.
        b: Encoding name to look for (for example ``utf-8``). The comparison
            ignores letter case and surrounding whitespace.
        max_bytes: Upper bound on the number of bytes read from the start
            of the file and handed to ``chardet.detect``.

    Returns:
        None. Matches, empty files and read errors are reported with
        ``print``; non-matching files produce no output.
    """
    try:
        with open(file=filename, mode='rb') as f3:
            data = f3.read(max_bytes)
    except Exception as e:
        # Kept broad on purpose: this runs as a discarded thread-pool task,
        # so an unreported exception would vanish silently.
        print(f"无法读取文件 {filename},错误信息:{e}")
        return
    if not data:
        print(f"文件 {filename} 内容为空")
        return
    # Look the encoding up by key instead of relying on dict value order.
    detected = chardet.detect(data)['encoding']
    # A failed detection (None) can never equal the requested encoding.
    # Encoding names are compared case-insensitively so that e.g. "UTF-8"
    # typed by the user matches a detected "utf-8".
    if detected is not None and detected.lower() == b.strip().lower():
        print(f"文件 {filename} 编码为指定的 {b}")
#输出特定编码的文件
def specified_chardet_files(Folderpath):
    """List the files under ``Folderpath`` whose encoding equals a chosen one.

    The target encoding is read interactively, a header line is printed,
    and every file in the tree is handed to ``get_specified_chardet`` as a
    thread-pool task.

    Args:
        Folderpath: Root directory to scan.
    """
    b = input("请输入需要检测的编码: ")
    print(f"编码是 {b} 的文件如下:")
    with ThreadPoolExecutor() as pool:
        for root, _subdirs, names in os.walk(Folderpath):
            candidates = [os.path.join(root, name) for name in names]
            for candidate in candidates:
                pool.submit(get_specified_chardet, candidate, b)
#获取非指定编码的文件
def get_no_specified_chardet(filename, b, max_bytes=1048576):  # reads at most 1 MiB by default
    """Print the file's path when its detected encoding is NOT ``b``.

    Args:
        filename: Path of the file to inspect.
        b: Encoding name to exclude (for example ``utf-8``). The comparison
            ignores letter case and surrounding whitespace.
        max_bytes: Upper bound on the number of bytes read from the start
            of the file and handed to ``chardet.detect``.

    Returns:
        None. Non-matching files, empty files and read errors are reported
        with ``print``; files whose encoding equals ``b`` produce no output.
    """
    try:
        with open(file=filename, mode='rb') as f3:
            data = f3.read(max_bytes)
    except Exception as e:
        # Kept broad on purpose: this runs as a discarded thread-pool task,
        # so an unreported exception would vanish silently.
        print(f"无法读取文件 {filename},错误信息:{e}")
        return
    if not data:
        print(f"文件 {filename} 内容为空")
        return
    # Look the encoding up by key instead of relying on dict value order.
    detected = chardet.detect(data)['encoding']
    # Compare case-insensitively so a file detected as "utf-8" is not
    # reported as "not UTF-8". A failed detection (None) still counts as
    # "not b", as it did before.
    matches = detected is not None and detected.lower() == b.strip().lower()
    if not matches:
        print(f"文件 {filename} 编码不是 {b}")
#输出非指定编码的文件
def no_specified_chardet_files(Folderpath):
    """List the files under ``Folderpath`` whose encoding differs from a chosen one.

    The encoding to exclude is read interactively, a header line is
    printed, and every file in the tree is handed to
    ``get_no_specified_chardet`` as a thread-pool task.

    Args:
        Folderpath: Root directory to scan.
    """
    b = input("请输入需要检测的编码: ")
    print(f"编码不是 {b} 的文件如下:")
    with ThreadPoolExecutor() as pool:
        for root, _subdirs, names in os.walk(Folderpath):
            for name in names:
                target = os.path.join(root, name)
                pool.submit(get_no_specified_chardet, target, b)
#菜单函数
def case():
    """Show the menu, read one option and run the matching scan.

    The scan is run on the module-level ``Folderpath``, which the script's
    entry point assigns before calling this function. Input that is not one
    of the listed option numbers prints a hint and returns without scanning,
    instead of raising ``ValueError`` from ``int()``.
    """
    print("utf-8 GB2312 ascii 等")
    print("1. 输出所有文件的编码")
    print("2. 输出指定类型文件的编码")
    print("3. 输出指定编码的文件")
    print("4. 输出非指定编码的文件")
    # Menu number -> scan function; replaces the if/elif chain.
    actions = {
        1: all_chardet_files,
        2: by_filetype,
        3: specified_chardet_files,
        4: no_specified_chardet_files,
    }
    raw_choice = input("请输入选项:")
    try:
        action = actions.get(int(raw_choice))
    except ValueError:
        # Non-numeric input: treat it like an unknown option.
        action = None
    if action is None:
        print("无效的选项,请输入 1-4 之间的数字")
        return
    action(Folderpath)
if __name__ == '__main__':
    # Script entry point: pick a folder, then run the menu until the user
    # answers anything other than 'y'.
    print("输入需要检测的路径")
    # Folderpath stays a module-level name because case() reads it.
    Folderpath = filedialog.askdirectory()
    if not Folderpath:
        # Per the tkinter documentation the dialog yields an empty value
        # when it is cancelled; there is nothing to scan in that case.
        print("未选择路径,程序退出")
    else:
        print("检测的路径是 " + Folderpath)
        case()
        # Ask again after every run. The original read the answer once
        # before the loop, so a single 'y' looped forever.
        while input("是否继续?y or exit").strip().lower() == 'y':
            case()