Python 反编译 CHM 文件并生成 PDF 文件
# -*- coding: utf-8 -*-
"""Decompile a CHM help file with HH.EXE and convert the extracted HTML pages to PDF.

Running this file as a script first invokes ``HH.EXE -decompile`` to unpack
``original_chm`` into ``root_dir`` and then walks ``root_dir`` recursively,
producing one ``.pdf`` next to every ``.htm`` / ``.html`` file that does not
already have one.
"""

import logging
import os
import os.path
import subprocess

import pdfkit

original_chm = r'C:\Users\hushaojun\Documents\canoe\Help01\CAPLfunctions.chm'
root_dir = r'C:\Users\hushaojun\Documents\canoe\Help01\canoe\Topics'
seperator = os.sep  # not used by this script; kept so the module's names are unchanged

# Extensions (lower-case) of the files that are converted to PDF.
HTML_EXTENSIONS = ('.htm', '.html')

# Command-line form: HH.EXE -decompile <output directory> <target chm file>

logging.basicConfig(level=logging.WARNING,
                    format='%(asctime)s %(pathname)s %(filename)s %(funcName)s [line:%(lineno)d] %(levelname)s %(message)s',
                    datefmt='%Y-%m-%d %H:%M:%S',
                    filename='all_file_name.log',
                    filemode='w+')


def recursionFunction(level, path):
    """Walk ``path`` depth-first, printing every entry and converting HTML files to PDF.

    Args:
        level: Current nesting depth (0 for the starting directory). It only
            controls how many ``-`` characters prefix each printed path.
        path: Directory whose entries are listed and processed.

    Every regular file has its full path written to the log and its base name
    and extension printed. A file whose extension is ``.htm`` or ``.html``
    (compared case-insensitively) is converted with ``pdfkit.from_file`` into
    ``<base name>.pdf`` in the same directory, unless that PDF already exists.
    Conversion errors are reported and logged, and the walk continues.
    """
    # Sorted so the traversal (and therefore the console/log output) is
    # deterministic; os.listdir returns entries in arbitrary order.
    for file_content in sorted(os.listdir(path)):
        full_path_name = os.path.join(path, file_content)
        # The run of "-" shows the depth of the entry in the directory tree.
        print('-' * (level + 1) + full_path_name)

        if os.path.isdir(full_path_name):
            # Directories are descended into immediately (depth-first).
            recursionFunction(level + 1, full_path_name)
            continue

        (shotname, extension) = os.path.splitext(file_content)

        # Logged at ERROR so the entry passes the WARNING threshold
        # configured in logging.basicConfig above.
        logging.error('文件全名为:' + full_path_name)
        print('文件名为:' + shotname)
        print('文件后缀名为:' + extension)

        # Case-insensitive match so that e.g. "PAGE.HTM" is converted too.
        if extension.lower() not in HTML_EXTENSIONS:
            continue

        pdf_file_name = os.path.join(path, shotname + '.pdf')
        if os.path.exists(pdf_file_name):
            # A PDF produced earlier is left untouched.
            continue

        try:
            pdfkit.from_file(full_path_name, pdf_file_name)
        except OSError:
            # Best-effort: an OSError from the conversion skips this file
            # only; the path is logged so it can be found afterwards.
            print('just skip!')
            logging.warning('skipped %s', full_path_name, exc_info=True)
        except Exception as e:
            # Any other failure is reported and logged with its traceback,
            # and the walk continues with the next entry.
            print(type(e))
            logging.exception('failed to convert %s', full_path_name)


if __name__ == '__main__':
    # Passed as an argument list so no shell re-parses the paths.
    decomplile_cmd = ['HH.EXE', '-decompile', root_dir, original_chm]
    try:
        # The exit status is deliberately not checked: as in the original
        # script, the directory walk runs whatever the decompiler reports.
        subprocess.run(decomplile_cmd)
    except OSError:
        # HH.EXE could not be started; log it and still try the walk in case
        # the CHM was decompiled previously.
        logging.exception('could not run HH.EXE')
    recursionFunction(0, root_dir)