Python 将 HTML 批量转换为 Markdown(md)

一、安装依赖

pip install html2text

二、代码实现

import os
import shutil
import html2text


def convert_html2md(src_html, target_md):
    """Convert one HTML file to a Markdown file.

    Args:
        src_html: Path of the HTML file to read (decoded as UTF-8).
        target_md: Path of the Markdown file to write (encoded as UTF-8).
            The file is opened in ``'w'`` mode, so existing content is
            overwritten.
    """
    with open(src_html, 'r', encoding='utf-8') as html_file:
        html_source = html_file.read()

    # Convert before opening the destination, so a failed conversion never
    # leaves behind an empty or truncated output file.
    md_text = html2text.html2text(html_source)

    with open(target_md, 'w', encoding='utf-8') as md_file:
        md_file.write(md_text)

def batch_convert(root_path):
    """Convert every ``.html`` file under *root_path* to Markdown.

    The directory tree is walked recursively with ``os.walk``. For each file
    whose name ends in ``.html`` a sibling file with the same name but a
    ``.md`` extension is produced via ``convert_html2md``. Files whose
    Markdown counterpart already exists are reported and skipped.

    Args:
        root_path: Directory whose tree is searched for HTML files.
    """
    html_suffix = '.html'
    for root, _dirs, files in os.walk(root_path):
        for filename in files:
            if not filename.endswith(html_suffix):
                continue
            file_path = os.path.join(root, filename)
            # Replace only the trailing extension. str.replace() would also
            # rewrite any earlier '.html' inside the name (for example
            # 'a.html.html' would become 'a.md.md').
            md_name = filename[:-len(html_suffix)] + '.md'
            target_md = os.path.join(root, md_name)
            if os.path.exists(target_md):
                print(f'{target_md},文件已存在,不在生成')
                continue
            convert_html2md(src_html=file_path, target_md=target_md)

def batch_cp_md_file(root_path):
    """Copy the ``.md`` files under *root_path* into separate ``-md`` directories.

    The tree is walked recursively. Every ``.md`` file found in a directory
    ``D`` is copied to the directory ``D-md`` (created on demand), unless
    ``D`` itself already ends in ``-md``, in which case ``D`` is used as the
    destination. Files already present at the destination are reported and
    skipped.

    Args:
        root_path: Directory whose tree is searched for Markdown files.
    """
    for current_dir, _subdirs, names in os.walk(root_path):
        # The destination depends only on the directory being visited.
        if str(current_dir).endswith('-md'):
            dest_dir = current_dir
        else:
            dest_dir = f'{current_dir}-md'

        for name in names:
            if not name.endswith('.md'):
                continue
            source_file = os.path.join(current_dir, name)
            target_file = os.path.join(dest_dir, name)
            if os.path.exists(target_file):
                print(f"{target_file},已存在跳过复制文件")
                continue
            if not os.path.exists(dest_dir):
                os.makedirs(dest_dir)
            shutil.copyfile(source_file, target_file)


if __name__=='__main__':
    # Script entry point: convert all HTML files under the root directory to
    # Markdown, then copy the resulting .md files into sibling "-md" folders.
    # The root directory may be given as the first command-line argument;
    # when it is omitted the original hard-coded path is used.
    import sys

    root_path = sys.argv[1] if len(sys.argv) > 1 else 'd:\\阿里云盘-课程'
    batch_convert(root_path=root_path)
    batch_cp_md_file(root_path=root_path)
posted @ 2023-06-05 15:51  wan了个蛋  阅读(388)  评论(0)  编辑  收藏  举报