# Convert code to Markdown with Python

"""Convert the code files of a project into a single Markdown document."""
import os
import re
import sys
from datetime import datetime

# Directory-name fragments to skip while walking the project. A directory is
# pruned when any of these strings occurs anywhere in its name.
exclude_dirs = ["__pycache__", "venv", "build", "dist", "node_modules", "public", "LICENSE", "assets", "vendor", "tmp", "static", "templates", "bin", "obj", "Migrations", "Properties"]

# File-name endings that must never be exported (generated notes, lock files,
# fonts, images, stylesheets, ...).
exclude_files = ["_NOTE.md", ".d.ts", ".lock", ".png", ".woff2", ".ttf", ".woff", ".css", "README.md", ".toml", "swagger-ui-bundle.js", "-lock.json"]

# File-name endings that are exported; tested with str.endswith. Every entry
# carries its leading dot so that e.g. a file called "docs" is not mistaken
# for a C# source file.
include_exts = [".py", ".vue", ".js", ".ts", ".html", ".go", ".mod", ".json", ".txt", ".sh", ".command", ".cs", ".csproj", ".jsx"]

# Maps a file suffix (without the dot) to the Markdown code-fence label to use
# when the two differ; suffixes not listed here are used as the label as-is.
md_suffix_table = {
    "command": "sh",
}


def get_root_dir(dir_path):
    """Classify the entries found directly inside ``dir_path``.

    Entries are classified purely by name (nothing is checked on disk): a
    name that ends in a ``.ext`` suffix is treated as a file, anything else
    as a directory. Hidden entries (leading dot) are ignored.

    Args:
        dir_path: Directory whose immediate entries are listed.

    Returns:
        A tuple ``(path_list, root_file_list)``: the joined paths of the
        directories that are not filtered out, and the bare names of the
        files whose endings are on the keep list.
    """
    has_extension = re.compile(r'\.[^.\\/:*?"<>|\r\n]+$')
    rejected_file = re.compile(r"(d\.ts|config\.ts|-lock\.json)$")
    wanted_file = re.compile(r"(\.py|vue|config\.js|js|ts|html|txt|go|mod|json)$")
    rejected_dir = re.compile(r"(__pycache__|venv|build|dist|node_modules|public|LICENSE)")

    sub_dirs = []
    kept_files = []
    for entry in os.listdir(dir_path):
        print(entry)
        # Hidden files and folders are never considered.
        if entry.startswith("."):
            continue
        if has_extension.search(entry):
            # File: drop the rejected endings first, then keep known code endings.
            if not rejected_file.search(entry) and wanted_file.search(entry):
                kept_files.append(entry)
        elif not rejected_dir.search(entry):
            # Directory that is not on the reject list: remember its full path.
            sub_dirs.append(os.path.join(dir_path, entry))
    return sub_dirs, kept_files


def get_deep_dirs(path):
    """Walk ``path`` recursively and collect the paths of code files.

    Hidden directories and directories whose name contains one of the
    rejected fragments are pruned from the walk. Hidden files and files with
    a rejected ending are dropped, and of the remaining files only those with
    a known code ending are returned.

    Args:
        path: Root directory of the walk.

    Returns:
        A list of file paths, each joined onto the directory it was found in.
    """
    rejected_dir = re.compile(r"(__pycache__|venv|build|dist|node_modules|public|LICENSE|assets|vendor|tmp|static|templates)")
    rejected_file = re.compile(r"(_NOTE\.md|\.d\.ts|\.lock|\.png|\.woff2|\.ttf|\.woff|\.css|README\.md|\.toml|swagger-ui-bundle.js|-lock\.json)$")
    wanted_file = re.compile(r"(\.py|vue|js|ts|html|go|mod|json)$")

    collected = []
    for root, dirs, files in os.walk(path):
        # Slice-assign so os.walk sees the pruned list and skips those subtrees.
        dirs[:] = [
            name for name in dirs
            if not (name.startswith(".") or rejected_dir.search(name))
        ]
        files[:] = [
            name for name in files
            if not (name.startswith(".") or rejected_file.search(name))
        ]
        collected.extend(
            os.path.join(root, name) for name in files if wanted_file.search(name)
        )
    return collected


def get_deep_dirs_fast(path, *, skip_dirs=None, skip_files=None, keep_exts=None):
    """Collect the paths of all code files below ``path``.

    Hidden directories and directories whose name contains one of the
    ``skip_dirs`` fragments are pruned from the walk. Hidden files and files
    whose name ends with one of the ``skip_files`` endings are dropped; of the
    rest, only files ending with one of ``keep_exts`` are returned.

    The exclusion list for files is applied as a suffix test (not a substring
    test), matching the ``$``-anchored pattern used by ``get_deep_dirs``; a
    file such as ``app.css.js`` is therefore no longer rejected just because
    ".css" occurs in the middle of its name.

    Args:
        path: Root directory of the project.
        skip_dirs: Directory-name fragments to prune. Defaults to the
            module-level ``exclude_dirs``.
        skip_files: File-name endings to reject. Defaults to the module-level
            ``exclude_files``.
        keep_exts: File-name endings to keep. Defaults to the module-level
            ``include_exts``.

    Returns:
        A list with the path of every selected code file.
    """
    # Fall back to the module-level configuration when no override is given.
    dir_fragments = exclude_dirs if skip_dirs is None else skip_dirs
    # str.endswith accepts a tuple of candidates, so convert once up front.
    rejected_endings = tuple(exclude_files if skip_files is None else skip_files)
    wanted_endings = tuple(include_exts if keep_exts is None else keep_exts)

    code_file_path = []
    for root, dirs, files in os.walk(path):
        # Slice-assign so os.walk itself skips the pruned directories.
        dirs[:] = [
            d for d in dirs
            if not d.startswith(".") and not any(ex in d for ex in dir_fragments)
        ]
        files[:] = [
            f for f in files
            if not f.startswith(".") and not f.endswith(rejected_endings)
        ]
        code_file_path.extend(
            os.path.join(root, f) for f in files if f.endswith(wanted_endings)
        )
    return code_file_path


def readcode_writemd(code_file_path, project_path, markdown_file_path):
    """Read one code file and append it to the Markdown document.

    The file is read as UTF-8. If reading fails, a message is printed and the
    file is still written to the Markdown document with an empty body.

    Args:
        code_file_path: Path of the code file to read.
        project_path: Root path of the project.
        markdown_file_path: Path of the Markdown file to append to.
    """
    # The suffix is always a string: a path without an extension yields ""
    # rather than the empty list re.findall would leave behind, which is
    # unhashable and crashes the suffix-table lookup further down.
    suffix_match = re.search(r'\.[^.\\/:*?"<>|\r\n]+$', code_file_path)
    suffix = suffix_match.group()[1:] if suffix_match else ""

    with open(code_file_path, "r", encoding="utf-8") as f:
        try:
            rest_line = f.read()
        except (UnicodeError, OSError) as e:
            # Best effort: report the unreadable file and export it empty.
            print(f"{code_file_path}{e}文件编码读取错误,非utf-8")
            rest_line = ""

    # The source file is closed before the Markdown file is opened.
    write2md(rest_line, suffix, code_file_path, project_path, markdown_file_path)


def get_md_title_path(code_file_path, project_path):
    """Build the Markdown title for a code file.

    The title is the name of the project directory followed by the file's
    path relative to that directory, so everything above the project root is
    stripped.

    The relative part is computed with ``os.path.relpath`` instead of slicing
    after a character-wise common prefix; the slicing approach cut off the
    first character of the relative path (and produced an empty project name)
    whenever ``project_path`` ended with a path separator.

    Args:
        code_file_path: Path of the code file.
        project_path: Root path of the project.

    Returns:
        The title path, e.g. ``project/src/main.py``.
    """
    relative_part = os.path.relpath(code_file_path, project_path)
    # abspath normalises the path, so a trailing separator or "." still
    # yields the real directory name.
    project_name = os.path.basename(os.path.abspath(project_path))
    return os.path.join(project_name, relative_part)


def get_code_md_lable_by_suffix(suffix):
    """Return the Markdown code-fence label for a file suffix.

    Args:
        suffix: File suffix without the leading dot.

    Returns:
        The entry from ``md_suffix_table`` when one exists (and is not
        ``None``), otherwise ``suffix`` unchanged.
    """
    mapped_label = md_suffix_table.get(suffix)
    return suffix if mapped_label is None else mapped_label


def write2md(content, suffix, code_file_path, project_path, markdown_file_path):
    """Append one code file to the Markdown document.

    The section consists of a level-1 heading holding the file's title path,
    followed by the file content inside a fenced code block labelled after
    the file suffix.

    Args:
        content: Text of the code file.
        suffix: File suffix without the leading dot.
        code_file_path: Path of the code file.
        project_path: Root path of the project.
        markdown_file_path: Path of the Markdown file, opened in append mode.
    """
    with open(markdown_file_path, "a", encoding="utf-8") as md_file:
        heading = get_md_title_path(code_file_path, project_path)

        def section_parts():
            # Yielded lazily so each piece is produced right before it is written.
            yield "\n"
            yield f"# `{heading}`\n\n"
            yield f"```{get_code_md_lable_by_suffix(suffix)}\n"
            yield content
            yield "\n"
            yield "```\n\n\n"

        md_file.writelines(section_parts())


def get_root_path(path):
    """Return the directory that ``path`` refers to.

    Args:
        path: Path of a file or a directory.

    Returns:
        The containing directory when ``path`` is an existing file, otherwise
        ``path`` itself unchanged.
    """
    return os.path.dirname(path) if os.path.isfile(path) else path


def get_file_name():
    """Return a timestamped name for the Markdown output file.

    Returns:
        A name of the form ``Z_<YYYY-MM-DD_HH-MM-SS>_NOTE.md`` built from the
        current local time.
    """
    stamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    return "Z_{}_NOTE.md".format(stamp)


if __name__ == "__main__":
    # Prompt (Chinese): "Drag in any file from the project root directory".
    print("请拖入项目根目录下随便一个文件:\n")
    if len(sys.argv) > 1:
        target = sys.argv[1]
    else:
        # No command-line argument: actually read the path the prompt asks
        # for instead of failing with IndexError on sys.argv[1].
        # NOTE(review): a dragged-in path may arrive wrapped in quotes or with
        # trailing whitespace depending on the terminal, hence the stripping.
        target = input().strip().strip("'\"")
    if not target:
        sys.exit("no project path given")

    # The Markdown file is created in the project root, next to the given file.
    root_path = get_root_path(target)
    md_file_name = get_file_name()
    md_file_path = os.path.join(root_path, md_file_name)
    file_path_list = get_deep_dirs_fast(root_path)

    for i, file_path in enumerate(file_path_list):
        print(i, "->", get_md_title_path(file_path, root_path))
        readcode_writemd(file_path, root_path, md_file_path)

    print("=============done=============")

    # os.system('open '+root_path)
# Posted on 2024-01-17 07:07 by 超级无敌美少男战士 · Views (7) · Comments (0) · Edit · Favorite · Report
# Refresh page · Back to top
# Blog: 怪物奇妙物语
#
# Convert code to Markdown with Python