# code2markdown class

"""Convert the source files of a project into a single Markdown document."""
import os
import re
from datetime import datetime

# Directory names to filter out while walking the project tree.
exclude_dirs = [
    "__pycache__", "venv", "build", "dist",
    "node_modules", "public", "LICENSE", "assets",
    "vendor", "tmp", "static", "templates",
    "bin", "obj", "Migrations", "Properties",
    "packages",  # local dependency packages for dotnet
]

# File-name suffixes to filter out.
exclude_files = [
    "_NOTE.md", ".d.ts", ".lock", ".png",
    ".woff2", ".ttf", ".woff", ".css",
    "README.md", ".toml", "swagger-ui-bundle.js", "-lock.json",
    "zz_code2md.py", "temp.md",
]

# File-name suffixes to keep. Every entry carries its leading dot so that a
# suffix test only matches a real extension (".csproj" previously lacked the
# dot, and ".sh" was listed twice).
include_exts = [
    ".py",
    ".vue",
    ".js",
    ".ts",
    ".html",
    ".go",
    ".mod",
    ".json",
    ".txt",
    ".sh",
    ".command",
    ".cs",
    ".csproj",
    ".jsx",
    ".sln",
    ".bat",
]

# Maps a file suffix (without the leading dot) to the label written after the
# opening Markdown code fence, for suffixes that should not be used verbatim.
md_suffix_table = {
    "command": "sh",
    "csproj": "xml",
}


class CodeToMarkDown:
    """Collect a project's source files and dump them into one Markdown note.

    The project directory is walked recursively. Files whose names end with
    one of ``include_exts`` are kept, unless they are hidden, live under a
    directory named in ``exclude_dirs``, or end with one of ``exclude_files``.
    Each kept file is appended to a timestamped ``Z_<time>_NOTE.md`` in the
    project root as a level-one heading followed by a fenced code block.

    Attributes:
        path: Project root directory to scan.
        md_path: Path of the Markdown file being written; set by
            ``generate_md``.
        code_file_path: List of collected source file paths; set by
            ``generate_md``.
    """

    __slots__ = ["path", "md_path", "code_file_path"]

    def __init__(self, path: str = None) -> None:
        """Remember the project root.

        Args:
            path: Project root to scan. When omitted or empty, the current
                working directory is used.
        """
        self.path = path if path else os.getcwd()

    def generate_md(self):
        """Choose the output path, collect source files, and write the note."""
        self.__generate_md_file_path()
        self.__collect_code_files()
        self.__generate_md_file()

    def __generate_md_file_path(self):
        """Set ``md_path`` to ``Z_<YYYY-mm-dd_HH-MM-SS>_NOTE.md`` under ``path``."""
        cur_time_str = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        md_name = f"Z_{cur_time_str}_NOTE.md"
        self.md_path = os.path.join(self.path, md_name)

    def __collect_code_files(self):
        """Fill ``code_file_path`` with every source file worth exporting.

        Directories are excluded by exact name and files by suffix. Substring
        matching would silently drop unrelated entries (for example ``"bin"``
        would exclude a ``binding`` directory).
        """
        skipped_dir_names = set(exclude_dirs)
        skipped_suffixes = tuple(exclude_files)
        wanted_suffixes = tuple(include_exts)

        self.code_file_path = []
        for root, dirs, files in os.walk(self.path):
            # Prune in place so os.walk never descends into hidden or
            # excluded directories.
            dirs[:] = [
                d for d in dirs
                if not d.startswith(".") and d not in skipped_dir_names
            ]
            for file in files:
                if file.startswith(".") or file.endswith(skipped_suffixes):
                    continue
                if file.endswith(wanted_suffixes):
                    self.code_file_path.append(os.path.join(root, file))

    def __generate_md_file(self):
        """Append every collected file to the note, printing progress."""
        for i, code_file_path in enumerate(self.code_file_path):
            print(i + 1, "->", self.__get_md_title_level_one(code_file_path))
            self.__readcode_writemd(code_file_path)

    def __get_md_title_level_one(self, code_file_path):
        """Return the heading for a file: ``<project dir name>/<relative path>``.

        Args:
            code_file_path: Path of a file located under ``self.path``.

        Returns:
            The path of the file relative to the project root, prefixed with
            the project directory's own name.
        """
        # relpath/normpath are separator-aware, so a project root given with a
        # trailing separator no longer loses the first character of the
        # relative path or yields an empty project name.
        relative_path = os.path.relpath(code_file_path, self.path)
        project_name = os.path.basename(os.path.normpath(self.path))
        return os.path.join(project_name, relative_path)

    def __readcode_writemd(self, code_file_path):
        """Read one source file as UTF-8 and append it to the note.

        A file that cannot be opened or decoded is reported on stdout and
        written with an empty body, so a single bad file does not abort the
        whole export.

        Args:
            code_file_path: Path of the source file to export.
        """
        try:
            with open(code_file_path, "r", encoding="utf-8") as f:
                content = f.read()
        except (OSError, UnicodeDecodeError) as e:
            print(f"{code_file_path}{e}文件编码读取错误")
            content = ""
        self.__write2md(content, code_file_path)

    def __write2md(
        self,
        content,
        code_file_path,
    ):
        """Append one heading and fenced code block to ``md_path``.

        Args:
            content: Text of the source file.
            code_file_path: Path of the source file; used for the heading and
                for the code-fence label.
        """
        md_title_level_one = self.__get_md_title_level_one(code_file_path)
        code_label = self.__get_code_md_lable_by_suffix(code_file_path)
        # Make the fence longer than any backtick run inside the content so
        # that an embedded ``` cannot close the block early.
        longest_run = max(map(len, re.findall(r"`+", content)), default=0)
        fence = "`" * max(3, longest_run + 1)
        with open(self.md_path, "a", encoding="utf-8") as f:
            f.write("\n")
            f.write(f"# `{md_title_level_one}`\n\n")
            f.write(f"{fence}{code_label}\n")
            f.write(content)
            f.write("\n")
            f.write(f"{fence}\n\n\n")

    def __get_code_md_lable_by_suffix(self, code_file_path):
        """Return the code-fence label for a file, derived from its suffix.

        Args:
            code_file_path: Path of the source file.

        Returns:
            The entry of ``md_suffix_table`` for the suffix when there is one,
            otherwise the suffix itself without the dot. An empty string is
            returned when the path has no suffix.
        """
        match = re.search(r'\.[^.\\/:*?"<>|\r\n]+$', code_file_path)
        # No suffix: return an empty label instead of passing an unhashable
        # empty list to dict.get().
        suffix = match.group(0)[1:] if match else ""
        return md_suffix_table.get(suffix, suffix)


if __name__ == "__main__":
    import sys  # local import: only the script entry point reads argv

    print("====================start====================")
    # Project root to export: the first command-line argument when given,
    # otherwise the original hard-coded location.
    default_root = "F:\\song\\dotnet_efcore_two_database_learn\\SqliteToOracle"
    root_path = sys.argv[1] if len(sys.argv) > 1 else default_root
    code2md = CodeToMarkDown(root_path)
    code2md.generate_md()
    print("====================done====================")
# posted on 2024-01-18 20:23 超级无敌美少男战士 阅读(4) 评论(0) 编辑 收藏 举报
# 刷新页面 返回顶部
# 怪物奇妙物语