日常生活的交流与学习

首页 新随笔 联系 管理

code2md md2code

"""convert code to markdown
"""
import datetime
import os
import re


class CodeToMarkDown:
    """Export the source files of a project into a single Markdown note.

    Every collected file is appended to a timestamped ``Z_<time>_NOTE.md`` file
    in the project root: a level-one heading holding the project-relative path,
    followed by a fenced code block holding the file content.
    """

    __slots__ = ["path", "md_path", "code_file_path", "exclude_dirs", "exclude_files", "md_suffix_table", "include_exts", "key_work_filter_list"]

    def __init__(self, path: str = None) -> None:
        """Set the project root and the default include/exclude rules.

        Args:
            path: Project root to scan. The current working directory is used
                when it is omitted or empty.
        """
        self.path = path if path else os.getcwd()
        # Directories to skip. A directory is pruned when its name CONTAINS any
        # of these entries (substring match, see __collect_code_files).
        self.exclude_dirs = [
            "__pycache__",
            "venv",
            "build",
            "dist",
            "node_modules",
            "public",
            "LICENSE",
            "assets",
            "vendor",
            "tmp",
            "static",
            "templates",
            "bin",
            "obj",
            "Migrations",
            "Properties",
            "packages",
        ]
        # Files to skip. A file is dropped when its name CONTAINS any of these
        # entries (substring match).
        self.exclude_files = [
            "_NOTE.md",
            ".d.ts",
            ".lock",
            ".png",
            ".woff2",
            ".ttf",
            ".woff",
            ".css",
            "README.md",
            ".toml",
            "swagger-ui-bundle.js",
            "-lock.json",
            "zz_code2md.py",
            "temp.md",
        ]
        # Maps a file suffix to the fence language label when the two differ.
        self.md_suffix_table = {"command": "sh", "csproj": "xml"}
        # Suffixes of the files to export (each entry includes the dot).
        self.include_exts = [
            ".py",
            ".vue",
            ".js",
            ".ts",
            ".html",
            ".go",
            ".mod",
            ".json",
            ".txt",
            ".sh",
            ".command",
            ".cs",
            ".csproj",
            ".jsx",
            ".sln",
            ".bat",
        ]
        # Not read by any method of this class.
        self.key_work_filter_list = [""]

    def generate_md(self):
        """Choose the note path, collect the source files and write the note."""
        self.__generate_md_file_path()
        self.__collect_code_files()
        self.__generate_md_file()

    def __generate_md_file_path(self):
        """Store the timestamped note path (inside the project root) in ``md_path``."""
        cur_time_str = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        md_name = f"Z_{cur_time_str}_NOTE.md"
        self.md_path = os.path.join(self.path, md_name)

    def __collect_code_files(self):
        """Walk the project and store the paths of exportable files in ``code_file_path``.

        Hidden entries (leading dot) and entries matching the exclude lists are
        skipped. Directories and files are visited in sorted order so that the
        generated note is deterministic.
        """
        self.code_file_path = []
        wanted_suffixes = tuple(self.include_exts)
        for root, dirs, files in os.walk(self.path):
            # Assigning through dirs[:] prunes the walk in place, so excluded
            # directories are never descended into.
            dirs[:] = sorted(
                d for d in dirs if not d.startswith(".") and not any(ex in d for ex in self.exclude_dirs)
            )
            for file in sorted(files):
                if file.startswith(".") or any(ex in file for ex in self.exclude_files):
                    continue
                if file.endswith(wanted_suffixes):
                    self.code_file_path.append(os.path.join(root, file))

    def __generate_md_file(self):
        """Append every collected file to the note, printing a progress line per file."""
        for i, code_file_path in enumerate(self.code_file_path):
            print(i + 1, "->", self.__get_md_title_level_one(code_file_path))
            self.__readcode_writemd(code_file_path)

    def __get_md_title_level_one(self, code_file_path):
        """Return the heading text for a file: ``<project dir name>/<relative path>``.

        Args:
            code_file_path: Path of a file located under ``self.path``.

        Returns:
            The file path relative to the project root, prefixed with the
            project directory name.
        """
        # Normalising first keeps the result correct when self.path ends with
        # a separator or is relative (e.g. "." or "proj/").
        project_root = os.path.abspath(self.path)
        relative_path = os.path.relpath(os.path.abspath(code_file_path), project_root)
        return os.path.join(os.path.basename(project_root), relative_path)

    def __readcode_writemd(self, code_file_path):
        """Read one source file as UTF-8 and append it to the note.

        A file that cannot be read or decoded is reported on stdout and written
        as an empty code block, so one bad file does not abort the export.

        Args:
            code_file_path: Path of the file to export.
        """
        try:
            with open(code_file_path, "r", encoding="utf-8") as f:
                content = f.read()
        except (UnicodeDecodeError, OSError) as e:
            print(f"{code_file_path}{e}文件编码读取错误")
            content = ""
        self.__write2md(content, code_file_path)

    def __write2md(
        self,
        content,
        code_file_path,
    ):
        """Append one heading plus fenced code block to the note file.

        Args:
            content: Text of the source file.
            code_file_path: Path of the source file; used for the heading and
                the fence language label.
        """
        md_title_level_one = self.__get_md_title_level_one(code_file_path)
        code_label = self.__get_code_md_lable_by_suffix(code_file_path)
        with open(self.md_path, "a", encoding="utf-8") as f:
            f.write("\n")
            f.write(f"# `{md_title_level_one}`\n\n")
            f.write(f"```{code_label}\n")
            f.write(content)
            f.write("\n")
            f.write("```\n\n\n")

    def __get_code_md_lable_by_suffix(self, code_file_path):
        """Return the fence language label for a file.

        Args:
            code_file_path: Path of the source file.

        Returns:
            The entry of ``md_suffix_table`` for the file suffix when there is
            one, otherwise the suffix itself without the dot. A file without a
            suffix yields an empty string.
        """
        suffix = os.path.splitext(code_file_path)[1][1:]
        mapped_label = self.md_suffix_table.get(suffix)
        return suffix if mapped_label is None else mapped_label


class MarkdownToCode:
    """Recreate source files from a Markdown note.

    The note is expected to hold, for every file, a level-one heading of the
    form ``# `relative/path``` followed by a fenced code block. Files are
    written below the directory that contains the note.
    """

    __slots__ = ["path", "base_dir"]

    def __init__(self, path: str = None) -> None:
        """Select the note to convert.

        Args:
            path: Path of the Markdown note. When omitted or empty, the newest
                ``*_NOTE.md`` below the current working directory is used.

        Raises:
            FileNotFoundError: No path was given and no ``*_NOTE.md`` exists
                below the current working directory.
        """
        self.path = path if path else self.__get_latest_md_file_path()

    def __get_latest_md_file_path(self):
        """Return the newest ``*_NOTE.md`` below the current working directory.

        The file name is compared first (the directory only breaks ties), so
        timestamped note names decide which note is newest, not the folder
        they live in.

        Raises:
            FileNotFoundError: No matching note exists.
        """
        search_root = os.getcwd()
        candidates = [
            os.path.join(root, file)
            for root, _, files in os.walk(search_root)
            for file in files
            if file.endswith("_NOTE.md")
        ]
        if not candidates:
            raise FileNotFoundError(f"no *_NOTE.md file found under {search_root}")
        return max(candidates, key=lambda md_path: (os.path.basename(md_path), md_path))

    def generate_code(self):
        """Write every file described by the note below the note's directory."""
        self.__set_base_dir()
        self.__read_md_file()

    def __read_md_file(self):
        """Parse the note and create one file per heading/code-block pair."""
        with open(self.path, "r", encoding="utf-8") as f:
            md_text = f.read()
        # Group 1: path inside the heading back-ticks.
        # Group 2: fence body. It may be empty (``*?``), and only the single
        # newline right before the closing fence is dropped, so the trailing
        # newline of the original file survives the round trip.
        pattern = r"^# `(.+)`\n+```[^\n`]*\n([\s\S]*?)\n```[ \t]*(?:\n|\Z)"
        matches = re.findall(pattern, md_text, re.MULTILINE)
        for i, (file_path, code) in enumerate(matches):
            print(f"{i}->", file_path)
            self.__create_from_file_path(file_path, code)

    def __set_base_dir(self):
        """Use the directory that contains the note as the output root."""
        self.base_dir = os.path.dirname(self.path)

    def __create_from_file_path(self, file_path, content):
        """Create ``file_path`` below the output root and write ``content`` to it.

        Headings that would resolve outside the output root (absolute paths or
        ``..`` components) are reported and skipped rather than written.

        Args:
            file_path: Relative path taken from a heading of the note.
            content: Text to write into the file.
        """
        # abspath also covers an empty base_dir (note given as a bare file
        # name), which would otherwise make os.makedirs("") fail.
        base = os.path.abspath(self.base_dir)
        target = os.path.abspath(os.path.join(base, file_path))
        try:
            inside_base = os.path.commonpath([base, target]) == base
        except ValueError:
            # Raised e.g. for paths on different drives.
            inside_base = False
        if not inside_base or target == base:
            print(f"skip path outside of the output directory: {file_path}")
            return
        os.makedirs(os.path.dirname(target), exist_ok=True)
        with open(target, "w", encoding="utf-8") as f:
            f.write(content)


class CollectMarkdownNote:
    """Embed Markdown notes into a copy of this script and extract them again.

    ``collect_markdown`` copies the running script to a timestamped file and
    appends every collected note as a triple-quoted block whose first line
    starts with a split flag. ``un_collect_markdown`` scans the script for such
    blocks and writes each one back to its own Markdown file.
    """

    __slots__ = [
        "path",
        "md_path",
        "cur_file_name",
        "cur_file_new_name",
        "md_files",
        "dotnet_md_files",
        "python_md_files",
        "dotnet_line_number_start",
        "dotnet_split_flag",
        "dotnet_file_names",
        "python_split_flag",
        "current_new_md_file_name",
        "save_md_file_heads",
    ]

    def __init__(self, path: str = None) -> None:
        """Set the directory to scan and the section markers.

        Args:
            path: Directory searched for notes. The current working directory
                is used when it is omitted or empty.
        """
        self.path = path if path else os.getcwd()

        self.dotnet_line_number_start = []
        self.dotnet_split_flag = "#==dotnet=="
        self.python_split_flag = "#==python=="
        self.dotnet_file_names = []
        # Output file of the section currently being extracted, None outside
        # of a section.
        self.current_new_md_file_name = None
        # File-name prefixes of the Markdown notes to collect.
        self.save_md_file_heads = ["z_"]

    def collect_markdown(self):
        """Copy this script to a timestamped file and append the collected notes."""
        self.__get_cur_python_file_name()
        self.__get_new_python_file_name()
        self.__get_cur_markdown_notes()
        self.__read_cur_markdown_notes()
        self.__generate_new_python_file()

    def __write2md(self, line):
        """Append ``line`` to the note being extracted, if a section is open.

        The closing ``\"\"\"`` delimiter of a section is never copied; the line
        ending is stripped before the comparison so a delimiter line read from
        a file is recognised.
        """
        if self.current_new_md_file_name is None or line.rstrip("\r\n") == '"""':
            return
        with open(self.current_new_md_file_name, "a+", encoding="utf-8") as nf:
            nf.write(line)

    def un_collect_markdown(self):
        """Extract every embedded note section of this script into its own file.

        A section starts on a line beginning with ``\"\"\"`` followed by the
        dotnet or python split flag and ends on the next line that is a bare
        ``\"\"\"``. The script is opened by its base name, i.e. relative to the
        current working directory.
        """
        self.__get_cur_python_file_name()
        self.current_new_md_file_name = None
        section_starts = tuple(f'"""{flag}' for flag in (self.dotnet_split_flag, self.python_split_flag))
        with open(self.cur_file_name, "r", encoding="utf-8") as f:
            for i, line in enumerate(f):
                # Write before looking at the markers: the header line itself
                # is then never copied because no section is open yet.
                self.__write2md(line)
                if line.startswith(section_starts):
                    self.current_new_md_file_name = self.__get_output_md_file_name_by_line(line)
                    print(i + 1, "->", self.current_new_md_file_name, "start")
                elif line.startswith('"""\n') and self.current_new_md_file_name is not None:
                    print(i + 1, "->", self.current_new_md_file_name, "end")
                    self.current_new_md_file_name = None

    def __get_output_md_file_name_by_line(self, line):
        """Build the output file name for a section header line.

        Args:
            line: Header line containing a split flag followed by the original
                note file name.

        Returns:
            ``<note name up to its first dot>_<timestamp>.md``.
        """
        cur_time_str = str(datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S"))
        # The dotnet flag wins whenever it is present; the python flag is only
        # used for lines that carry it alone.
        if self.python_split_flag in line and self.dotnet_split_flag not in line:
            flag = self.python_split_flag
        else:
            flag = self.dotnet_split_flag
        return line.split(flag)[1].strip().split(".")[0] + "_" + cur_time_str + ".md"

    def __read_cur_markdown_notes(self):
        """Sort the collected notes into dotnet and python lists.

        The keyword is searched in the whole path, so a directory name can
        decide the category; paths containing neither keyword are ignored.
        """
        self.dotnet_md_files = []
        self.python_md_files = []
        for md_file in self.md_files:
            if "dotnet" in md_file:
                self.dotnet_md_files.append(md_file)
            elif "python" in md_file:
                self.python_md_files.append(md_file)

    def __get_cur_markdown_notes(self):
        """Store in ``md_files`` every non-hidden ``.md`` file under ``path``
        whose name starts with one of ``save_md_file_heads``."""
        self.md_files = []
        wanted_heads = tuple(self.save_md_file_heads)
        for root, _, files in os.walk(self.path):
            for file in files:
                if file.startswith(".") or not file.endswith(".md"):
                    continue
                if file.startswith(wanted_heads):
                    self.md_files.append(os.path.join(root, file))

    def __get_new_python_file_name(self):
        """Derive the timestamped copy name from the current script name.

        The stem is the part of the script name before its first underscore
        (or before the extension when there is no underscore).
        """
        cur_time_str = str(datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S"))
        stem = os.path.splitext(self.cur_file_name.split("_")[0])[0]
        self.cur_file_new_name = f"{stem}_{cur_time_str}.py"

    def __get_cur_file_name(self, md_file_path):
        """Return the file name component of ``md_file_path``."""
        return os.path.basename(md_file_path)

    def __generate_new_python_file(self):
        """Write the script copy followed by one flagged block per collected note.

        Both files are opened once and closed on exit, and an empty source
        script is handled as well.
        """
        flagged_groups = (
            (self.dotnet_split_flag, self.dotnet_md_files),
            (self.python_split_flag, self.python_md_files),
        )
        with open(self.cur_file_name, "r", encoding="utf-8") as f:
            with open(self.cur_file_new_name, "a+", encoding="utf-8") as nf:
                nf.writelines(f)
                for flag, md_files in flagged_groups:
                    for md_file in md_files:
                        md_title = self.__get_cur_file_name(md_file)
                        content = self.__read_md_file(md_file)
                        nf.write(f'\n\n"""{flag}{md_title}\n\n')
                        nf.write(content)
                        nf.write('\n"""\n\n')

    def __read_md_file(self, file):
        """Return the UTF-8 text of ``file``."""
        with open(file, "r", encoding="utf-8") as f:
            return f.read()

    def __get_cur_python_file_name(self):
        """Store the base name of this script in ``cur_file_name``."""
        file_path = __file__
        self.cur_file_name = os.path.basename(file_path)


if __name__ == "__main__":
    # Interactive entry point: show the menu, read one option, run the tool.
    print("===============================start===============================")
    print("1. Project Convert to Markdown 2. Markdown Convert to Project")
    print("3. Collect Markdown Notes      4. Uncollect Markdown Notes")
    try:
        option_number = int(input("Please input a number: "))
    except Exception:
        # Anything that is not a valid integer falls through to "unknown option".
        option_number = 0
    print("您输入的整数是:", option_number)
    if option_number not in (1, 2, 3, 4):
        print("unknown option")
    elif option_number == 1:
        CodeToMarkDown().generate_md()
    elif option_number == 2:
        # An empty answer makes MarkdownToCode pick the newest note itself.
        MarkdownToCode(input("请输入需要转换的markdown文件路径(默认使用当前路径最新的markdown文件) : ")).generate_code()
    elif option_number == 3:
        CollectMarkdownNote().collect_markdown()
    else:
        CollectMarkdownNote().un_collect_markdown()
    print("===============================done===============================")

# =============================================================================================================
posted on 2024-01-20 00:02  lazycookie  阅读(12)  评论(0)  编辑  收藏  举报