# md2code code2md

"""convert code to markdown
"""
import asyncio
import datetime
import os
import platform
import re
import shutil
import xmlrpc.client


class ST:
    # Settings holder. Its contents are redacted in this copy of the file: each
    # "# <xxxxxxxxx>" line below is the placeholder that CollectMarkdownNote
    # writes in place of a source line carrying the "@ignore" marker.
    # NOTE(review): MarkdownToCnblog reads ST.blog_url, ST.blog_id, ST.username
    # and ST.password, so presumably those four attributes were defined here —
    # confirm and restore them before using the upload feature.
    # <xxxxxxxxx>
    # <xxxxxxxxx>
    # <xxxxxxxxx>
    # <xxxxxxxxx>
    pass


class MarkdownToCnblog:
    """Publish a local Markdown file through the MetaWeblog XML-RPC API.

    ``upload_markdown`` runs the whole pipeline: read the file, collect the
    local image references, upload every image and rewrite the references to
    the returned URLs, take title and tags from HTML comments, then create the
    post.

    Connection settings come from the module-level ``ST`` class
    (``blog_url``, ``blog_id``, ``username``, ``password``).

    Attributes:
        path: Path of the Markdown file to publish.
        base_dir: Directory of ``path``; relative image paths resolve here.
        raw_md: Markdown text as read from disk.
        new_md: Markdown text that is uploaded (image links rewritten).
        images: Local image references found in ``raw_md`` (unique, ordered).
        net_images: URLs of successfully uploaded images, in completion order.
        mime_mapping: File suffix -> MIME type used for image uploads.
        server: ``xmlrpc.client.ServerProxy`` or ``None`` if it could not be
            created.
        blog_title / blog_tags: Post title and keyword string.
        img_format: ``"typora"`` converts zoomed ``<img>`` tags to centred,
            width-styled ones; any other non-empty string is used as a
            ``str.format`` template receiving the image URL; a falsy value
            disables the conversion.
        gen_network_file: When true, also write ``<name>_network<ext>`` next
            to the source file.
        publish: Passed to ``metaWeblog.newPost`` as the publish flag.
    """

    __slots__ = [
        "path",
        "base_dir",
        "raw_md",
        "images",
        "net_images",
        "new_md",
        "mime_mapping",
        "server",
        "blog_title",
        "blog_tags",
        "img_format",
        "gen_network_file",
        "publish",
    ]

    def __init__(self, path: str) -> None:
        """Store the file location, set defaults and create the XML-RPC proxy.

        Args:
            path: Path of the Markdown file to upload.
        """
        self.path = path
        self.base_dir = os.path.dirname(path)
        # Every slot gets a value so later steps never hit an unset attribute.
        self.raw_md = ""
        self.new_md = ""
        self.images = []
        self.net_images = []
        self.blog_title = os.path.splitext(os.path.basename(path))[0]
        self.blog_tags = ""
        self.mime_mapping = {
            ".ico": "image/x-icon",
            ".jpg": "image/jpeg",
            ".jpe": "image/jpeg",
            ".jpeg": "image/jpeg",
            ".png": "image/png",
        }
        self.img_format = "typora"
        self.publish = True
        self.gen_network_file = False
        self.server = None
        try:
            self.server = xmlrpc.client.ServerProxy(ST.blog_url)
        except Exception as ex:
            # Best effort, as before: report and leave ``server`` as None so
            # ``upload_markdown`` can stop cleanly instead of crashing later.
            if "unsupported XML-RPC protocol" in str(ex):
                print("请查看config.yaml文件中的blog_url,应该是这个URL地址没设置对")
            else:
                print(f"初始化博客服务失败: {ex}")

    def upload_markdown(self):
        """Run the full read -> upload images -> publish pipeline."""
        if self.server is None:
            print("博客服务未初始化,无法上传")
            return
        self.__read_markdown()
        self.__find_md_img()
        self.__upload_img()
        self.__get_title_and_tag()
        self.__upload_blog()

    def __get_title_and_tag(self):
        """Take the title and tags from the first two HTML comments.

        The first ``<!-- ... -->`` comment is the title and the second the
        tag string. Without any comment the title falls back to the file name
        (without extension); a missing second comment gives empty tags.
        """
        # Non-greedy so two comments on one line stay two separate matches.
        comments = [c.strip() for c in re.findall(r"<!--(.*?)-->", self.raw_md)]
        if comments:
            self.blog_title = comments[0]
        else:
            self.blog_title = os.path.splitext(os.path.basename(self.path))[0]
        self.blog_tags = comments[1] if len(comments) > 1 else ""

    def __upload_blog(self):
        """Create the post on the server and report the outcome."""
        post = dict(
            description=self.new_md,
            title=self.blog_title,
            categories=["[Markdown]"],
            mt_keywords=self.blog_tags,
        )
        try:
            self.server.metaWeblog.newPost(ST.blog_id, ST.username, ST.password, post, self.publish)
            print("===========上传成功===============")
        except Exception as ex:
            print(f"上传失败,相同博文已存在{ex}")

    def __upload_img(self):
        """Upload every existing local image and rewrite the links in ``new_md``.

        Missing files are reported and skipped. Each Markdown reference is
        paired with the URL of its own upload task, so a skipped or failed
        image can never shift the URLs of the other images.
        """
        if not self.images:
            return

        pending = []  # (index in self.images, markdown reference, full path)
        for i, ref in enumerate(self.images):
            # A leading "/" is treated as relative to the Markdown file.
            relative = "." + ref if ref.startswith("/") else ref
            image_full_path = os.path.join(self.base_dir, relative)
            if not os.path.exists(image_full_path):
                print(f"图片{image_full_path}不存在")
                continue
            pending.append((i, ref, image_full_path))
        if not pending:
            return

        # A private loop: tasks are created on the loop that runs them, and
        # closing it does not break a default loop used elsewhere.
        loop = asyncio.new_event_loop()
        try:
            tasks = []
            for i, _, image_full_path in pending:
                task = loop.create_task(self.__upload_md_img(image_full_path))
                task.add_done_callback(
                    lambda future, idx=i, img_path=image_full_path: self.get_image_url(future, idx, img_path)
                )
                tasks.append(task)
            loop.run_until_complete(asyncio.wait(tasks))
        finally:
            loop.close()

        image_mapping = {}
        for (_, ref, _), task in zip(pending, tasks):
            if task.cancelled() or task.exception() is not None:
                continue
            url = task.result()
            if url:
                image_mapping[ref] = url
        self.new_md = self.replace_md_img(image_mapping)

    def get_image_url(self, t: asyncio.Task, idx, img_path):
        """Done-callback of an upload task: report it and record the URL.

        Args:
            t: The finished upload task.
            idx: Index of the image in ``self.images``.
            img_path: Full path of the uploaded file.
        """
        if t.cancelled():
            print(f"{idx}->\t{img_path}\t上传已取消")
            return
        error = t.exception()
        if error is not None:
            print(f"{idx}->\t{img_path}\t上传失败:{error}")
            return
        img_url = t.result()
        print(f"{idx}->\t{img_path}\t上传成功,URL:{img_url}")
        self.net_images.append(img_url)

    async def __upload_md_img(self, path):
        """Upload one image file and return the URL reported by the server.

        Args:
            path: Full path of the image file.

        Returns:
            The ``url`` entry of the ``metaWeblog.newMediaObject`` response.
        """
        # NOTE(review): purpose of this delay is not visible here; all tasks
        # sleep concurrently, so it only postpones the first upload — confirm.
        await asyncio.sleep(2)
        name = os.path.basename(path)
        suffix = os.path.splitext(name)[1].lower()
        mime_type = self.mime_mapping.get(suffix)
        if mime_type is None:
            # Suffix not in the explicit table: ask the standard library
            # instead of failing with KeyError.
            import mimetypes

            mime_type = mimetypes.guess_type(name)[0] or "application/octet-stream"
        with open(path, "rb") as f:
            data = f.read()
        file = {"bits": data, "name": name, "type": mime_type}
        url = self.server.metaWeblog.newMediaObject(ST.blog_id, ST.username, ST.password, file)
        return url.get("url")

    def __find_md_img(self):
        """Collect local image references; http(s)/ftp URLs are left alone.

        Both ``![alt](path)`` and ``<img src="path"`` forms are recognised.
        The result in ``self.images`` is de-duplicated and keeps first-seen
        order.
        """
        # Non-greedy target so several images on one line are found one by one.
        refs = re.findall(r"!\[.*?\]\((.*?)\)", self.raw_md)
        refs += re.findall(r'<img src="(.*?)"', self.raw_md)
        local_refs = [r for r in refs if r and not re.match(r"((http(s?))|(ftp))://.*", r)]
        self.images = list(dict.fromkeys(local_refs))
        print(f"共找到{len(self.images)}张本地图片{self.images}")

    def __read_markdown(self):
        """Load the Markdown file into ``raw_md`` and ``new_md``."""
        with open(self.path, "r", encoding="utf-8") as f:
            self.raw_md = f.read()
        self.new_md = self.raw_md

    def replace_md_img(self, img_mapping):
        """Return the file's Markdown with image links rewritten.

        Args:
            img_mapping: Mapping of local image reference -> uploaded URL.

        Returns:
            The rewritten Markdown text. With ``gen_network_file`` set, the
            text is also written to ``<name>_network<ext>`` beside the source.
        """
        with open(self.path, "r", encoding="utf-8") as fr:
            md = fr.read()

        for local, net in img_mapping.items():
            md = md.replace(local, net)

        if self.img_format:
            md_links = re.findall(r"!\[.*?\]\(.*?\)", md)
            # Non-greedy so two tags on one line are handled separately.
            md_links += re.findall(r"<img src=.*?/>", md)
            for ml in md_links:
                urls = re.findall(r"!\[.*?\]\((.*?)\)", ml)
                urls += re.findall(r'<img src="(.*?)"', ml)
                if not urls:
                    # e.g. a tag with a single-quoted src: nothing to rewrite.
                    continue
                img_url = urls[0]

                if self.img_format == "typora":
                    # Typora stores scaling as style="zoom:NN%;"; turn it into
                    # a centred image with the same percentage as its width.
                    zoom = re.findall(r'style="zoom:(.*?)%;"', ml)
                    if zoom:
                        md = md.replace(ml, f'<center><img src="{img_url}"  style="width:{zoom[0]}%;" /></center>')
                else:
                    md = md.replace(ml, self.img_format.format(img_url))

        if self.gen_network_file:
            path_net = os.path.join(
                os.path.dirname(self.path),
                "_network".join(os.path.splitext(os.path.basename(self.path))),
            )
            with open(path_net, "w", encoding="utf-8") as fw:
                fw.write(md)
            print(f"图片链接替换完成，生成新markdown:{path_net}")
        return md


class CodeToMarkDown:
    """Dump the source files of a project into one Markdown note.

    Each collected file becomes a section: a level-one heading holding the
    file path (prefixed with the project directory name) followed by a fenced
    code block with the file content. The note is written to
    ``<path>/Z_<timestamp>_NOTE.md``.

    Attributes:
        path: Project root that is walked.
        md_path: Path of the note being generated.
        code_file_path: List of collected source file paths.
        exclude_dirs: Directory names that are not descended into.
        exclude_files: File-name suffixes that are never collected.
        md_suffix_table: File suffix -> fence label overrides.
        include_exts: File-name suffixes that are collected.
        key_work_filter_list: Not referenced elsewhere in this class.
    """

    __slots__ = ["path", "md_path", "code_file_path", "exclude_dirs", "exclude_files", "md_suffix_table", "include_exts", "key_work_filter_list"]

    def __init__(self, path: str = None) -> None:
        """Set the project root (current directory by default) and filters.

        Args:
            path: Project root; a falsy value selects ``os.getcwd()``.
        """
        self.path = path if path else os.getcwd()
        # Directory names to skip.
        self.exclude_dirs = [
            "__pycache__",
            "venv",
            "build",
            "dist",
            "node_modules",
            "public",
            "LICENSE",
            "assets",
            "vendor",
            "tmp",
            "static",
            "templates",
            "bin",
            "obj",
            "Migrations",
            "Properties",
            "packages",
        ]
        # File-name suffixes to skip.
        self.exclude_files = [
            "_NOTE.md",
            ".d.ts",
            ".lock",
            ".png",
            ".woff2",
            ".ttf",
            ".woff",
            ".css",
            "README.md",
            ".toml",
            "swagger-ui-bundle.js",
            "-lock.json",
            "zz_code2md.py",
            "zz.py",
            "temp.md",
        ]
        # File suffix -> label used on the Markdown code fence.
        self.md_suffix_table = {"command": "sh", "csproj": "xml"}
        # File-name suffixes to include.
        self.include_exts = [
            ".py",
            ".vue",
            ".js",
            ".ts",
            ".html",
            ".go",
            ".mod",
            ".json",
            ".txt",
            ".sh",
            ".command",
            ".cs",
            ".csproj",
            ".jsx",
            ".sln",
            ".bat",
        ]
        # Suffixes of file names to filter (currently unused).
        self.key_work_filter_list = [""]

    def generate_md(self):
        """Collect the project's source files and write them to a new note."""
        self.__generate_md_file_path()
        self.__collect_code_files()
        self.__generate_md_file()

    def __generate_md_file_path(self):
        """Choose the timestamped output path ``<path>/Z_<time>_NOTE.md``."""
        cur_time_str = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        self.md_path = os.path.join(self.path, f"Z_{cur_time_str}_NOTE.md")

    def __collect_code_files(self):
        """Walk ``self.path`` and fill ``self.code_file_path``.

        Hidden entries (leading ".") are skipped. Directories are excluded by
        exact name and files by suffix, so e.g. "bin" no longer hides
        "cabinet". Entries are visited in sorted order to make the generated
        note reproducible.
        """
        self.code_file_path = []
        excluded_dirs = set(self.exclude_dirs)
        excluded_suffixes = tuple(self.exclude_files)
        included_suffixes = tuple(self.include_exts)
        for root, dirs, files in os.walk(self.path):
            # Prune in place so os.walk does not descend into skipped dirs.
            dirs[:] = sorted(d for d in dirs if not d.startswith(".") and d not in excluded_dirs)
            for file in sorted(files):
                if file.startswith(".") or file.endswith(excluded_suffixes):
                    continue
                if file.endswith(included_suffixes):
                    self.code_file_path.append(os.path.join(root, file))

    def __generate_md_file(self):
        """Append every collected file to the note, printing its title."""
        for i, code_file_path in enumerate(self.code_file_path):
            print(i + 1, "->", self.__get_md_title_level_one(code_file_path))
            self.__readcode_writemd(code_file_path)

    def __get_md_title_level_one(self, code_file_path):
        """Return the heading text for a file: ``<project dir>/<relative path>``.

        Args:
            code_file_path: Path of a file located under ``self.path``.

        Returns:
            The project directory name joined with the file's path relative
            to the project root. A trailing separator on ``self.path`` does
            not affect the result.
        """
        project_root = os.path.abspath(self.path)
        relative = os.path.relpath(os.path.abspath(code_file_path), project_root)
        return os.path.join(os.path.basename(project_root), relative)

    def __readcode_writemd(self, code_file_path):
        """Read one source file as UTF-8 and append it to the note.

        A file that cannot be read or decoded is reported and written as an
        empty section.

        Args:
            code_file_path: Path of the source file.
        """
        with open(code_file_path, "r", encoding="utf-8") as f:
            try:
                content = f.read()
            except (UnicodeDecodeError, OSError) as e:
                print(f"{code_file_path}{e}文件编码读取错误")
                content = ""
        self.__write2md(content, code_file_path)

    def __write2md(
        self,
        content,
        code_file_path,
    ):
        """Append one heading + fenced code section to the note.

        Args:
            content: Text of the source file.
            code_file_path: Path of the source file (for heading and label).
        """
        md_title_level_one = self.__get_md_title_level_one(code_file_path)
        code_label = self.__get_code_md_lable_by_suffix(code_file_path)
        with open(self.md_path, "a", encoding="utf-8") as f:
            f.write("\n")
            f.write(f"# `{md_title_level_one}`\n\n")
            f.write(f"```{code_label}\n")
            f.write(content)
            f.write("\n")
            f.write("```\n\n\n")

    def __get_code_md_lable_by_suffix(self, code_file_path):
        """Return the code-fence label for a file.

        Args:
            code_file_path: Path of the source file.

        Returns:
            The override from ``md_suffix_table`` if the suffix has one,
            otherwise the suffix without its dot; an empty string when the
            path has no suffix.
        """
        found = re.findall(r'\.[^.\\/:*?"<>|\r\n]+$', code_file_path)
        suffix = found[0][1:] if found else ""
        label = self.md_suffix_table.get(suffix)
        return suffix if label is None else label


class MarkdownToCode:
    """Recreate source files from a note made of heading + code-fence sections.

    Every section of the form::

        # `relative/path/to/file`

        ```label
        ...file content...
        ```

    is written to ``<directory of the note>/relative/path/to/file``.

    Attributes:
        path: Path of the Markdown note.
        base_dir: Directory of the note; files are created below it.
    """

    __slots__ = ["path", "base_dir"]

    # Heading line, opening fence with any (possibly empty) label, content
    # (possibly empty), closing fence. The content group is lazy and may be
    # empty so an empty file section cannot swallow the section after it.
    _BLOCK_PATTERN = re.compile(
        r"^# `(.+)`\n{1,}```(?:[^\n]*\n)([\s\S]*?)\n{1,}```\n{1,}",
        re.MULTILINE,
    )

    def __init__(self, path: str = None) -> None:
        """Select the note to convert.

        Args:
            path: Path of the note; a falsy value selects the newest
                ``*_NOTE.md`` found below the current directory.

        Raises:
            FileNotFoundError: No path was given and no ``*_NOTE.md`` exists
                below the current directory.
        """
        if path:
            self.path = path
        else:
            self.path = self.__get_latest_md_file_path()

    def __get_latest_md_file_path(self):
        """Return the ``*_NOTE.md`` below the cwd with the greatest file name.

        Comparison uses the file name (which carries the timestamp written by
        the note generator), not the full path, so a note in a deeper
        directory does not win just because of its directory name.
        """
        search_root = os.getcwd()
        dst_md_files = [
            os.path.join(root, file)
            for root, _, files in os.walk(search_root)
            for file in files
            if file.endswith("_NOTE.md")
        ]
        if not dst_md_files:
            raise FileNotFoundError(f"no *_NOTE.md file found under {search_root}")
        return max(dst_md_files, key=lambda p: (os.path.basename(p), p))

    def generate_code(self):
        """Parse the note and write out every file it contains."""
        self.__set_base_dir()
        self.__read_md_file()

    @staticmethod
    def _parse_code_blocks(md_text):
        """Split note text into ``(file_path, content)`` tuples.

        Args:
            md_text: Full text of the note.

        Returns:
            One tuple per section, in document order. Newlines directly
            before the closing fence are not part of the content.
        """
        return MarkdownToCode._BLOCK_PATTERN.findall(md_text)

    def __read_md_file(self):
        """Read the note and create each file listed in it."""
        with open(self.path, "r", encoding="utf-8") as f:
            md_text = f.read()
        for i, (file_path, code) in enumerate(self._parse_code_blocks(md_text)):
            print(f"{i}->", file_path)
            self.__create_from_file_path(file_path, code)

    def __set_base_dir(self):
        """Use the note's directory as the root for generated files."""
        self.base_dir = os.path.dirname(self.path)

    def __create_from_file_path(self, file_path, content):
        """Write ``content`` to ``file_path`` below ``base_dir``.

        Paths that would resolve outside ``base_dir`` (absolute paths or
        ``..`` components taken from the note) are reported and skipped.

        Args:
            file_path: Relative path taken from a section heading.
            content: Text to write.
        """
        base = os.path.abspath(self.base_dir)
        full_file_path = os.path.abspath(os.path.join(base, file_path))
        try:
            inside_base = os.path.commonpath([base, full_file_path]) == base
        except ValueError:
            # e.g. different drives on Windows
            inside_base = False
        if not inside_base:
            print(f"skip path outside of {base}: {file_path}")
            return
        os.makedirs(os.path.dirname(full_file_path), exist_ok=True)
        with open(full_file_path, "w", encoding="utf-8") as f:
            f.write(content)


class CollectMarkdownNote:
    """Embed Markdown notes into a copy of this script and restore them again.

    ``collect_markdown`` writes a timestamped copy of the running script and
    appends every collected note as a module-level triple-quoted string whose
    first line is a header carrying a group flag and the note's file name.
    ``un_collect_markdown`` reads the running script and writes each embedded
    note back to ``zz_note/<flag>/<name>_<timestamp>.md``.

    NOTE(review): the script is opened by its bare file name, so both
    operations appear to assume the current directory is the script's
    directory — confirm.
    """

    __slots__ = [
        "path",
        "md_path",
        "cur_file_name",
        "cur_file_new_name",
        "md_files",
        "dotnet_md_files",
        "python_md_files",
        "dotnet_line_number_start",
        "dotnet_split_flag",
        "dotnet_file_names",
        "python_split_flag",
        "current_new_md_file_name",
        "save_md_file_heads",
        "split_flag",
        "other_split_flag",
        "other_md_files",
    ]

    def __init__(self, path: str = None) -> None:
        """Set the directory to scan (current directory by default).

        Args:
            path: Root directory searched for notes; falsy selects the cwd.
        """
        self.path = path if path else os.getcwd()

        self.dotnet_line_number_start = []

        self.dotnet_file_names = []
        # Output file currently being restored; None while outside a note.
        self.current_new_md_file_name = None

        # Prefixes of note files that should be kept.
        self.save_md_file_heads = ["zz_"]
        # Collected note paths (outside any zz_note directory).
        self.md_files = []
        # Collected notes whose path contains "dotnet".
        self.dotnet_md_files = []
        # Collected notes whose path contains "python".
        self.python_md_files = []
        # Collected notes whose path contains "other".
        self.other_md_files = []
        # Name of the script copy to generate (set by collect_markdown).
        self.cur_file_new_name = None
        # File name of the running script.
        self.cur_file_name = os.path.basename(__file__)

    def collect_markdown(self):
        """Collect the project's notes into a new copy of this script."""
        self.__get_new_python_file_name()
        self.__get_cur_markdown_notes()
        self.__read_cur_markdown_notes()
        self.__generate_new_python_file()

    def __write2md(self, line):
        """Append ``line`` to the note being restored, if there is one.

        Lines consisting only of a triple quote are dropped; they delimit the
        embedded string and are not part of the note.
        """
        if self.current_new_md_file_name is None:
            return
        if line in ('"""\n', '"""', '\n"""'):
            return
        with open(self.current_new_md_file_name, "a", encoding="utf-8") as nf:
            nf.write(line)

    def un_collect_markdown(self):
        """Restore every note embedded in the running script."""
        # Flag length is not limited, matching what the writer can produce.
        header = re.compile(r'^"""#==(.+?)==')
        with open(self.cur_file_name, "r", encoding="utf-8") as f:
            for i, line in enumerate(f):
                # Written before the header check, so the header line itself
                # (seen while no note is open) never ends up in a note.
                self.__write2md(line)
                if header.match(line):
                    self.current_new_md_file_name = self.__get_output_md_file_name_by_line(line)
                    print(i + 1, "->", self.current_new_md_file_name, "start")
                if line.startswith('"""\n'):
                    print(i + 1, "->", self.current_new_md_file_name, "end")
                    self.current_new_md_file_name = None

    def __get_output_md_file_name_by_line(self, line):
        """Build the restore path for the note announced by a header line.

        The target directory ``zz_note/<flag>`` is created. A header that
        cannot be parsed maps to ``zz_note/other/other_<timestamp>.md``.

        Args:
            line: Header line of an embedded note.

        Returns:
            ``zz_note/<flag>/<name>_<timestamp>.md``.
        """
        dir_name = "other"
        name = "other"
        res = re.match(r'^"""#==(.+?)==(.+)\.md', line)
        if res:
            dir_name, name = res.groups()

        # Timestamp suffix keeps restored files from overwriting each other.
        cur_time_str = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
        os.makedirs(os.path.join("zz_note", dir_name), exist_ok=True)

        return "zz_note/" + dir_name + "/" + name + "_" + cur_time_str + ".md"

    def __read_cur_markdown_notes(self):
        """Sort the collected paths into the dotnet/python/other lists."""
        for md_file in self.md_files:
            if "dotnet" in md_file:
                self.dotnet_md_files.append(md_file)
            elif "python" in md_file:
                self.python_md_files.append(md_file)
            elif "other" in md_file:
                self.other_md_files.append(md_file)

    def __get_cur_markdown_notes(self):
        """Find note files below ``self.path`` and store them in ``md_files``.

        Directories whose name contains "zz_note" are not entered. A file is
        collected when it is not hidden, its name contains ".md" and matches
        ``zz_<word>_``.
        """
        print("开始收集,所以以zz_开头的,不在zz_note文件夹中的markdown文件")
        pattern = re.compile(r"zz_(\w+)_")

        for root, dirs, files in os.walk(self.path):
            dirs[:] = [d for d in dirs if "zz_note" not in d]
            candidates = [f for f in files if not f.startswith(".") and ".md" in f]
            for i, file in enumerate(candidates):
                if pattern.search(file):
                    print(i + 1, "->", file)
                    self.md_files.append(os.path.join(root, file))

    def __get_new_python_file_name(self):
        """Derive the name of the script copy: ``<stem>_<timestamp>.py``.

        The stem is the part of the script name before its first underscore,
        or the whole name without extension if it has no underscore.
        """
        cur_time_str = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
        file_names = self.cur_file_name.split("_")
        stem_source = file_names[0] if len(file_names) > 1 else self.cur_file_name
        self.cur_file_new_name = f"{os.path.splitext(stem_source)[0]}_{cur_time_str}.py"

    def __get_cur_file_name(self, md_file_path):
        """Return the file name component of ``md_file_path``."""
        return os.path.basename(md_file_path)

    def __get_split_flag(self, md_file_path):
        """Return the group flag of a note, taken from its file name.

        Only the file name is inspected, so a ``zz_..._`` directory somewhere
        in the path cannot be mistaken for the flag.

        Returns:
            The ``<word>`` of the first ``zz_<word>_`` match, or "other".
        """
        found = re.findall(r"zz_(\w+)_", os.path.basename(md_file_path))
        return found[0] if found else "other"

    def __generate_new_python_file(self):
        """Write the script copy followed by all collected notes.

        Source lines carrying the "@ignore" comment marker are replaced by a
        placeholder comment. Each note is appended as a triple-quoted string
        headed by its flag and file name. The result is then backed up.
        """
        ignore_marker = re.compile(r"# {0,1}@ignore")
        # Both files are opened once and closed deterministically.
        with open(self.cur_file_name, "r", encoding="utf-8") as f, open(
            self.cur_file_new_name, "a+", encoding="utf-8"
        ) as nf:
            for line in f:
                if ignore_marker.search(line):
                    line = "# <xxxxxxxxx>\n"
                nf.write(line)

            for md_file in self.md_files:
                split_flag = self.__get_split_flag(md_file)
                md_title = self.__get_cur_file_name(md_file)
                content = self.__read_md_file(md_file)
                nf.write(f'\n\n"""#=={split_flag}=={md_title}\n\n')
                nf.write(self.__filter_python_comment(content))
                nf.write('\n"""\n\n')

        # Back up the freshly generated script.
        self.__copy_current_python_file_to_dst_dir(self.cur_file_new_name)

    def __copy_current_python_file_to_dst_dir(self, current_python_file_name):
        """Copy the generated script into the platform's backup directory.

        Args:
            current_python_file_name: Name of the generated script.
        """
        backup_dirs = {
            "Windows": r"D:/zz",
            "Darwin": r"/Users/song/Code/zz_note",
        }
        dir_path = backup_dirs.get(platform.system())
        if dir_path is None:
            print("Unknown system")
            return
        os.makedirs(dir_path, exist_ok=True)
        shutil.copy(current_python_file_name, os.path.join(dir_path, current_python_file_name))

    def __filter_python_comment(self, content: str):
        """Strip triple quotes so the note cannot end the embedding string."""
        return content.replace('"""', "")

    def __read_md_file(self, file):
        """Return the UTF-8 text of ``file``."""
        with open(file, "r", encoding="utf-8") as f:
            return f.read()


def backup_current_project():
    """Copy this script's directory to a timestamped sibling directory.

    The copy is created next to the source directory and named
    ``<source dir name>_<YYYYmmdd_HHMMSS>``.

    Raises:
        OSError: Propagated from ``shutil.copytree`` (for example when the
            destination already exists).
    """
    # Resolve first: with a bare relative __file__ the plain dirname would be
    # "", which yields an empty directory name and an invalid copy source.
    src_dir = os.path.dirname(os.path.abspath(__file__))
    cur_time_str = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    compress_filename = f"{os.path.basename(src_dir)}_{cur_time_str}"
    dst_dir = os.path.join(os.path.dirname(src_dir), compress_filename)
    shutil.copytree(src_dir, dst_dir)


if __name__ == "__main__":
    # Interactive entry point: print the menu, read one option, run it.
    for menu_line in (
        "===============================start===============================",
        "1. Project Convert to Markdown    2. Markdown Convert to Project",
        "3. Collect Markdown Notes         4. Uncollect Markdown Notes",
        "5. Backup the project             6. Upload a markdown file ",
    ):
        print(menu_line)

    # Anything that is not an integer selects the "unknown option" branch.
    try:
        option_number = int(input("Please input a number: "))
    except Exception:
        option_number = 0
    print("您输入的整数是:", option_number)

    if option_number == 1:
        CodeToMarkDown().generate_md()
    elif option_number == 2:
        # An empty answer makes MarkdownToCode pick the newest note itself.
        md_path = input("请输入需要转换的markdown文件路径(默认使用当前路径最新的markdown文件) : ")
        MarkdownToCode(md_path).generate_code()
    elif option_number == 3:
        CollectMarkdownNote().collect_markdown()
    elif option_number == 4:
        is_del = input("是否要删除现有的文件夹(Y/N):")
        # Only an explicit "N"/"n" keeps the existing folder.
        if is_del not in ("N", "n"):
            try:
                shutil.rmtree("zz_note")
                print("zz_note文件夹,删除成功")
            except FileNotFoundError:
                print("no such directory,zz_note")
        CollectMarkdownNote().un_collect_markdown()
    elif option_number == 5:
        backup_current_project()
    elif option_number == 6:
        md_path = input("请输入需要上传的markdown文件路径: ")
        MarkdownToCnblog(os.path.abspath(md_path)).upload_markdown()
    else:
        print("unknown option")
    print("===============================done===============================")
# posted on 2024-01-21 13:35 Song的学习笔记阅读(36) 评论(0) 收藏举报
# 刷新页面返回顶部