# code2md / md2code
"""Convert a project's code files to a Markdown note, and a Markdown note back to code files.
"""
import datetime
import os
import re
class CodeToMarkDown:
    """Dump the source files of a directory tree into one Markdown note.

    Every selected file becomes a level-one heading that holds the file path
    (prefixed with the project directory name), followed by a fenced code
    block with the file content.  The note is created inside the project
    directory as ``Z_<timestamp>_NOTE.md``.
    """

    __slots__ = [
        "path",
        "md_path",
        "code_file_path",
        "exclude_dirs",
        "exclude_files",
        "md_suffix_table",
        "include_exts",
        "key_work_filter_list",
    ]

    def __init__(self, path: str = None) -> None:
        """Configure the converter.

        Args:
            path: Project root to scan; the current working directory is used
                when it is omitted or empty.
        """
        self.path = path if path else os.getcwd()
        # Directory names to skip while walking the tree.
        self.exclude_dirs = [
            "__pycache__",
            "venv",
            "build",
            "dist",
            "node_modules",
            "public",
            "LICENSE",
            "assets",
            "vendor",
            "tmp",
            "static",
            "templates",
            "bin",
            "obj",
            "Migrations",
            "Properties",
            "packages",
        ]
        # File-name fragments (mostly suffixes) of files to skip.
        self.exclude_files = [
            "_NOTE.md",
            ".d.ts",
            ".lock",
            ".png",
            ".woff2",
            ".ttf",
            ".woff",
            ".css",
            "README.md",
            ".toml",
            "swagger-ui-bundle.js",
            "-lock.json",
            "zz_code2md.py",
            "temp.md",
        ]
        # File suffix -> language label used on the Markdown code fence.
        self.md_suffix_table = {"command": "sh", "csproj": "xml"}
        # Only files ending with one of these suffixes are exported.
        self.include_exts = [
            ".py",
            ".vue",
            ".js",
            ".ts",
            ".html",
            ".go",
            ".mod",
            ".json",
            ".txt",
            ".sh",
            ".command",
            ".cs",
            "csproj",
            ".jsx",
            ".sln",
            ".sh",
            ".bat",
        ]
        # File-name suffixes to filter; not referenced by any method of this class.
        self.key_work_filter_list = [""]

    def generate_md(self):
        """Run the conversion: pick the note path, collect files, write the note."""
        self.__generate_md_file_path()
        self.__collect_code_files()
        self.__generate_md_file()

    def __generate_md_file_path(self):
        """Set ``md_path`` to a timestamped note file inside the project root."""
        cur_time_str = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        md_name = f"Z_{cur_time_str}_NOTE.md"
        self.md_path = os.path.join(self.path, md_name)

    def __collect_code_files(self):
        """Fill ``code_file_path`` with every file that passes the filters.

        Names starting with ``.`` are always skipped.  The exclude lists are
        matched by substring, so e.g. ``"bin"`` also prunes a directory called
        ``binary``.
        """
        self.code_file_path = []
        wanted_suffixes = tuple(self.include_exts)
        for root, dirs, files in os.walk(self.path):
            # Prune in place so os.walk does not descend into excluded directories.
            dirs[:] = [
                d
                for d in dirs
                if not d.startswith(".") and not any(ex in d for ex in self.exclude_dirs)
            ]
            for name in files:
                if name.startswith("."):
                    continue
                if any(ex in name for ex in self.exclude_files):
                    continue
                if name.endswith(wanted_suffixes):
                    self.code_file_path.append(os.path.join(root, name))

    def __generate_md_file(self):
        """Append every collected file to the note, printing progress."""
        for i, code_file_path in enumerate(self.code_file_path):
            print(i + 1, "->", self.__get_md_title_level_one(code_file_path))
            self.__readcode_writemd(code_file_path)

    def __get_md_title_level_one(self, code_file_path):
        """Return the heading text for a file: ``<project dir name>/<relative path>``.

        Args:
            code_file_path: Path of a file located below ``self.path``.

        Returns:
            The project directory name joined with the file's path relative to
            the project root.
        """
        # Normalise first: a trailing separator on self.path used to shift the
        # slice by one character and made basename() return an empty string.
        project_root = os.path.abspath(self.path)
        relative_path = os.path.relpath(os.path.abspath(code_file_path), project_root)
        return os.path.join(os.path.basename(project_root), relative_path)

    def __readcode_writemd(self, code_file_path):
        """Read one source file as UTF-8 and append it to the note.

        A file that cannot be read or decoded is reported on stdout and
        exported with empty content.

        Args:
            code_file_path: Path of the file to export.
        """
        with open(code_file_path, "r", encoding="utf-8") as f:
            try:
                content = f.read()
            except (UnicodeDecodeError, OSError) as e:
                print(f"{code_file_path}{e}文件编码读取错误")
                content = ""
        self.__write2md(content, code_file_path)

    def __write2md(
        self,
        content,
        code_file_path,
    ):
        """Append one heading plus fenced code block to the note file.

        Args:
            content: Text placed inside the code fence.
            code_file_path: Path of the originating file; used for the heading
                and for the fence's language label.
        """
        md_title_level_one = self.__get_md_title_level_one(code_file_path)
        code_label = self.__get_code_md_lable_by_suffix(code_file_path)
        with open(self.md_path, "a", encoding="utf-8") as f:
            f.write("\n")
            f.write(f"# `{md_title_level_one}`\n\n")
            f.write(f"```{code_label}\n")
            f.write(content)
            f.write("\n")
            f.write("```\n\n\n")

    def __get_code_md_lable_by_suffix(self, code_file_path):
        """Return the code-fence language label for a file.

        Args:
            code_file_path: Path whose final extension selects the label.

        Returns:
            The mapped label from ``md_suffix_table`` when present, otherwise
            the extension without its dot; an empty string when the path has
            no extension.
        """
        found = re.findall(r'\.[^.\\/:*?"<>|\r\n]+$', code_file_path)
        if not found:
            # Previously the empty findall() list itself was returned and ended
            # up in the fence as "```[]".
            return ""
        suffix = found[0][1:]
        mapped = self.md_suffix_table.get(suffix)
        return suffix if mapped is None else mapped
class MarkdownToCode:
    """Recreate source files from a Markdown note.

    The note is expected to consist of sections shaped like::

        # `relative/path/to/file`

        ```label
        <file content>
        ```

    Each section is written to ``<directory of the note>/<relative path>``.
    """

    __slots__ = ["path", "base_dir"]

    def __init__(self, path: str = None) -> None:
        """Remember which note to convert.

        Args:
            path: Path of the Markdown note.  When omitted or empty, the newest
                ``*_NOTE.md`` file below the current working directory is used.

        Raises:
            FileNotFoundError: No path was given and no ``*_NOTE.md`` exists.
        """
        self.path = path if path else self.__get_latest_md_file_path()

    def __get_latest_md_file_path(self):
        """Return the ``*_NOTE.md`` file with the greatest file name under the cwd.

        Ordering by file name (full path only as tie-breaker) makes the
        timestamp embedded in the name decide, not the directory the note
        happens to live in.

        Raises:
            FileNotFoundError: No matching file exists.
        """
        search_root = os.getcwd()
        candidates = []
        for root, _, files in os.walk(search_root):
            for name in files:
                if name.endswith("_NOTE.md"):
                    candidates.append(os.path.join(root, name))
        if not candidates:
            raise FileNotFoundError(f"no *_NOTE.md file found under {search_root}")
        return max(candidates, key=lambda p: (os.path.basename(p), p))

    def generate_code(self):
        """Parse the note and write every embedded file to disk."""
        self.__set_base_dir()
        self.__read_md_file()

    def __read_md_file(self):
        """Extract all ``(path, code)`` sections from the note and write them."""
        with open(self.path, "r", encoding="utf-8") as f:
            md_text = f.read()
        # Heading line, opening fence with an optional label, lazily matched
        # body, closing fence.  The body may be empty ("*?"): with "+?" an
        # empty file made the match run on into the following section and
        # swallow its heading and code.
        pattern = r"^# `(.+)`\n+```[^\n`]*\n([\s\S]*?)\n+```\n+"
        matches = re.findall(pattern, md_text, re.MULTILINE)
        for i, (file_path, code) in enumerate(matches):
            print(f"{i}->", file_path)
            self.__create_from_file_path(file_path, code)

    def __set_base_dir(self):
        """Set ``base_dir`` to the absolute directory that contains the note."""
        # abspath keeps base_dir non-empty when the note is given as a bare file name.
        self.base_dir = os.path.dirname(os.path.abspath(self.path))

    def __create_from_file_path(self, file_path, content):
        """Write ``content`` to ``file_path`` below ``base_dir``.

        Missing parent directories are created.  A path that would resolve
        outside ``base_dir`` (absolute path or ``..`` components) is reported
        on stdout and skipped instead of being written.

        Args:
            file_path: Relative path taken from a note heading.
            content: Text to store in the file.
        """
        base_dir = os.path.abspath(self.base_dir)
        full_file_path = os.path.abspath(os.path.join(base_dir, file_path))
        # join(base_dir, "") appends exactly one separator, also for a root dir.
        if not full_file_path.startswith(os.path.join(base_dir, "")):
            print(f"skipped (outside of {base_dir}): {file_path}")
            return
        os.makedirs(os.path.dirname(full_file_path), exist_ok=True)
        with open(full_file_path, "w", encoding="utf-8") as f:
            f.write(content)
class CollectMarkdownNote:
    """Embed Markdown notes into a copy of this script and extract them again.

    ``collect_markdown`` copies the running script to a new timestamped file
    and appends every collected note as a triple-quoted string whose first
    line is a split flag followed by the note's file name.
    ``un_collect_markdown`` scans the running script for such strings and
    writes each one back to its own Markdown file.
    """

    __slots__ = [
        "path",
        "md_path",
        "cur_file_name",
        "cur_file_new_name",
        "md_files",
        "dotnet_md_files",
        "python_md_files",
        "dotnet_line_number_start",
        "dotnet_split_flag",
        "dotnet_file_names",
        "python_split_flag",
        "current_new_md_file_name",
        "save_md_file_heads",
    ]

    def __init__(self, path: str = None) -> None:
        """Configure the collector.

        Args:
            path: Directory searched for notes; the current working directory
                is used when it is omitted or empty.
        """
        self.path = path if path else os.getcwd()
        self.dotnet_line_number_start = []
        self.dotnet_split_flag = "#==dotnet=="
        self.python_split_flag = "#==python=="
        self.dotnet_file_names = []
        # Output file of the section currently being extracted (None = outside a section).
        self.current_new_md_file_name = None
        # Only Markdown files whose name starts with one of these prefixes are collected.
        self.save_md_file_heads = ["z_"]

    def collect_markdown(self):
        """Create a timestamped copy of this script with all notes appended."""
        self.__get_cur_python_file_name()
        self.__get_new_python_file_name()
        self.__get_cur_markdown_notes()
        self.__read_cur_markdown_notes()
        self.__generate_new_python_file()

    def __write2md(self, line):
        """Append ``line`` to the note being extracted, if any.

        The closing ``\"\"\"`` delimiter is never written.  Lines read from a
        file carry their newline, so the delimiter is compared after removing
        the line ending.

        Args:
            line: One line of the script, including its line ending.
        """
        if self.current_new_md_file_name is None:
            return
        if line.rstrip("\r\n") == '"""':
            return
        with open(self.current_new_md_file_name, "a+", encoding="utf-8") as nf:
            nf.write(line)

    def un_collect_markdown(self):
        """Extract every embedded note of this script into its own Markdown file.

        Sections opened with either the dotnet or the python split flag are
        restored, mirroring what ``collect_markdown`` writes.
        """
        self.__get_cur_python_file_name()
        split_flags = (self.dotnet_split_flag, self.python_split_flag)
        with open(self.cur_file_name, "r", encoding="utf-8") as f:
            for i, line in enumerate(f):
                # Write first: the opening line itself is not part of the note
                # because no output file is selected yet at that point.
                self.__write2md(line)
                for flag in split_flags:
                    if line.startswith(f'"""{flag}'):
                        self.current_new_md_file_name = self.__get_output_md_file_name_by_line(line, flag)
                        print(i + 1, "->", self.current_new_md_file_name, "start")
                        break
                if line.startswith('"""\n') and self.current_new_md_file_name is not None:
                    print(i + 1, "->", self.current_new_md_file_name, "end")
                    self.current_new_md_file_name = None

    def __get_output_md_file_name_by_line(self, line, split_flag=None):
        """Derive the output file name from a section's opening line.

        Args:
            line: Opening line; the text after the flag is the original note name.
            split_flag: Flag that opened the section; defaults to the dotnet flag.

        Returns:
            ``<note name up to its first dot>_<timestamp>.md``.
        """
        if split_flag is None:
            split_flag = self.dotnet_split_flag
        cur_time_str = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
        note_stem = line.split(split_flag)[1].strip().split(".")[0]
        return note_stem + "_" + cur_time_str + ".md"

    def __read_cur_markdown_notes(self):
        """Split ``md_files`` into ``dotnet_md_files`` and ``python_md_files``.

        The keywords are looked up in the path relative to ``self.path`` so
        that the names of directories above the search root cannot influence
        the result.  Files matching neither keyword are ignored.
        """
        self.dotnet_md_files = []
        self.python_md_files = []
        for md_file in self.md_files:
            relative_name = os.path.relpath(md_file, self.path)
            if "dotnet" in relative_name:
                self.dotnet_md_files.append(md_file)
            elif "python" in relative_name:
                self.python_md_files.append(md_file)

    def __get_cur_markdown_notes(self):
        """Fill ``md_files`` with the ``.md`` files below ``self.path`` to collect.

        A file qualifies when its name does not start with ``.``, ends with
        ``.md`` and starts with one of ``save_md_file_heads``.
        """
        self.md_files = []
        wanted_heads = tuple(self.save_md_file_heads)
        for root, _, files in os.walk(self.path):
            for name in files:
                if name.startswith(".") or not name.endswith(".md"):
                    continue
                if name.startswith(wanted_heads):
                    self.md_files.append(os.path.join(root, name))

    def __get_new_python_file_name(self):
        """Set ``cur_file_new_name`` to ``<stem>_<timestamp>.py``.

        ``<stem>`` is the current file name up to its first underscore, without
        extension, so repeated collecting does not pile up timestamps.
        """
        cur_time_str = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
        # Without an underscore split() yields the whole name, so one expression
        # covers both cases.
        stem = os.path.splitext(self.cur_file_name.split("_")[0])[0]
        self.cur_file_new_name = f"{stem}_{cur_time_str}.py"

    def __get_cur_file_name(self, md_file_path):
        """Return the file-name part of ``md_file_path``."""
        return os.path.basename(md_file_path)

    def __generate_new_python_file(self):
        """Copy this script to ``cur_file_new_name`` and append all notes.

        Both files are opened exactly once; previously the output file was
        reopened for every copied line and was undefined for an empty source.
        """
        with open(self.cur_file_name, "r", encoding="utf-8") as src, open(
            self.cur_file_new_name, "a+", encoding="utf-8"
        ) as dst:
            for line in src:
                dst.write(line)
            self.__append_notes(dst, self.dotnet_md_files, self.dotnet_split_flag)
            self.__append_notes(dst, self.python_md_files, self.python_split_flag)

    def __append_notes(self, dst, md_files, split_flag):
        """Write each note in ``md_files`` to ``dst`` as a flagged triple-quoted string."""
        for md_file in md_files:
            md_title = self.__get_cur_file_name(md_file)
            content = self.__read_md_file(md_file)
            dst.write(f'\n\n"""{split_flag}{md_title}\n\n')
            dst.write(content)
            dst.write('\n"""\n\n')

    def __read_md_file(self, file):
        """Return the UTF-8 text content of ``file``."""
        with open(file, "r", encoding="utf-8") as f:
            return f.read()

    def __get_cur_python_file_name(self):
        """Set ``cur_file_name`` to the base name of this script file."""
        self.cur_file_name = os.path.basename(__file__)
if __name__ == "__main__":
    # Interactive entry point: show the menu, read one option, run the matching tool.
    print("===============================start===============================")
    print("1. Project Convert to Markdown 2. Markdown Convert to Project")
    print("3. Collect Markdown Notes 4. Uncollect Markdown Notes")
    try:
        option_number = int(input("Please input a number: "))
    except (ValueError, EOFError):
        # Non-numeric or missing input falls through to the "unknown option" branch.
        option_number = 0
    print("您输入的整数是:", option_number)
    if option_number == 1:
        code2md = CodeToMarkDown()
        code2md.generate_md()
    elif option_number == 2:
        # Strip so that whitespace-only input selects the default (latest note)
        # instead of being treated as a path.
        md_path = input("请输入需要转换的markdown文件路径(默认使用当前路径最新的markdown文件) : ").strip()
        md2code = MarkdownToCode(md_path)
        md2code.generate_code()
    elif option_number == 3:
        collect_md = CollectMarkdownNote()
        collect_md.collect_markdown()
    elif option_number == 4:
        collect_md = CollectMarkdownNote()
        collect_md.un_collect_markdown()
    else:
        print("unknown option")
    print("===============================done===============================")
# =============================================================================================================