"""convert code to markdown
"""
import os
import re
from datetime import datetime
# Directory names to skip while walking the project tree.
exclude_dirs = [
    "__pycache__",
    "venv",
    "build",
    "dist",
    "node_modules",
    "public",
    "LICENSE",
    "assets",
    "vendor",
    "tmp",
    "static",
    "templates",
    "bin",
    "obj",
    "Migrations",
    "Properties",
    "packages",  # local dependency packages of dotnet
]
# File-name fragments (mostly suffixes) that mark a file to be skipped.
exclude_files = [
    "_NOTE.md",
    ".d.ts",
    ".lock",
    ".png",
    ".woff2",
    ".ttf",
    ".woff",
    ".css",
    "README.md",
    ".toml",
    "swagger-ui-bundle.js",
    "-lock.json",
    "zz_code2md.py",
    "temp.md",
]
# File-name suffixes of the files that are kept.
# Every entry carries its leading dot so that only real extensions match
# (a bare "csproj" would also accept any name that merely ends in those letters).
include_exts = [
    ".py",
    ".vue",
    ".js",
    ".ts",
    ".html",
    ".go",
    ".mod",
    ".json",
    ".txt",
    ".sh",
    ".command",
    ".cs",
    ".csproj",
    ".jsx",
    ".sln",
    ".bat",
]
# Maps a file suffix (without the dot) to the Markdown code-fence label used
# for it; suffixes that are not listed are used as the label unchanged.
md_suffix_table = {"command": "sh", "csproj": "xml"}
class CodeToMarkDown:
    """Collect the source files of a project and dump them into one Markdown file.

    Attributes (declared through ``__slots__``):
        path: Root directory of the project that is scanned.
        md_path: Path of the Markdown file that is written.
        code_file_path: List with the paths of the files selected for export.
    """

    __slots__ = ["path", "md_path", "code_file_path"]

    def __init__(self, path: str = None) -> None:
        """Store the project root.

        Args:
            path: Project root directory. ``None`` or an empty string selects
                the current working directory.
        """
        self.path = path if path else os.getcwd()

    def generate_md(self):
        """Run the export: choose the output path, collect the files, write them."""
        self.__generate_md_file_path()
        self.__collect_code_files()
        self.__generate_md_file()

    def __generate_md_file_path(self):
        """Set ``md_path`` to a timestamped ``Z_<time>_NOTE.md`` inside the root."""
        cur_time_str = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        md_name = f"Z_{cur_time_str}_NOTE.md"
        self.md_path = os.path.join(self.path, md_name)

    def __collect_code_files(self):
        """Walk the project root and fill ``code_file_path`` with exportable files.

        Hidden entries (names starting with ``.``) are always skipped.
        Directories are skipped when their name equals an entry of
        ``exclude_dirs``; files are skipped when their name ends with an entry
        of ``exclude_files`` and kept when it ends with an entry of
        ``include_exts``.
        """
        # Exact names / suffixes instead of substring tests: a substring test
        # would also drop unrelated folders such as "builders" or "objects".
        skipped_dirs = set(exclude_dirs)
        skipped_suffixes = tuple(exclude_files)
        kept_suffixes = tuple(include_exts)

        self.code_file_path = []
        for root, dirs, files in os.walk(self.path):
            # Prune in place so that os.walk does not descend into skipped folders.
            dirs[:] = [
                d for d in dirs if not d.startswith(".") and d not in skipped_dirs
            ]
            for file in files:
                if file.startswith(".") or file.endswith(skipped_suffixes):
                    continue
                if file.endswith(kept_suffixes):
                    self.code_file_path.append(os.path.join(root, file))

    def __generate_md_file(self):
        """Print a progress line for every collected file and append it to the note."""
        for index, code_file_path in enumerate(self.code_file_path, start=1):
            print(index, "->", self.__get_md_title_level_one(code_file_path))
            self.__readcode_writemd(code_file_path)

    def __get_md_title_level_one(self, code_file_path):
        """Return the Markdown title of a code file.

        The title is the file path relative to the project root, prefixed with
        the name of the root folder itself.

        Args:
            code_file_path: Path of a code file located below ``self.path``.

        Returns:
            ``<root folder name>/<path relative to the root>``.
        """
        # abspath also strips a trailing separator, which would otherwise give
        # an empty folder name and shift the relative part by one character.
        root = os.path.abspath(self.path)
        relative = os.path.relpath(code_file_path, root)
        return os.path.join(os.path.basename(root), relative)

    def __readcode_writemd(self, code_file_path):
        """Read one code file as UTF-8 and append it to the Markdown note.

        A file that cannot be opened or decoded is reported on stdout and
        written with an empty body, so one bad file does not abort the export.

        Args:
            code_file_path: Path of the code file to read.
        """
        try:
            with open(code_file_path, "r", encoding="utf-8") as f:
                content = f.read()
        except (UnicodeDecodeError, OSError) as e:
            print(f"{code_file_path}{e}文件编码读取错误")
            content = ""
        self.__write2md(content, code_file_path)

    def __write2md(
        self,
        content,
        code_file_path,
    ):
        """Append one titled, fenced code section to the Markdown note.

        Args:
            content: Text of the code file.
            code_file_path: Path of the code file; used for the section title
                and for the code-fence label.
        """
        md_title_level_one = self.__get_md_title_level_one(code_file_path)
        code_label = self.__get_code_md_lable_by_suffix(code_file_path)
        section = (
            "\n"
            f"# `{md_title_level_one}`\n\n"
            f"```{code_label}\n"
            f"{content}"
            "\n"
            "```\n\n\n"
        )
        with open(self.md_path, "a", encoding="utf-8") as f:
            f.write(section)

    def __get_code_md_lable_by_suffix(self, code_file_path):
        """Return the code-fence label for a file, derived from its suffix.

        Args:
            code_file_path: Path of the code file.

        Returns:
            The entry of ``md_suffix_table`` for the suffix if there is one,
            otherwise the suffix without its dot; an empty string when the
            path has no suffix.
        """
        matches = re.findall(r'\.[^.\\/:*?"<>|\r\n]+$', code_file_path)
        if not matches:
            # No suffix: use an unlabeled fence instead of leaking "[]" into it.
            return ""
        suffix = matches[0][1:]
        mapped = md_suffix_table.get(suffix)
        return mapped if mapped is not None else suffix
if __name__ == "__main__":
    # Local import: only the script entry point needs the command line.
    import sys

    print("====================start====================")
    # Default project root, used when no path is passed on the command line.
    root_path = """F:\\song\\dotnet_efcore_two_database_learn\\SqliteToOracle"""
    if len(sys.argv) > 1:
        # Optional first argument overrides the default project root.
        root_path = sys.argv[1]
    code2md = CodeToMarkDown(root_path)
    code2md.generate_md()
    print("====================done====================")