# Convert code to Markdown with Python
"""Collect a project's source code files into a single Markdown document."""
import os
import re
import sys
from datetime import datetime
# Directories to exclude
exclude_dirs = ["__pycache__", "venv", "build", "dist", "node_modules", "public", "LICENSE", "assets", "vendor", "tmp", "static", "templates", "bin", "obj", "Migrations", "Properties"]
# File suffixes to exclude
exclude_files = ["_NOTE.md", ".d.ts", ".lock", ".png", ".woff2", ".ttf", ".woff", ".css", "README.md", ".toml", "swagger-ui-bundle.js", "-lock.json"]
# File suffixes to keep. Every entry carries its leading dot so that a
# suffix test only matches real extensions ("cs" without the dot would also
# match names such as "docs" or "topics").
include_exts = [".py", ".vue", ".js", ".ts", ".html", ".go", ".mod", ".json", ".txt", ".sh", ".command", ".cs", ".csproj", ".jsx"]
# Maps a file suffix (without the dot) to the label written after the opening
# code fence, for suffixes that should not be used as the label verbatim.
md_suffix_table = {
    "command": "sh",
}
def get_root_dir(dir_path):
    """Classify the direct children of ``dir_path``.

    Every entry name is printed as it is visited. Names starting with "."
    are ignored. A name that ends in a dot-suffix is treated as a file (the
    file system is not consulted): it is kept, by bare name, only when it
    matches the keep pattern and not the reject pattern. Any other name is
    treated as a directory and collected as a joined path unless it contains
    one of the excluded directory names.

    Args:
        dir_path: Directory whose entries are listed.

    Returns:
        A tuple ``(path_list, root_file_list)``: joined paths of the retained
        directory-like entries, and names of the retained file-like entries.
    """
    has_extension = re.compile(r'\.[^.\\/:*?"<>|\r\n]+$')
    rejected_suffix = re.compile(r"(d\.ts|config\.ts|-lock\.json)$")
    kept_suffix = re.compile(r"(\.py|vue|config\.js|js|ts|html|txt|go|mod|json)$")
    rejected_name = re.compile(r"(__pycache__|venv|build|dist|node_modules|public|LICENSE)")

    path_list = []
    root_file_list = []
    for entry in os.listdir(dir_path):
        print(entry)
        # Hidden entries are never considered
        if entry.startswith("."):
            continue
        if has_extension.search(entry):
            # File-like entry: reject list wins over keep list
            if not rejected_suffix.search(entry) and kept_suffix.search(entry):
                root_file_list.append(entry)
            continue
        # Directory-like entry: keep it unless its name is excluded
        if not rejected_name.search(entry):
            path_list.append(os.path.join(dir_path, entry))
    return path_list, root_file_list
def get_deep_dirs(path):
    """Walk ``path`` and collect the files whose names match the keep pattern.

    Directories whose name starts with "." or contains one of the excluded
    names are removed from the walk, so nothing below them is visited. Files
    whose name starts with "." or ends with one of the excluded suffixes are
    dropped before the keep pattern is applied.

    Args:
        path: Directory to walk.

    Returns:
        List of ``os.path.join(root, file)`` paths in walk order.
    """
    skipped_dir = re.compile(r"(__pycache__|venv|build|dist|node_modules|public|LICENSE|assets|vendor|tmp|static|templates)")
    skipped_file = re.compile(r"(_NOTE\.md|\.d\.ts|\.lock|\.png|\.woff2|\.ttf|\.woff|\.css|README\.md|\.toml|swagger-ui-bundle.js|-lock\.json)$")
    wanted_file = re.compile(r"(\.py|vue|js|ts|html|go|mod|json)$")

    file_path = []
    for root, dirs, files in os.walk(path):
        # Slice assignment edits the list os.walk holds, which prunes the walk
        dirs[:] = [
            name for name in dirs
            if not (name.startswith(".") or skipped_dir.search(name))
        ]
        files[:] = [
            name for name in files
            if not (name.startswith(".") or skipped_file.search(name))
        ]
        file_path.extend(
            os.path.join(root, name) for name in files if wanted_file.search(name)
        )
    return file_path
def get_deep_dirs_fast(path, *, skip_dirs=None, skip_files=None, keep_exts=None):
    """Collect the paths of all code files below ``path``.

    Hidden directories and files (names starting with ".") are always
    skipped. A directory is pruned from the walk when its name contains any
    entry of the directory filter. A file is collected when its name ends
    with one of the kept suffixes and does not end with one of the excluded
    suffixes. Excluded entries are compared as suffixes, as their
    configuration comment and the anchored pattern in ``get_deep_dirs``
    describe, so a name that merely contains such text in the middle (for
    example ``theme.css.js``) is no longer dropped.

    NOTE: directory filters are substring matches, so a short entry such as
    "bin" also prunes directories like "bindings".

    Args:
        path: Root directory of the project.
        skip_dirs: Directory-name fragments to prune; defaults to the
            module-level ``exclude_dirs``.
        skip_files: File-name suffixes to drop; defaults to the module-level
            ``exclude_files``.
        keep_exts: File-name suffixes to collect; defaults to the
            module-level ``include_exts``.

    Returns:
        List of paths of the collected files, in walk order.
    """
    dir_markers = exclude_dirs if skip_dirs is None else skip_dirs
    # str.endswith accepts a tuple, which tests all suffixes in one call
    rejected_suffixes = tuple(exclude_files if skip_files is None else skip_files)
    wanted_suffixes = tuple(include_exts if keep_exts is None else keep_exts)

    code_file_path = []
    for root, dirs, files in os.walk(path):
        # Slice assignment edits the list os.walk holds, which prunes the walk
        dirs[:] = [
            d for d in dirs
            if not d.startswith(".") and not any(marker in d for marker in dir_markers)
        ]
        code_file_path.extend(
            os.path.join(root, f)
            for f in files
            if not f.startswith(".")
            and not f.endswith(rejected_suffixes)
            and f.endswith(wanted_suffixes)
        )
    return code_file_path
def readcode_writemd(code_file_path, project_path, markdown_file_path):
    """Read one code file and append it to the Markdown file.

    The file is read as UTF-8. If it cannot be decoded, a message is printed
    and an empty code section is written for it instead.

    Args:
        code_file_path: Path of the code file to read.
        project_path: Root path of the project.
        markdown_file_path: Path of the Markdown file that is appended to.
    """
    matched = re.search(r'\.[^.\\/:*?"<>|\r\n]+$', code_file_path)
    # Always hand a string downstream: a path without an extension used to
    # leave an empty list here, which is unhashable and broke the label lookup.
    suffix = matched.group(0)[1:] if matched else ""
    try:
        with open(code_file_path, "r", encoding="utf-8") as f:
            rest_line = f.read()
    except UnicodeDecodeError as e:
        print(f"{code_file_path}{e}文件编码读取错误,非utf-8")
        rest_line = ""
    write2md(rest_line, suffix, code_file_path, project_path, markdown_file_path)
def get_md_title_path(code_file_path, project_path):
    """Build the Markdown title of a code file.

    The title is the project directory name followed by the file's path
    relative to the project root, i.e. everything before the project
    directory is dropped.

    Args:
        code_file_path: Path of the code file.
        project_path: Root path of the project; a trailing path separator
            is tolerated.

    Returns:
        The title path, e.g. ``proj/src/main.py``.
    """
    # normpath removes a trailing separator, which would otherwise make the
    # basename empty and shift the relative part by one character.
    project_root = os.path.normpath(project_path)
    relative_part = os.path.relpath(code_file_path, project_root)
    return os.path.join(os.path.basename(project_root), relative_part)
def get_code_md_lable_by_suffix(suffix):
    """Return the code-fence label for ``suffix``.

    The label is the entry of ``md_suffix_table`` for the suffix when one
    exists (and is not None); otherwise the suffix itself is returned.
    """
    mapped_label = md_suffix_table.get(suffix)
    return suffix if mapped_label is None else mapped_label
def write2md(content, suffix, code_file_path, project_path, markdown_file_path):
    """Append one code file as a titled, fenced section to the Markdown file.

    Args:
        content: Text of the code file.
        suffix: File suffix used to pick the code-fence label.
        code_file_path: Path of the code file (used for the title).
        project_path: Root path of the project (used for the title).
        markdown_file_path: Markdown file opened in append mode.
    """
    with open(markdown_file_path, "a", encoding="utf-8") as md_file:
        heading = get_md_title_path(code_file_path, project_path)
        # Blank line, then the title as a level-1 heading
        md_file.write(f"\n# `{heading}`\n\n")
        fence_label = get_code_md_lable_by_suffix(suffix)
        md_file.write(f"```{fence_label}\n")
        md_file.write(content)
        # Close the fence on its own line and leave spacing before the next section
        md_file.write("\n```\n\n\n")
def get_root_path(path):
    """Return the directory to treat as the project root for ``path``.

    Args:
        path: Path of a file or a directory.

    Returns:
        The containing directory when ``path`` is an existing file, otherwise
        ``path`` unchanged. For a bare file name in the current directory
        this is ``"."`` rather than an empty string, because walking ``""``
        finds nothing.
    """
    if os.path.isfile(path):
        return os.path.dirname(path) or os.curdir
    return path
def get_file_name():
    """Return the output file name, stamped with the current local time.

    Returns:
        A name of the form ``Z_YYYY-MM-DD_HH-MM-SS_NOTE.md``.
    """
    # The format spec after ":" is applied to the datetime like strftime
    return f"Z_{datetime.now():%Y-%m-%d_%H-%M-%S}_NOTE.md"
def main():
    """Export the project containing the given path to a timestamped Markdown file.

    The path is taken from the first command-line argument. When no argument
    is given the user is prompted for one, instead of the script failing with
    an IndexError. The Markdown file is created in the project root.
    """
    if len(sys.argv) > 1:
        target = sys.argv[1]
    else:
        # A path dragged into a terminal is often quoted or padded with spaces
        target = input("请拖入项目根目录下随便一个文件:\n").strip().strip("'\"")
    root_path = get_root_path(target)
    md_file_name = get_file_name()
    md_file_path = os.path.join(root_path, md_file_name)
    file_path_list = get_deep_dirs_fast(root_path)
    for i, file_path in enumerate(file_path_list):
        print(i, "->", get_md_title_path(file_path, root_path))
        readcode_writemd(file_path, root_path, md_file_path)
    print("=============done=============")
    # os.system('open '+root_path)


if __name__ == "__main__":
    main()