使用python对markdown标题进行排序
效果
排序前:
排序后:
方法一:使用regex直接对文件进行处理,无需依赖库 (推荐)
原理
用正则找出所有的二级标题(以 `## ` 开头的行),把每个标题及其后面的内容归为一段,再按标题开头的数字降序排列这些段落即可
#%%
import re

filename = r"mysql.md"

# A level-2 heading line: exactly two '#', then spaces/tabs, then the title.
# Only horizontal whitespace is allowed after '##' so a match never spans
# more than one line.
H2_LINE = re.compile(r"^##[ \t]+(.*)")


def heading_number(title):
    """Return the integer that a heading title starts with.

    The whole leading run of digits is used, so "10. Indexes" yields 10
    rather than 1.

    Raises:
        ValueError: if the title does not begin with a decimal number.
    """
    match = re.match(r"\s*(\d+)", title)
    if match is None:
        raise ValueError(f"heading does not start with a number: {title!r}")
    return int(match.group(1))


def sort_h2_sections(lines):
    """Reorder the level-2 sections of a markdown document, highest number first.

    A section is a "## " heading line plus every line up to the next such
    heading. Lines that appear before the first heading are kept, in place,
    at the top of the result.

    Args:
        lines: the document's lines, each still carrying its line ending.

    Returns:
        The reassembled document as a single string.

    Raises:
        ValueError: if a level-2 heading does not start with a number.
    """
    lines = list(lines)
    # A final line without a newline would fuse with whatever heading ends up
    # after it once the sections are moved around.
    if lines and not lines[-1].endswith("\n"):
        lines[-1] += "\n"

    preamble = []
    sections = []  # each entry: (heading number, [heading line, body lines...])
    for line in lines:
        heading = H2_LINE.match(line)
        if heading:
            sections.append((heading_number(heading.group(1)), [line]))
        elif sections:
            sections[-1][1].append(line)
        else:
            preamble.append(line)

    # list.sort is stable, also with reverse=True: sections sharing a number
    # keep their original relative order.
    sections.sort(key=lambda section: section[0], reverse=True)
    return "".join(preamble) + "".join(
        "".join(section_lines) for _, section_lines in sections
    )


#%%
if __name__ == "__main__":
    # Read the file once and close it promptly.
    with open(filename, encoding="utf8") as f:
        text_lines = f.readlines()
    with open(r"result.md", "w", encoding="utf8") as f:
        f.write(sort_h2_sections(text_lines))
#%%
方法二:使用markdown解析库进行处理
原理
1. 使用 `mistune` 实现 md -> html
2. 使用 `bs4` 修改 html(变相操作 md)
3. 使用 `html2text` 实现 html -> md
代码:
#%%
import re

filename = r"XXXXXXXXXXXXXXXXXXXXXX\mysql.md"


def title_number(title):
    """Return the integer that a heading title starts with.

    The whole leading run of digits is used, so "10. Indexes" yields 10
    rather than 1.

    Raises:
        ValueError: if the title does not begin with a decimal number.
    """
    match = re.match(r"\s*(\d+)", title)
    if match is None:
        raise ValueError(f"heading does not start with a number: {title!r}")
    return int(match.group(1))


def sort_html_sections(elements):
    """Group HTML elements into <h2> sections and order them, highest number first.

    Args:
        elements: iterable of (tag_name, text, html) triples in document
            order, where html is the element's serialized markup.

    Returns:
        The concatenated markup of the reordered sections. Elements that come
        before the first h2 are kept at the front.

    Raises:
        ValueError: if an h2 title does not start with a number.
    """
    leading = []
    sections = []  # each entry: (heading number, [h2 markup, following markup...])
    for tag_name, text, html in elements:
        if tag_name == "h2":
            sections.append((title_number(text), [html]))
        elif sections:
            sections[-1][1].append(html)
        else:
            leading.append(html)

    # list.sort is stable, also with reverse=True: sections sharing a number
    # keep their original relative order.
    sections.sort(key=lambda section: section[0], reverse=True)
    return "".join(leading) + "".join("".join(parts) for _, parts in sections)


#%%
if __name__ == "__main__":
    # The third-party packages are only needed for the conversion itself, so
    # they are imported here rather than at the top of the script.
    import html2text
    import mistune
    from bs4 import BeautifulSoup

    with open(filename, encoding="utf8") as f:
        text = f.read()

    # markdown -> HTML
    render = mistune.create_markdown(renderer='html')
    html = render(text)

    soup = BeautifulSoup(html, "lxml")
    tag = soup.find("h2")
    if tag is None:
        raise ValueError(f"no level-2 heading found in {filename!r}")

    # Walk the first h2 and every following sibling exactly once. Each
    # element is recorded a single time, so nothing is duplicated and a
    # missing sibling never turns into the text "None".
    elements = []
    while tag is not None:
        elements.append((tag.name, tag.text, str(tag)))
        # Look up the next tag at the same level.
        tag = tag.find_next_sibling()

    result = sort_html_sections(elements)

    # Convert the reordered HTML back to markdown.
    with open("result.md", "w", encoding="utf8") as f:
        f.write(html2text.html2text(result))