
import fitz
import pandas as pd
# Accumulator for the flattened outline rows; extract_toc() appends to it.
toc_list = []

# PDF whose outline (bookmarks) is exported; opened here, closed after the
# table of contents has been read.
pdf_path = '控制之美.pdf'
document = fitz.open(pdf_path)
def extract_toc(toc, level=0, out=None):
    """Flatten a table of contents into ``Title`` / ``Page`` / ``Level`` rows.

    Each entry of *toc* is expected to be a sequence shaped like
    ``[lvl, title, page, extra]``.  When ``lvl`` is an integer (the flat,
    1-based layout produced by ``Document.get_toc``), the recorded level is
    ``level + lvl - 1`` so that top-level entries keep the value of *level*.
    Entries whose first element is not an integer are recorded at *level*.

    If ``extra`` is itself a list or tuple it is treated as nested child
    entries and processed one level deeper.  Any other ``extra`` value
    (for example a destination dict) is ignored.

    Entries without a title are skipped entirely; entries whose page is not
    an ``int`` produce no row, but their nested children are still visited.

    Args:
        toc: Iterable of table-of-contents entries.
        level: Level assigned to top-level entries.
        out: List that receives the row dicts.  Defaults to the module-level
            ``toc_list``.

    Returns:
        The list the rows were appended to.
    """
    if out is None:
        out = toc_list

    for item in toc:
        # Ignore anything that cannot be a [lvl, title, ...] entry, and
        # entries with an empty title.
        if not isinstance(item, (list, tuple)) or len(item) < 2 or not item[1]:
            continue

        depth = item[0]
        # The flat layout carries the hierarchy level in the first slot;
        # using it is what gives nested bookmarks a Level other than `level`.
        row_level = level + depth - 1 if isinstance(depth, int) else level

        page = item[2] if len(item) > 2 and isinstance(item[2], int) else None
        if page is not None:
            out.append({
                'Title': item[1],
                'Page': page,
                'Level': row_level,
            })

        # Only a real sequence of child entries is recursed into; iterating
        # a dict here would walk its keys rather than any TOC entries.
        children = item[3] if len(item) > 3 else None
        if isinstance(children, (list, tuple)) and children:
            extract_toc(children, row_level + 1, out)

    return out
# Read the outline and flatten it into toc_list; the document is closed even
# if reading or flattening fails.
try:
    toc = document.get_toc(simple=False)
    extract_toc(toc)
finally:
    document.close()

# Explicit columns keep the CSV header present when the PDF has no outline
# (an empty list would otherwise yield a DataFrame with no columns).
toc_df = pd.DataFrame(toc_list, columns=['Title', 'Page', 'Level'])
toc_df.to_csv('output.csv', index=False, encoding='utf-8')
· [翻译] 为什么 Tracebit 用 C# 开发
· 腾讯ima接入deepseek-r1,借用别人脑子用用成真了~
· Deepseek官网太卡,教你白嫖阿里云的Deepseek-R1满血版
· DeepSeek崛起:程序员“饭碗”被抢,还是职业进化新起点?
· RFID实践——.NET IoT程序读取高频RFID卡/标签