import json
from pptx import Presentation
from pptx.enum.shapes import MSO_SHAPE_TYPE
def _group_texts(group_shape):
    """Yield the stripped text of each text-bearing shape inside a group.

    Members that themselves expose a ``shapes`` collection are treated as
    nested groups and are walked recursively, so text at any nesting depth
    is reported, in shape order.
    """
    for shape in group_shape.shapes:
        if shape.has_text_frame:
            yield shape.text.strip()
        elif hasattr(shape, "shapes"):
            yield from _group_texts(shape)


def ppt_catch_format_text(filename):
    """Collect the text of every slide in a PowerPoint file.

    Args:
        filename: Path of the presentation, passed straight to
            ``Presentation``.

    Returns:
        A dict mapping the zero-based slide index to a list of strings.
        For each slide, the text of top-level shapes that expose a ``text``
        attribute comes first (in shape order), followed by the text found
        inside group shapes, including groups nested in other groups.
        Every string has its leading and trailing whitespace removed with
        ``str.strip()``.
    """
    prs = Presentation(filename)
    txt_oa = {}
    for index, slide in enumerate(prs.slides):
        # Pass 1: shapes sitting directly on the slide that carry text.
        texts = [
            shape.text.strip()
            for shape in slide.shapes
            if hasattr(shape, "text")
        ]
        # Pass 2: text held inside group shapes. This is kept as a separate
        # pass so grouped text is listed after the top-level text.
        for shape in slide.shapes:
            if shape.shape_type == MSO_SHAPE_TYPE.GROUP:
                texts.extend(_group_texts(shape))
        txt_oa[index] = texts
    return txt_oa
# Extract the text of every slide, keyed by slide index.
text_list = ppt_catch_format_text('report.pptx')
# Remove line breaks from the extracted strings *before* serializing.
# Stripping the two-character sequence backslash + "n" from the JSON output
# instead would also match an escaped backslash followed by "n" (the text
# C:\new is dumped as C:\\new) and leave corrupted, invalid JSON behind.
text_list = json.dumps(
    {
        slide: [text.replace("\n", "") for text in texts]
        for slide, texts in text_list.items()
    },
    ensure_ascii=False,
    indent=4,
)
print(text_list)
'''
Presentation /ˌpriːzenˈteɪʃn/ 演示
slides /slaɪdz/ 幻灯片
shape /ʃeɪp/ 形状
'''
Python 读取 PPT 内容
努力加载评论中...
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】凌霞软件回馈社区,博客园 & 1Panel & Halo 联合会员上线
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】博客园社区专享云产品让利特惠,阿里云新客6.5折上折
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步