C# .NET 使用 Open XML SDK 提取 PPT 中的音频、视频、图片、文本

C# .NET 使用 Open XML SDK 提取 PPT 中的音频、视频、图片、文本

 

命名空间:

using System;
using DocumentFormat.OpenXml.Packaging;
using System.IO;
using System.Linq;
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Presentation;
using A = DocumentFormat.OpenXml.Drawing;
using P14 = DocumentFormat.OpenXml.Office2010.PowerPoint;

  

代码如下:

  

        /// <summary>
        /// Demonstrates how to locate audio, video, pictures and text on every slide of a
        /// PowerPoint file with the Open XML SDK. The file is opened read-only. The extracted
        /// values are only held in local variables; copy the relevant section into your own
        /// code and consume the values where indicated.
        /// </summary>
        /// <param name="path">Path of the .pptx file to inspect.</param>
        public void PptInfo(string path)
        {
            using (var doc = PresentationDocument.Open(path, false))
            {
                var presentation = doc.PresentationPart?.Presentation;
                var slideIdList = presentation?.SlideIdList;
                if (slideIdList == null)
                {
                    return;
                }

                // Number displayed on the first slide (1 when the attribute is absent).
                // The parentheses-equivalent split matters: "a ?? 1 + i" would parse as "a ?? (1 + i)".
                int firstSlideNumber = presentation.FirstSlideNum?.Value ?? 1;
                int slideIndex = -1;

                foreach (SlideId slideId in slideIdList.Elements<SlideId>())
                {
                    // Advance for every entry, including skipped ones, so numbering follows list position.
                    slideIndex++;

                    var slidePart = doc.PresentationPart.GetPartById(slideId.RelationshipId) as SlidePart;
                    if (slidePart == null || slidePart.Slide == null)
                    {
                        continue;
                    }

                    // Slide number as displayed in PowerPoint.
                    int slideNumber = firstSlideNumber + slideIndex;

                    Slide slide = slidePart.Slide;
                    var shapeTree = slide.CommonSlideData?.ShapeTree;
                    if (shapeTree == null)
                    {
                        continue;
                    }

                    // Audio and video nodes (found in the slide timing tree); both carry a CommonMediaNode child.
                    var mediaNodes = slide.Descendants<Audio>()
                        .Cast<OpenXmlElement>()
                        .Concat(slide.Descendants<Video>());

                    // Plain pictures: skip only pictures that carry media markers, so that pictures
                    // with other application properties (e.g. a placeholder) are still reported.
                    var picList = shapeTree.Descendants<Picture>().Where(o =>
                    {
                        var props = o.NonVisualPictureProperties?.ApplicationNonVisualDrawingProperties;
                        return props == null
                            || !(props.Elements<A.AudioFromFile>().Any()
                                 || props.Elements<A.VideoFromFile>().Any()
                                 || props.Descendants<P14.Media>().Any());
                    });

                    // Text bodies of shapes.
                    var txBodyList = shapeTree.Descendants<TextBody>();

                    // ---- Audio / video ----
                    foreach (var media in mediaNodes)
                    {
                        // Shape the media node is attached to.
                        var spTgt = media.GetFirstChild<CommonMediaNode>()?.TargetElement?.ShapeTarget;
                        string targetShapeId = spTgt?.ShapeId?.Value;
                        if (targetShapeId == null)
                        {
                            continue;
                        }

                        // Non-visual drawing properties of that shape (ids compared by their text form).
                        var cNvPr = slide.Descendants<NonVisualDrawingProperties>()
                            .FirstOrDefault(o => o.Id?.InnerText == targetShapeId);

                        // Parent and grandparent; skip when the target is not a picture.
                        var nvPicPr = cNvPr?.Parent as NonVisualPictureProperties;
                        var pic = nvPicPr?.Parent as Picture;
                        if (pic == null)
                        {
                            continue;
                        }

                        // Shape information.
                        var shapeId = cNvPr.Id?.Value;
                        var shapeName = cNvPr.Name?.Value;
                        var shapeDescr = cNvPr.Description?.Value;

                        // Relationship id of the linked media file (video first, then audio).
                        var appProps = nvPicPr.ApplicationNonVisualDrawingProperties;
                        string linkId = appProps?.GetFirstChild<A.VideoFromFile>()?.Link?.Value
                                        ?? appProps?.GetFirstChild<A.AudioFromFile>()?.Link?.Value;

                        // External media: resolved through the slide's external relationships.
                        Uri uri = linkId == null
                            ? null
                            : slidePart.ExternalRelationships.FirstOrDefault(o => o.Id == linkId)?.Uri;

                        // A missing link or the literal target "NULL" is treated as "look for embedded media".
                        if (uri == null || string.Equals(uri.OriginalString, "NULL", StringComparison.OrdinalIgnoreCase))
                        {
                            var media14 = appProps?.Descendants<P14.Media>().FirstOrDefault();

                            // Media trim information.
                            var mediaStart = media14?.MediaTrim?.Start?.Value;
                            var mediaEnd = media14?.MediaTrim?.End?.Value;

                            // Embedded media: resolved through the data-part reference relationships.
                            string embedId = media14?.Embed?.Value;
                            var dataPartRelationship = embedId == null
                                ? null
                                : slidePart.DataPartReferenceRelationships.FirstOrDefault(o => o.Id == embedId);

                            uri = dataPartRelationship?.Uri;
                            if (dataPartRelationship != null)
                            {
                                using (var mediaStream = dataPartRelationship.DataPart.GetStream())
                                {
                                    // Consume mediaStream here (e.g. copy it to a file).
                                }
                            }
                        }

                        // Image shown for the media shape.
                        string posterId = pic.BlipFill?.Blip?.Embed?.Value;
                        if (posterId != null)
                        {
                            var posterPart = slidePart.GetPartById(posterId);
                            var imgUri = posterPart.Uri;
                            using (var imgStream = posterPart.GetStream())
                            {
                                // Consume imgStream here.
                            }
                        }
                    }

                    // ---- Pictures ----
                    foreach (var pic in picList)
                    {
                        var cNvPr = pic.NonVisualPictureProperties?.NonVisualDrawingProperties;

                        // Shape information.
                        var shapeId = cNvPr?.Id?.Value;
                        var shapeName = cNvPr?.Name?.Value;
                        var shapeDescr = cNvPr?.Description?.Value;

                        // Embedded image; Embed is absent when the picture has no embedded blip.
                        string embedId = pic.BlipFill?.Blip?.Embed?.Value;
                        if (embedId == null)
                        {
                            continue;
                        }

                        var imagePart = slidePart.GetPartById(embedId);
                        var imgUri = imagePart.Uri;
                        using (var imgStream = imagePart.GetStream())
                        {
                            // Consume imgStream here.
                        }
                    }

                    // ---- Text ----
                    foreach (var txBody in txBodyList)
                    {
                        // Owning shape; skip text bodies whose parent is not a Shape.
                        var sp = txBody.Parent as Shape;
                        if (sp == null)
                        {
                            continue;
                        }

                        var cNvPr = sp.NonVisualShapeProperties?.NonVisualDrawingProperties;

                        // Shape information.
                        var shapeId = cNvPr?.Id?.Value;
                        var shapeName = cNvPr?.Name?.Value;

                        // Option 1: all inner text of the body.
                        var plainText = txBody.InnerText;
                        // Option 2: concatenate the text elements.
                        var runText = string.Concat(txBody.Descendants<A.Text>().Select(o => o.Text));
                        // Option 3: one line per paragraph.
                        var paragraphText = string.Join(
                            Environment.NewLine,
                            txBody.Descendants<A.Paragraph>().Select(o => o.InnerText));
                    }
                }
            }
        }

  

  

 

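ppt文档的形状结构大概为(根据上面代码中用到的父子关系整理):

p:pic(图片/音视频形状) → p:nvPicPr → p:cNvPr(Id、Name、Description)、p:nvPr(a:videoFile / a:audioFile、p14:media);p:pic → p:blipFill → a:blip(r:embed)

p:sp(文本形状) → p:nvSpPr → p:cNvPr(Id、Name);p:sp → p:txBody → a:p → a:t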

 

 

 

 

完毕

 

posted @ 2020-08-13 17:58  爱恋的红尘  阅读(1276)  评论(1编辑  收藏  举报