C# .NET 使用 Open XML SDK 提取 PPT 中的音频、视频、图片、文本
C# .NET 使用 Open XML SDK 提取 PPT 中的音频、视频、图片、文本
命名空间:
using System; using DocumentFormat.OpenXml.Packaging; using System.IO; using System.Linq; using DocumentFormat.OpenXml; using DocumentFormat.OpenXml.Presentation; using A = DocumentFormat.OpenXml.Drawing; using P14 = DocumentFormat.OpenXml.Office2010.PowerPoint;
代码如下:
/// <summary>
/// Demonstrates how to walk a PowerPoint presentation with the Open XML SDK and
/// locate, per slide, its video media, plain pictures and text bodies.
/// </summary>
/// <remarks>
/// This is a sample routine: every extracted value is read into a local and the
/// method returns nothing. Put your own handling where the values and streams are
/// obtained. To extract audio instead of video, iterate <c>audioList</c> and look
/// for <c>A.AudioFromFile</c> instead of <c>A.VideoFromFile</c>.
/// </remarks>
/// <param name="path">Path of the presentation file; it is opened read-only.</param>
public void PptInfo(string path)
{
    using (var doc = PresentationDocument.Open(path, false))
    {
        var presentationPart = doc.PresentationPart;
        var presentation = presentationPart?.Presentation;
        var slideIdList = presentation?.SlideIdList;
        if (slideIdList == null)
        {
            // Nothing to enumerate in a package without a slide list.
            return;
        }

        // Number displayed for the first slide; defaults to 1 when the attribute is absent.
        int firstSlideNumber = presentation.FirstSlideNum?.Value ?? 1;
        int slideIndex = -1;

        foreach (SlideId slideId in slideIdList.Elements<SlideId>())
        {
            // Advance before any 'continue' so skipped slides still occupy their position.
            slideIndex++;

            var relationshipId = slideId.RelationshipId?.Value;
            if (string.IsNullOrEmpty(relationshipId))
            {
                continue;
            }

            var slidePart = presentationPart.GetPartById(relationshipId) as SlidePart;
            if (slidePart == null || slidePart.Slide == null)
            {
                continue;
            }

            // Slide number as displayed in PowerPoint. The offset is applied to the
            // first-slide number explicitly: '??' binds looser than '+', so writing
            // "first ?? 1 + index" would ignore the index whenever 'first' is set.
            var slideNumber = firstSlideNumber + slideIndex;

            Slide slide = slidePart.Slide;
            var shapeTree = slide.CommonSlideData?.ShapeTree;
            if (shapeTree == null)
            {
                continue;
            }

            // Audio timing nodes (not iterated below; swap in for videoList to extract audio).
            var audioList = slide.Descendants<Audio>();
            // Video timing nodes.
            var videoList = slide.Descendants<Video>();
            // Pictures whose application properties element has no children, i.e. pictures
            // that are not the visual placeholder of a media clip.
            var picList = shapeTree.Descendants<Picture>()
                .Where(o => !(o.NonVisualPictureProperties?.ApplicationNonVisualDrawingProperties?.Any() ?? false));
            // Text bodies.
            var txBodyList = shapeTree.Descendants<TextBody>();

            // ---- Media (video) ----
            foreach (var media in videoList)
            {
                // Shape that the media timing node targets.
                var targetShapeId = media.CommonMediaNode?.TargetElement?.ShapeTarget?.ShapeId?.Value;
                if (targetShapeId == null)
                {
                    continue;
                }

                // The target id is stored as text while the shape id is numeric,
                // so compare their textual forms.
                var cNvPr = slide.Descendants<NonVisualDrawingProperties>()
                    .FirstOrDefault(o => o.Id?.InnerText == targetShapeId);

                // Parent and grandparent: the picture element that represents the media on the slide.
                var nvPicPr = cNvPr?.Parent as NonVisualPictureProperties;
                var pic = nvPicPr?.Parent as Picture;
                if (pic == null)
                {
                    continue;
                }

                // Shape information.
                var shapeId = cNvPr.Id?.Value;
                var shapeName = cNvPr.Name?.Value;
                var shapeDescr = cNvPr.Description?.Value;

                // Video file reference (use A.AudioFromFile for audio).
                var appProps = nvPicPr.ApplicationNonVisualDrawingProperties;
                var videoFile = appProps?.Elements<A.VideoFromFile>().FirstOrDefault();

                // Try the external (linked) relationship first.
                var linkId = videoFile?.Link?.Value;
                var externalRelationship = linkId == null
                    ? null
                    : slidePart.ExternalRelationships.FirstOrDefault(o => o.Id == linkId);
                var uri = externalRelationship?.Uri;

                // No usable external target (missing, or literally "NULL"): fall back to
                // the embedded media referenced by the p14:media element.
                if (uri == null || string.Equals(uri.OriginalString, "NULL", StringComparison.OrdinalIgnoreCase))
                {
                    var media14 = appProps?.Descendants<P14.Media>().FirstOrDefault();
                    if (media14 != null)
                    {
                        // Media trim information.
                        var mediaStart = media14.MediaTrim?.Start?.Value;
                        var mediaEnd = media14.MediaTrim?.End?.Value;

                        // Internal relationship pointing at the media data part.
                        var embedId = media14.Embed?.Value;
                        var dataPartReferenceRelationship = embedId == null
                            ? null
                            : slidePart.DataPartReferenceRelationships.FirstOrDefault(o => o.Id == embedId);
                        if (dataPartReferenceRelationship != null)
                        {
                            uri = dataPartReferenceRelationship.Uri;
                            using (var mediaStream = dataPartReferenceRelationship.DataPart.GetStream())
                            {
                                // Read or copy the media bytes from mediaStream here.
                            }
                        }
                    }
                }

                // Image associated with the media shape (per the original author: the first
                // frame for video, a speaker icon for audio). Linked images have no Embed id.
                var posterId = pic.BlipFill?.Blip?.Embed?.Value;
                if (!string.IsNullOrEmpty(posterId))
                {
                    var part = slidePart.GetPartById(posterId);
                    var imgUri = part.Uri;
                    using (var imgStream = part.GetStream())
                    {
                        // Read or copy the image bytes from imgStream here.
                    }
                }
            }

            // ---- Pictures ----
            foreach (var pic in picList)
            {
                var cNvPr = pic.NonVisualPictureProperties?.NonVisualDrawingProperties;

                // Shape information.
                var shapeId = cNvPr?.Id?.Value;
                var shapeName = cNvPr?.Name?.Value;
                var shapeDescr = cNvPr?.Description?.Value;

                // Embedded image; skipped when the picture is linked rather than embedded.
                var embed = pic.BlipFill?.Blip?.Embed?.Value;
                if (string.IsNullOrEmpty(embed))
                {
                    continue;
                }

                var part = slidePart.GetPartById(embed);
                var imgUri = part.Uri;
                using (var imgStream = part.GetStream())
                {
                    // Read or copy the image bytes from imgStream here.
                }
            }

            // ---- Text ----
            foreach (var txBody in txBodyList)
            {
                // Owning shape and its drawing properties.
                var sp = txBody.Parent as Shape;
                var cNvPr = sp?.NonVisualShapeProperties?.NonVisualDrawingProperties;
                if (cNvPr == null)
                {
                    continue;
                }

                // Shape information.
                var shapeId = cNvPr.Id?.Value;
                var shapeName = cNvPr.Name?.Value;

                // Option 1: all text of the body concatenated.
                var text = txBody.InnerText;

                // Option 2: concatenate the individual text runs.
                var texts = txBody.Descendants<A.Text>();
                text = string.Concat(texts.Select(o => o.Text));

                // Option 3: keep paragraph boundaries as line breaks.
                var ps = txBody.Descendants<A.Paragraph>();
                text = string.Join(Environment.NewLine, ps.Select(o => o.InnerText));
            }
        }
    }
}
ppt文档的形状结构大概为:
完毕
如有问题请联系QQ:
// Digit lookup table: d[i] holds the character for the digit i + 1.
var d = "123456789".split("");
// Assemble the contact number by picking digits from the table in order.
var pass = [8, 6, 0, 8, 2, 0, 4, 3, 2]
  .map(function (index) { return d[index]; })
  .join("");
源代码(GitHub)、包(NuGet)请关注:ping9719