使用iText 7读取PDF文件中的文本和图片
读取文本
// Extract the text of every page and display it, one message box per page.
using (var pdfReader = new PdfReader(fileName))
using (var document = new PdfDocument(pdfReader))
{
    // The document is not modified while reading, so the page count is fetched once.
    int pageCount = document.GetNumberOfPages();

    // Page numbering starts at 1, matching the original loop bounds.
    int pageNumber = 1;
    while (pageNumber <= pageCount)
    {
        var page = document.GetPage(pageNumber);
        MessageBox.Show(PdfTextExtractor.GetTextFromPage(page));
        pageNumber++;
    }
}
读取图片
// Run every page's content through a canvas processor; the listener below
// writes each image it is handed to disk.
// imageFileName is passed to the listener as a composite format string:
// {0} receives the running image index and {1} the detected file extension.
using (PdfReader reader = new PdfReader(fileName))
using (PdfDocument pdfDocument = new PdfDocument(reader))
{
    IEventListener strategy = new ImageRenderListener(imageFileName);
    PdfCanvasProcessor parser = new PdfCanvasProcessor(strategy);

    int pageCount = pdfDocument.GetNumberOfPages();
    for (int i = 1; i <= pageCount; i++)
    {
        parser.ProcessPageContent(pdfDocument.GetPage(i));
    }
}

/// <summary>
/// Canvas-processor listener that saves every rendered image to a file.
/// </summary>
public class ImageRenderListener : IEventListener
{
    // Only image-render events are of interest; declaring that lets the
    // processor skip this listener for all other event types.
    private static readonly ICollection<EventType> SupportedEvents =
        new HashSet<EventType> { EventType.RENDER_IMAGE };

    private readonly string format;
    private int index = 0;

    /// <summary>
    /// Creates a listener that writes images to paths built from <paramref name="format"/>.
    /// </summary>
    /// <param name="format">
    /// Composite format string for the output path: <c>{0}</c> is the zero-based
    /// running image index, <c>{1}</c> is the image file extension.
    /// </param>
    /// <exception cref="System.ArgumentNullException"><paramref name="format"/> is null.</exception>
    public ImageRenderListener(string format)
    {
        // Fail fast: a null format would otherwise make every image write fail
        // later, one image at a time.
        if (format == null)
        {
            throw new System.ArgumentNullException(nameof(format));
        }

        this.format = format;
    }

    /// <summary>
    /// Saves the image carried by an image-render event; all other events are ignored.
    /// </summary>
    /// <param name="data">Event payload supplied by the canvas processor.</param>
    /// <param name="type">Event type supplied by the canvas processor.</param>
    public void EventOccurred(IEventData data, EventType type)
    {
        if (!(data is ImageRenderInfo imageData))
        {
            return;
        }

        try
        {
            PdfImageXObject imageObject = imageData.GetImage();
            if (imageObject == null)
            {
                return;
            }

            // The index is consumed before the extension lookup so that output
            // numbering is unchanged even when that lookup fails for an image.
            int imageIndex = index++;
            string path = string.Format(
                System.Globalization.CultureInfo.InvariantCulture,
                format,
                imageIndex,
                imageObject.IdentifyImageFileExtension());

            File.WriteAllBytes(path, imageObject.GetImageBytes());
        }
        catch (System.Exception ex)
        {
            // Best effort: one image that cannot be decoded or written must not
            // abort extraction of the rest, but the failure is no longer silent.
            System.Diagnostics.Trace.TraceWarning(
                "ImageRenderListener: skipped an image: {0}", ex.Message);
        }
    }

    /// <summary>
    /// Returns the event types this listener handles.
    /// </summary>
    /// <returns>A collection containing only <c>EventType.RENDER_IMAGE</c>.</returns>
    public ICollection<EventType> GetSupportedEvents()
    {
        return SupportedEvents;
    }
}