web页面展示PDF文件
简单展示PDF
1. 下载pdf.js插件
<script src='https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.6.172/pdf.min.js'></script>
2. 解析PDF文件渲染为canvas
// Render a single page of a PDF into the <canvas id="canvas"> element with pdf.js.
const pdfjsLib = window.pdfjsLib;
// Page to display; pdf.js page numbers start at 1.
const pageNum = 1;

pdfjsLib.getDocument('path/demo1.pdf').promise
  .then(function (pdf) {
    // Return the page promise so a failure here reaches the final .catch().
    return pdf.getPage(pageNum);
  })
  .then(function (page) {
    const canvas = document.getElementById('canvas');
    const context = canvas.getContext('2d');
    // Use the `page` argument directly: `this` inside this callback is not an
    // object that carries a `page` property.
    const viewport = page.getViewport({ scale: 2.5 });
    // Size the canvas to the scaled page before drawing.
    canvas.height = viewport.height;
    canvas.width = viewport.width;
    const renderContext = {
      canvasContext: context,
      viewport: viewport,
      // background: '#f0f098',
      pageColors: {
        background: '#fff',
        foreground: '#fff',
      },
    };
    // Return the render task's promise so render errors are reported too.
    return page.render(renderContext).promise;
  })
  .catch(function (error) {
    // Report the failure instead of silently discarding it.
    console.error('Failed to render PDF page:', error);
  });
3. 将PDF页面导出为Word文档并下载
<script src="https://unpkg.com/docx@7.1.0/build/index.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/FileSaver.js/1.3.8/FileSaver.js"></script>
// Export the rendered page as a Word document. This fragment takes the place
// of the bare `page.render(renderContext)` call in step 2, so it must run
// where `page`, `renderContext` and `canvas` are in scope.
page.render(renderContext).promise
  .then(() => {
    // Snapshot the canvas as a PNG data URL; it is handed to ImageRun as-is.
    const imageData = canvas.toDataURL('image/png');
    // Derive the height from the canvas so the page keeps its aspect ratio
    // instead of being stretched into a fixed 600x1000 box.
    const imageWidth = 600;
    const imageHeight = Math.round((imageWidth * canvas.height) / canvas.width);
    const doc = new window.docx.Document({
      sections: [{
        children: [
          new window.docx.Paragraph({
            children: [
              new window.docx.ImageRun({
                data: imageData,
                transformation: {
                  width: imageWidth,
                  height: imageHeight,
                },
              }),
            ],
          }),
        ],
      }],
    });
    // Serialize the document; returning the promise keeps errors in this chain.
    return window.docx.Packer.toBlob(doc);
  })
  .then((blob) => {
    // Save the document via FileSaver.
    saveAs(blob, 'my-word.docx');
  })
  .catch((error) => {
    console.error('Failed to export page to Word:', error);
  });
/**
 * Convert a base64-encoded data URI (for example the string returned by
 * `canvas.toDataURL()`) into a Blob.
 *
 * @param {string} dataURI - Data URI of the form `data:<mime>;base64,<payload>`.
 * @returns {Blob} Blob holding the decoded bytes, typed with the URI's MIME type.
 */
function dataURItoBlob(dataURI) {
  const [header, payload] = dataURI.split(',');
  // atob yields one character per byte (char codes 0-255).
  const byteString = atob(payload);
  const mimeString = header.split(':')[1].split(';')[0];
  const bytes = Uint8Array.from(byteString, (ch) => ch.charCodeAt(0));
  // The original built the byte array but never returned anything.
  return new Blob([bytes], { type: mimeString });
}
以上主要是把pdf解析为canvas。如果想解析为dom标签,目前只能拿到文本的位置信息,据此把文本排版展示好,而样式信息、线段、表格等信息无法获取。
如果想要拿到样式信息,就需要深入阅读pdf.js源码了。