pdfjs 使用
如何使用pdfjs在网页中渲染pdf文件
渲染单页pdf的代码样例
读取文件,然后渲染其中的一页的代码样例
// 假设pdfBlob 已经是pdf 文件了
import {
PDFDocumentProxy,
getDocument,
PDFLoadingTask,
PDFPageProxy,
} from 'pdfjs-dist';
import * as PDFJSViewer from 'pdfjs-dist/web/pdf_viewer'
import * as pdfjsLib from 'pdfjs-dist/build/pdf';
/**
 * Loads a PDF from a Blob and resolves to its PDFDocumentProxy.
 *
 * @param pdfBlob - Blob whose bytes are the PDF file.
 * @returns The document proxy that pdf.js produces for this file.
 */
async function getPDFDocument(pdfBlob: Blob): Promise<PDFDocumentProxy> {
  const buffer = await pdfBlob.arrayBuffer();
  // getDocument() hands back a loading task; the document proxy is
  // delivered through the task's `promise`.
  return getDocument({ data: new Uint8Array(buffer) }).promise;
}
/**
 * Renders one page of a PDF onto a canvas supplied by the caller.
 *
 * @param pdfProxy - Document to read the page from.
 * @param pageNumber - Page to render; passed straight to `getPage`.
 * @param canvas - Canvas element from the host HTML. It is resized to the
 *   page viewport before drawing.
 * @throws Error if the canvas cannot provide a 2D context.
 */
async function renderSinglePage(
  pdfProxy: PDFDocumentProxy,
  pageNumber: number,
  canvas: HTMLCanvasElement,
): Promise<void> {
  // Fetch the page proxy.
  const pdfPage: PDFPageProxy = await pdfProxy.getPage(pageNumber);

  // Fetch the drawing context; getContext() may return null.
  const canvasContext = canvas.getContext('2d', { alpha: false });
  if (canvasContext === null) {
    throw new Error('renderSinglePage: 2D canvas context is unavailable');
  }

  // Apply the page viewport size to both the bitmap and the CSS box.
  const viewport = pdfPage.getViewport({ scale: 1 });
  const { width, height } = viewport;
  Object.assign(canvas, { width, height });
  canvas.style.width = `${width}px`;
  canvas.style.height = `${height}px`;

  // render() returns a task object; waiting on its `promise` is what
  // guarantees the page has finished drawing.
  await pdfPage.render({ canvasContext, viewport }).promise;
}
/**
 * Loads a PDF file and renders one of its pages onto a canvas.
 *
 * @param file - Blob containing the PDF.
 * @param pageNumber - Page to render.
 * @param canvas - Canvas element that receives the rendered page.
 */
async function renderSingleFile(
  file: Blob,
  pageNumber: number,
  canvas: HTMLCanvasElement,
): Promise<void> {
  const pdf: PDFDocumentProxy = await getPDFDocument(file);
  await renderSinglePage(pdf, pageNumber, canvas);
}
概念解析
- PDFDocumentProxy:PDF 文档对象的抽象。getDocument(...) 返回一个加载任务,该任务的 promise 会解析出 PDFDocumentProxy;通过它可以进一步获取页面对象的抽象(PDFPageProxy)。
- PDFPageProxy:PDF 页面对象的抽象,通过 pdfProxy.getPage(pageNumber) 获取(页码从 1 开始)。这个对象有一个 render() 方法,用来把该页渲染到 canvas 上。
渲染出文字层以方便选择
代码示例(以上面的代码为模板):文字层需要一个覆盖在 canvas 之上、与 canvas 同宽同高的容器,pdfjs 会在这个容器里生成文字层的 HTML。文字层还需要配合下面的样式:其中的文字是透明的,只用来辅助选择。
// Assumes the top-level text-layer div carries the .textLayout class.
// The text layer is overlaid on the canvas, so it is positioned absolutely
// at the top of its container.
.textLayout{
position: absolute;
top:0;
display: inline-block;
opacity: 0.2;
// Each text span is positioned absolutely and drawn in a transparent
// colour: the text is there only so that it can be selected.
> span{
position: absolute;
color:transparent;
}
// Selected text gets a blue background.
::selection{
background:rgb(0,0,255)
}
}
import {
PDFDocumentProxy,
getDocument,
PDFLoadingTask,
PDFPageProxy,
} from 'pdfjs-dist';
import * as PDFJSViewer from 'pdfjs-dist/web/pdf_viewer'
import * as pdfjsLib from 'pdfjs-dist/build/pdf';
// Factory used to create a text layer builder for a rendered page.
const textLayoutFactory = new PDFJSViewer.DefaultTextLayerFactory();
// Event bus instance handed to the text layer builder.
const eventBus = new PDFJSViewer.EventBus();
/**
 * Renders one PDF page onto a canvas and builds a selectable text layer
 * inside a container that overlays it.
 *
 * @param pdfProxy - Document to read the page from.
 * @param pageNumber - Page to render; passed straight to `getPage`.
 * @param canvas - Canvas that receives the page bitmap. It is resized to
 *   the page viewport.
 * @param textLayoutContainer - Element that receives the generated text
 *   layer. It is given the same CSS size as the canvas.
 * @throws Error if the canvas cannot provide a 2D context.
 */
async function renderSinglePage(
  pdfProxy: PDFDocumentProxy,
  pageNumber: number,
  canvas: HTMLCanvasElement,
  textLayoutContainer: HTMLDivElement,
): Promise<void> {
  // Fetch the page proxy.
  const pdfPage: PDFPageProxy = await pdfProxy.getPage(pageNumber);

  // Fetch the drawing context; getContext() may return null.
  const canvasContext = canvas.getContext('2d', { alpha: false });
  if (canvasContext === null) {
    throw new Error('renderSinglePage: 2D canvas context is unavailable');
  }

  // Apply the page viewport size to the canvas and to the text container.
  const viewport = pdfPage.getViewport({ scale: 1 });
  const { width, height } = viewport;
  Object.assign(canvas, { width, height });
  canvas.style.width = `${width}px`;
  canvas.style.height = `${height}px`;
  textLayoutContainer.style.width = `${width}px`;
  textLayoutContainer.style.height = `${height}px`;

  // Create the text layer builder. The second argument is the zero-based
  // page index, so it is derived from the page number rather than being
  // hard-coded to 0.
  const textLayout = textLayoutFactory.createTextLayerBuilder(
    textLayoutContainer,
    pageNumber - 1,
    viewport,
    false,
    eventBus,
    null,
  );

  // render() returns a task object; waiting on its `promise` is what
  // guarantees the page has finished drawing.
  await pdfPage.render({ canvasContext, viewport }).promise;

  // After the page has rendered, read the page's text as a stream.
  // The `any` cast is kept from the original snippet; presumably the
  // typings in use do not declare streamTextContent (confirm).
  const readableStream = (pdfPage as any).streamTextContent({
    includeMarkedContent: true,
  });
  textLayout.setTextContentStream(readableStream);

  // Write the text stream into the text layer container.
  textLayout.render();
}
缩略图的展示 代码示例如下
/**
 * Renders a thumbnail image for a PDF page.
 *
 * Approach: the page is rendered onto `drawCanvas` at
 * `DRAW_UPSCALE_FACTOR` times the thumbnail scale, where the thumbnail
 * scale is `THUMBNAIL_WIDTH / viewport.width`. That oversized drawing is
 * then copied, shrunk, onto `imgCanvas` and exported as a data URL.
 * Drawing large and shrinking afterwards keeps the thumbnail from looking
 * too low-resolution. (Per the original note the thumbnail is 98px wide
 * and the oversized canvas twice that; both constants are defined outside
 * this excerpt.)
 *
 * @param drawCanvas - Scratch canvas that receives the oversized render.
 * @param imgCanvas - Canvas that receives the shrunk image.
 * @param page - Page to render.
 * @returns The size of `imgCanvas`, the thumbnail data URL (an empty
 *   string if the render promise rejects) and the thumbnail scale.
 */
async _calculateThumberNail(drawCanvas: HTMLCanvasElement, imgCanvas: HTMLCanvasElement, page: PDFPageProxy) {
  const baseViewport = page.getViewport({ scale: 1 });
  const aspect = baseViewport.width / baseViewport.height;
  const thumbScale = THUMBNAIL_WIDTH / baseViewport.width;
  const oversizedViewport = baseViewport.clone({ scale: DRAW_UPSCALE_FACTOR * thumbScale } as any);

  // Size both canvases; only the scratch canvas is upscaled.
  const drawSetup = this._getDrawContextAndModifyCanvas(drawCanvas, aspect, DRAW_UPSCALE_FACTOR);
  const imgSetup = this._getDrawContextAndModifyCanvas(imgCanvas, aspect);
  const targetCtx = imgSetup.ctx;

  // Record the output size once the image canvas has been resized.
  const width = imgCanvas.width;
  const height = imgCanvas.height;

  // A rejected render is reported as `false`; nothing else is caught.
  const rendered = await page
    .render({
      canvasContext: drawSetup.ctx,
      transform: drawSetup.transform,
      viewport: oversizedViewport,
    })
    .promise.then(
      () => true,
      () => false,
    );

  if (!rendered) {
    return { width, height, imgData: '', ratio: thumbScale };
  }

  // Shrink the oversized drawing onto the image canvas.
  targetCtx.drawImage(
    drawCanvas,
    0, 0, drawCanvas.width, drawCanvas.height,
    0, 0, imgCanvas.width, imgCanvas.height,
  );
  return { width, height, imgData: imgCanvas.toDataURL(), ratio: thumbScale };
}
/**
 * Clears and resizes a canvas for thumbnail drawing.
 *
 * The canvas is sized from `THUMBNAIL_WIDTH`, the page aspect ratio, the
 * optional upscale factor and the device pixel ratio.
 *
 * @param canvas - Canvas to clear and resize.
 * @param pageRatio - Page width divided by page height.
 * @param upscaleFactor - Extra multiplier applied to the canvas size;
 *   defaults to 1.
 * @returns The canvas 2D context and a transform that scales by the
 *   device pixel ratio, or `null` when that ratio is 1.
 */
_getDrawContextAndModifyCanvas(canvas: HTMLCanvasElement, pageRatio: number, upscaleFactor = 1) {
  const ctx = canvas.getContext("2d", { alpha: false });
  // Wipe any previous image before the canvas is resized.
  ctx.clearRect(0, 0, canvas.width, canvas.height);

  const dpr = window.devicePixelRatio || 1;
  const thumbWidth = THUMBNAIL_WIDTH;
  // `| 0` truncates to an integer.
  const thumbHeight = (thumbWidth / pageRatio) | 0;

  canvas.width = (upscaleFactor * thumbWidth * dpr) | 0;
  canvas.height = (upscaleFactor * thumbHeight * dpr) | 0;

  // A transform is needed only when the device pixel ratio is not 1.
  let transform: number[] | null = null;
  if (dpr !== 1) {
    transform = [dpr, 0, 0, dpr, 0, 0];
  }

  return { ctx, transform };
}