pdfjs 使用

如何使用pdfjs在网页中渲染pdf文件

渲染单页pdf的代码样例

读取文件并渲染其中一页的代码样例:

// 假设pdfBlob 已经是pdf 文件了
import {
  PDFDocumentProxy,
  getDocument,
  PDFLoadingTask,
  PDFPageProxy,
} from 'pdfjs-dist';

import * as PDFJSViewer from 'pdfjs-dist/web/pdf_viewer'
import * as pdfjsLib from 'pdfjs-dist/build/pdf';

/**
 * Loads a PDF from a Blob and resolves with its PDFDocumentProxy.
 *
 * @param pdfBlob - Blob holding the raw bytes of a PDF file.
 * @returns The document proxy pdf.js produces for the blob's bytes.
 */
async function getPDFDocument(pdfBlob: Blob): Promise<PDFDocumentProxy> {
  const buffer = await pdfBlob.arrayBuffer();
  // getDocument() hands back a loading task; the document proxy is
  // delivered through the task's `promise`.
  return getDocument({ data: new Uint8Array(buffer) }).promise;
}

/**
 * Renders one page of a PDF document onto a canvas at scale 1.
 * The canvas must be supplied by the caller (it is the <canvas> element
 * from the host HTML); its bitmap size and CSS size are overwritten to
 * match the page viewport.
 *
 * @param pdfProxy - Document proxy to read the page from.
 * @param pageNumber - Page number forwarded to `pdfProxy.getPage`.
 * @param canvas - Target canvas element.
 * @throws Error if the canvas cannot provide a 2D context.
 */
async function renderSinglePage(pdfProxy: PDFDocumentProxy, pageNumber: number, canvas: HTMLCanvasElement): Promise<void> {
  // Fetch the PDFPageProxy for the requested page.
  const pdfPage: PDFPageProxy = await pdfProxy.getPage(pageNumber);

  // Acquire the drawing context; getContext() may return null.
  const canvasContext = canvas.getContext('2d', { alpha: false });
  if (canvasContext === null) {
    throw new Error('Unable to acquire a 2D rendering context from the canvas');
  }

  // Apply the page viewport's size to the canvas element.
  const viewport = pdfPage.getViewport({ scale: 1 });
  const { width, height } = viewport;
  canvas.width = width;
  canvas.height = height;
  canvas.style.width = `${width}px`;
  canvas.style.height = `${height}px`;

  // render() returns a task object; completion is signalled by its
  // `promise`, so that is what has to be awaited.
  await pdfPage.render({ canvasContext, viewport }).promise;
}

/**
 * Loads a PDF file and renders one of its pages onto the given canvas.
 *
 * @param file - Blob containing the PDF file.
 * @param pageNumber - Page number to render.
 * @param canvas - Canvas element that receives the rendered page.
 */
async function renderSingleFile(file: Blob, pageNumber: number, canvas: HTMLCanvasElement): Promise<void> {
  const pdf: PDFDocumentProxy = await getPDFDocument(file);
  await renderSinglePage(pdf, pageNumber, canvas);
}

概念解析

  • PDFDocumentProxy:PDF 文档对象的抽象。调用 getDocument(...) 会得到一个加载任务,通过该任务的 promise 即可获取它;我们也可以通过它来获取页面对象的抽象。
  • PDFPageProxy:PDF 页面对象的抽象,通过 pdfProxy.getPage(pageNumber) 获取。这个对象有一个 render() 方法,用来把该页渲染到 canvas 上。

渲染出文字层以方便选择

下面的代码示例以上面的代码为模板。文字层需要在 canvas 之上放置一个与其同宽同高的容器,pdfjs 会在这个容器中生成文字层的 HTML。另外还需要配合一份样式:文字本身是透明色,只是用来辅助选择的。

// Assumes the top-level div of the text layer carries the .textLayout class.
// The container is positioned absolutely at top:0 and rendered at low opacity.
.textLayout{
    position: absolute;
    top:0;
    display: inline-block;
    opacity: 0.2;
    // Direct child spans are absolutely positioned, with transparent glyphs.
    > span{
      position: absolute;
      color:transparent;
    }
    // Selected text inside the layer gets a solid blue background.
     ::selection{
       background:rgb(0,0,255)
     }
  }
import {
  PDFDocumentProxy,
  getDocument,
  PDFLoadingTask,
  PDFPageProxy,
} from 'pdfjs-dist';

import * as PDFJSViewer from 'pdfjs-dist/web/pdf_viewer'
import * as pdfjsLib from 'pdfjs-dist/build/pdf';

// Factory whose createTextLayerBuilder() is used to build the text layer for a page.
const textLayoutFactory = new PDFJSViewer.DefaultTextLayerFactory();
// Event bus instance handed to the text layer builder when it is created.
const eventBus = new PDFJSViewer.EventBus();

/**
 * Renders one page of a PDF document onto a canvas at scale 1 and then
 * fills a sibling container with the page's text layer.
 *
 * Both the canvas and the text-layer container are resized to the page
 * viewport's width and height.
 *
 * @param pdfProxy - Document proxy to read the page from.
 * @param pageNumber - Page number forwarded to `pdfProxy.getPage`.
 * @param canvas - Canvas element that receives the rendered page.
 * @param textLayoutContainer - Div that receives the generated text layer.
 * @throws Error if the canvas cannot provide a 2D context.
 */
async function renderSinglePage(pdfProxy: PDFDocumentProxy, pageNumber: number, canvas: HTMLCanvasElement, textLayoutContainer: HTMLDivElement): Promise<void> {
  // Fetch the PDFPageProxy for the requested page.
  const pdfPage: PDFPageProxy = await pdfProxy.getPage(pageNumber);

  // Acquire the drawing context; getContext() may return null.
  const canvasContext = canvas.getContext('2d', { alpha: false });
  if (canvasContext === null) {
    throw new Error('Unable to acquire a 2D rendering context from the canvas');
  }

  // Apply the page viewport's size to the canvas and the text-layer container.
  const viewport = pdfPage.getViewport({ scale: 1 });
  const { width, height } = viewport;
  canvas.width = width;
  canvas.height = height;
  canvas.style.width = `${width}px`;
  canvas.style.height = `${height}px`;
  textLayoutContainer.style.width = `${width}px`;
  textLayoutContainer.style.height = `${height}px`;

  // Create the text layer builder bound to the container and viewport.
  const textLayout = textLayoutFactory.createTextLayerBuilder(
    textLayoutContainer,
    0,
    viewport,
    false,
    eventBus,
    null
  );

  // render() returns a task object; wait on its `promise` so the text
  // layer is only built once the page has actually been drawn.
  await pdfPage.render({ canvasContext, viewport }).promise;

  // Read the page's text as a stream. The `any` cast is kept from the
  // original snippet; presumably the typings in use do not declare
  // streamTextContent — TODO confirm and drop the cast if they do.
  const readableStream = (pdfPage as any).streamTextContent({
    includeMarkedContent: true,
  });
  textLayout.setTextContentStream(readableStream);
  // Emit the text stream into the text-layer container.
  textLayout.render();
}

缩略图的展示,代码示例如下:

/**
 * Produces a thumbnail image for a PDF page.
 *
 * The page is first drawn onto `drawCanvas` using a viewport scaled by
 * DRAW_UPSCALE_FACTOR * (THUMBNAIL_WIDTH / page width), then that canvas
 * is copied down onto `imgCanvas` and exported as a data URL. Drawing
 * large and shrinking afterwards keeps the thumbnail from looking too
 * coarse. (The original notes describe a 98px-wide thumbnail drawn at
 * twice that width; the actual numbers come from THUMBNAIL_WIDTH and
 * DRAW_UPSCALE_FACTOR, which are defined outside this snippet.)
 *
 * @param drawCanvas - Scratch canvas that receives the upscaled render.
 * @param imgCanvas - Canvas that receives the downscaled copy.
 * @param page - Page to render.
 * @returns The image canvas size, the data URL (empty string if the
 *          render promise rejects) and the thumbnail scale ratio.
 */
async _calculateThumberNail(drawCanvas: HTMLCanvasElement, imgCanvas: HTMLCanvasElement, page: PDFPageProxy) {
    const baseViewport = page.getViewport({ scale: 1 });
    const aspect = baseViewport.width / baseViewport.height;
    const ratio = THUMBNAIL_WIDTH / baseViewport.width;
    const upscaledViewport = baseViewport.clone({ scale: DRAW_UPSCALE_FACTOR * ratio } as any);

    // Prepare both canvases; each call also resizes the canvas it is given.
    const drawTarget = this._getDrawContextAndModifyCanvas(drawCanvas, aspect, DRAW_UPSCALE_FACTOR);
    const imgTarget = this._getDrawContextAndModifyCanvas(imgCanvas, aspect);

    const width = imgCanvas.width;
    const height = imgCanvas.height;

    // Start the render outside the try block so that only a rejection of
    // the render promise is turned into the empty-image fallback.
    const pending = page.render({
      canvasContext: drawTarget.ctx,
      transform: drawTarget.transform,
      viewport: upscaledViewport,
    }).promise;

    try {
      await pending;
    } catch {
      return { width, height, imgData: '', ratio };
    }

    // Shrink the upscaled render onto the image canvas and export it.
    imgTarget.ctx.drawImage(
      drawCanvas,
      0, 0, drawCanvas.width, drawCanvas.height,
      0, 0, imgCanvas.width, imgCanvas.height,
    );
    return { width, height, imgData: imgCanvas.toDataURL(), ratio };
  }

  /**
   * Clears and resizes a canvas for thumbnail drawing and returns its
   * 2D context together with an optional device-pixel-ratio transform.
   *
   * The canvas bitmap is sized to THUMBNAIL_WIDTH (and the height implied
   * by `pageRatio`), multiplied by `upscaleFactor` and by the device
   * pixel ratio, truncated to integers.
   *
   * @param canvas - Canvas to prepare; its width and height are overwritten.
   * @param pageRatio - Page width divided by page height.
   * @param upscaleFactor - Extra multiplier applied to the bitmap size.
   * @returns `ctx`, the 2D context, and `transform`, a six-element matrix
   *          scaling by the device pixel ratio, or null when that ratio is 1.
   * @throws Error if the canvas cannot provide a 2D context.
   */
  _getDrawContextAndModifyCanvas(canvas: HTMLCanvasElement, pageRatio: number, upscaleFactor = 1) {
    const ctx = canvas.getContext("2d", { alpha: false });
    if (ctx === null) {
      // Fail with a clear message instead of a TypeError on the next line.
      throw new Error("Unable to acquire a 2D rendering context from the canvas");
    }
    // clear previous images
    ctx.clearRect(0, 0, canvas.width, canvas.height);
    const pixelRatio = window.devicePixelRatio || 1;
    const canvasWidth = THUMBNAIL_WIDTH;
    // `| 0` truncates to an integer pixel count.
    const canvasHeight = (canvasWidth / pageRatio) | 0;

    canvas.width = (upscaleFactor * canvasWidth * pixelRatio) | 0;
    canvas.height = (upscaleFactor * canvasHeight * pixelRatio) | 0;

    const transform = pixelRatio !== 1
      ? [pixelRatio, 0, 0, pixelRatio, 0, 0]
      : null;

    return { ctx, transform };
  }
posted @ 2022-05-22 09:56  kongshu  阅读(2538)  评论(3编辑  收藏  举报