mupdf webassembly 试用
mupdf 是基于 C 语言编写的 PDF 读写以及预览工具,同时也提供了 SDK,官方基于 emscripten 开发了一个
webassembly 的扩展
参考使用
// Minimal Node.js walkthrough: open demo.pdf, print its page count, then print
// the structured text of the page at index 0 as JSON.
const fs = require("fs");
const mupdf = require("mupdf");

// Read the whole file into memory and hand the bytes to mupdf together with
// the "application/pdf" magic string.
const pdfBytes = fs.readFileSync("demo.pdf");
const pdfDocument = mupdf.Document.openDocument(pdfBytes, "application/pdf");
console.log(pdfDocument.countPages());

// Load page index 0, extract its structured text and print the JSON form.
const structuredText = pdfDocument.loadPage(0).toStructuredText();
console.log(structuredText.asJSON());
wasm 生成简单说明
- 项目结构
mupdf.c 为需要暴露的c 服务,mupdf.js 是基于wasm 实现的方便pdf 操作的工具方法(支持web 以及node 运行)
- 文档打开
调用了暴露出来的 webassembly 方法
static openDocument(from, magic) {
checkType(magic, "string")
let pointer = 0
if (from instanceof ArrayBuffer || from instanceof Uint8Array)
from = new Buffer(from)
if (from instanceof Buffer)
//
pointer = libmupdf._wasm_open_document_with_buffer(STRING(magic), from)
else if (from instanceof Stream)
pointer = libmupdf._wasm_open_document_with_stream(STRING(magic), from)
else
throw new Error("not a Buffer or Stream")
let pdf_ptr = libmupdf._wasm_pdf_document_from_fz_document(pointer)
if (pdf_ptr)
return new PDFDocument(pointer)
return new Document(pointer)
}
c 实现
// --- Document ---
// Exported wrapper that opens a document from an fz_buffer.
// The JavaScript side calls it as libmupdf._wasm_open_document_with_buffer
// (see openDocument in mupdf.js).
//
//   magic  - type hint string forwarded to fz_open_document_with_buffer
//   buffer - buffer forwarded to fz_open_document_with_buffer
//
// Returns an fz_document pointer (per the declared return type).
// NOTE(review): EXPORT and POINTER are macros defined elsewhere in mupdf.c.
// POINTER presumably invokes the named function with the given arguments and
// returns the resulting pointer, with error handling around the call - confirm
// against the macro definition.
EXPORT
fz_document * wasm_open_document_with_buffer(char *magic, fz_buffer *buffer)
{
POINTER(fz_open_document_with_buffer, magic, buffer)
}
- 构建
基于 emscripten 构建,同时使用了不少编译参数
#!/bin/bash
#
# Build the MuPDF core static libraries for the wasm target, then link them
# together with lib/mupdf.c into lib/mupdf-wasm.js using emcc.
#
# Exits with status 1 if either the core build or the emcc link step fails.

# Location of the MuPDF source tree, relative to this script's working directory.
MUPDF_DIR=../..
# Location of the Emscripten SDK checkout that provides emsdk_env.sh.
EMSDK_DIR=/opt/emsdk

# Compiler flags handed to the core build through XCFLAGS: -Os plus a set of
# -D defines, most of which set FZ_ENABLE_* feature macros to 0.
MUPDF_OPTS="-Os -DTOFU -DTOFU_CJK -DFZ_ENABLE_XPS=0 -DFZ_ENABLE_SVG=0 -DFZ_ENABLE_CBZ=0 -DFZ_ENABLE_IMG=0 -DFZ_ENABLE_HTML=0 -DFZ_ENABLE_EPUB=0 -DFZ_ENABLE_JS=0 -DFZ_ENABLE_OCR_OUTPUT=0 -DFZ_ENABLE_DOCX_OUTPUT=0 -DFZ_ENABLE_ODT_OUTPUT=0"

# Bring the Emscripten toolchain into the environment of this shell.
export EMSDK_QUIET=1
source "$EMSDK_DIR/emsdk_env.sh"

echo
echo BUILDING MUPDF CORE
# Abort on failure so the link step below never picks up stale or missing archives.
make -j4 -C "$MUPDF_DIR" build=release OS=wasm XCFLAGS="$MUPDF_OPTS" libs || exit 1

echo
echo BUILDING MUPDF WASM
# Paths are quoted so directories containing spaces or glob characters still work.
# Propagate a link failure instead of letting the trailing echo mask it.
emcc -o lib/mupdf-wasm.js -I "$MUPDF_DIR/include" lib/mupdf.c \
	--no-entry \
	-sABORTING_MALLOC=0 \
	-sALLOW_MEMORY_GROWTH=1 \
	-sMODULARIZE=1 \
	-sNODEJS_CATCH_EXIT=0 \
	-sWASM_ASYNC_COMPILATION=0 \
	-sEXPORT_NAME='"libmupdf"' \
	-sEXPORTED_RUNTIME_METHODS='["ccall","UTF8ToString","lengthBytesUTF8","stringToUTF8"]' \
	"$MUPDF_DIR/build/wasm/release/libmupdf.a" \
	"$MUPDF_DIR/build/wasm/release/libmupdf-third.a" || exit 1

echo
说明
mupdf 对于webassembly 的支持实现上是很值得学习的,属于一个比较完整的emscripten集成项目
参考资料
https://github.com/ArtifexSoftware/mupdf
https://mupdf.readthedocs.io/en/latest/quick-start-guide.html
https://mupdf.readthedocs.io/en/latest/mupdf-wasm.html
https://github.com/ArtifexSoftware/mupdf/blob/master/platform/wasm/lib/mupdf.js
https://github.com/ArtifexSoftware/mupdf/blob/master/platform/wasm/lib/mupdf.c
https://mupdf.com/wasm/demo/view.html?file=../../docs/mupdf_explored.pdf#page2
https://emscripten.org/docs/porting/connecting_cpp_and_javascript/Interacting-with-code.html#interacting-with-code-ccall-cwrap