cnblogs blogs backup & node.js crawler
cnblogs blogs backup & node.js crawler
const fs = require("fs");
var path = require("path");
const { exit } = require("process");
const log = console.log;
// const request = require("request");
// 解析 pdf 错误
const request = require("request-promise-native");
// Directory that receives the downloaded PDFs: `../pdf` relative to this script.
const folder = path.resolve(__dirname, '../pdf');
// `recursive: true` turns the call into a no-op when the directory already
// exists (and creates missing parents), so no separate existsSync() check is
// needed and there is no window between "check" and "create".
fs.mkdirSync(folder, { recursive: true });
/**
 * Fetch `url` as raw bytes and save them to `filename`.
 *
 * @param {string} url - Address of the PDF to download.
 * @param {string} filename - Path the downloaded bytes are written to.
 * @returns {Promise<void>} Resolves after the file has been written.
 */
async function downloadPDF(url, filename) {
  log('🚧 pdf downloading ...');
  // `encoding: null` asks `request` for the body as a Buffer rather than a decoded string.
  const requestOptions = { uri: url, encoding: null };
  const body = await request.get(requestOptions);
  fs.writeFileSync(filename, body);
  log('✅ pdf finished!');
}
const url = 'https://cs193p.sites.stanford.edu/sites/g/files/sbiybj16636/files/media/file/l1.pdf';
// path.join uses the platform's separator instead of a hard-coded '/'.
const filename = path.join(folder, 'cs193p-2021-l1.pdf');
// Do not leave the promise floating: report a failed download and signal it
// through the exit code instead of relying on the unhandled-rejection default.
downloadPDF(url, filename).catch((err) => {
  console.error('❌ pdf download failed:', err);
  process.exitCode = 1;
});
Node.js pdf crawler
"use strict";
/**
*
* @author xgqfrms
* @license MIT
* @copyright xgqfrms
* @created 2022-04-01
* @modified
*
* @description Node.js pdf crawler
* @augments
* @example
* @link
*
*/
// 0. commonjs module using `require` keyword
const fs = require("fs");
const path = require("path");
const { exit } = require("process");
// const process= require("process");
// process.exit(0);
// const request = require("request");
// request 解析 pdf 错误
// 1. just using `request` instead of `request-promise-native`, which is too slow!
const request = require("request-promise-native");
const log = console.log;
// 2. custom download folder: `../pdf` relative to this script
const folder = path.resolve(__dirname, '../pdf');
// 3. make sure the folder exists; `recursive: true` makes this a no-op when it
//    is already there (and creates missing parents), so no existsSync() check
//    is needed and there is no gap between checking and creating
fs.mkdirSync(folder, { recursive: true });
/**
 * Download the PDF at `url`, save it to `filename`, then end the process.
 *
 * @param {string} url - Address of the PDF to download.
 * @param {string} filename - Path the downloaded bytes are written to.
 * @returns {Promise<void>} Rejects if the request or the write fails; on
 *   success the process exits with code 0 before the promise is observed.
 */
async function downloadPDF(url, filename) {
  log('🚧 pdf downloading ...');
  const pdfBuffer = await request.get({
    uri: url,
    // A PDF is binary data. `encoding: null` makes `request` hand back the raw
    // bytes as a Buffer; decoding the body as 'utf-8' turns it into a string
    // and mangles byte sequences that are not valid UTF-8, corrupting the file.
    encoding: null,
  });
  // 4. write file to local file system
  fs.writeFileSync(filename, pdfBuffer);
  log('✅ pdf download finished!');
  // 5. exit the process once the download has finished
  exit(0);
}
const url = 'https://cs193p.sites.stanford.edu/sites/g/files/sbiybj16636/files/media/file/l1.pdf';
// path.join uses the platform's separator instead of a hard-coded '/'.
const filename = path.join(folder, 'cs193p-2021-l1.pdf');
// Do not leave the promise floating: report a failed download and signal it
// through the exit code instead of relying on the unhandled-rejection default.
downloadPDF(url, filename).catch((err) => {
  console.error('❌ pdf download failed:', err);
  process.exitCode = 1;
});
TypeScript & Node.js crawler All In One
https://www.cnblogs.com/xgqfrms/p/16086580.html
process.exit(0);
// CJS
const process = require('process');

// Logs a line whenever this process receives SIGHUP.
function reportHangup() {
  console.log('Got SIGHUP signal.');
}

// Announces the shutdown and ends the process with exit code 0.
function shutDown() {
  console.log('Exiting.');
  process.exit(0);
}

// Register the listener before the signal is sent below.
process.on('SIGHUP', reportHangup);
setTimeout(shutDown, 100);

// Send SIGHUP to this very process.
process.kill(process.pid, 'SIGHUP');
// ESM
import process, { kill } from 'process';

// Logs a line whenever this process receives SIGHUP.
function reportHangup() {
  console.log('Got SIGHUP signal.');
}

// Announces the shutdown and ends the process with exit code 0.
function shutDown() {
  console.log('Exiting.');
  process.exit(0);
}

// Register the listener before the signal is sent below.
process.on('SIGHUP', reportHangup);
setTimeout(shutDown, 100);

// Send SIGHUP to this very process.
kill(process.pid, 'SIGHUP');
https://nodejs.org/api/process.html#processkillpid-signal
https://nodejs.org/api/process.html#event-exit
https://nodejs.org/api/process.html#exit-codes
refs
https://stackoverflow.com/questions/25945714/how-to-download-pdf-file-from-url-in-node-js
©xgqfrms 2012-2020
www.cnblogs.com 发布文章使用:只允许注册用户才可以访问!
原创文章,版权所有©️xgqfrms, 禁止转载 🈲️,侵权必究⚠️!
本文首发于博客园,作者:xgqfrms,原文链接:https://www.cnblogs.com/xgqfrms/p/14249005.html
未经授权禁止转载,违者必究!