xgqfrms™, xgqfrms® : xgqfrms's official website of cnblogs! xgqfrms™, xgqfrms® : xgqfrms's official website of GitHub!

cnblogs blogs backup & node.js crawler

cnblogs blogs backup & node.js crawler

const fs = require("fs");
var path = require("path");
const { exit } = require("process");
const log = console.log;
// const request = require("request");
// 解析 pdf 错误
const request = require("request-promise-native");

// Download target: the `pdf` directory one level above this script's directory.
const folder = path.resolve(__dirname, '../pdf');

// `recursive: true` makes this call a no-op when the directory already exists,
// so no separate existsSync() check (and no check-then-create race) is needed.
fs.mkdirSync(folder, { recursive: true });


/**
 * Fetch the resource at `url` as raw bytes and save it to `filename`.
 *
 * @param {string} url - Address of the PDF to download.
 * @param {string} filename - Path of the file to write.
 * @returns {Promise<void>} Settles once the file has been written.
 */
async function downloadPDF(url, filename) {
  // `encoding: null` makes `request` hand back the body as a Buffer
  // rather than a decoded string.
  const requestOptions = { uri: url, encoding: null };
  log('🚧 pdf downloading ...');
  const body = await request.get(requestOptions);
  fs.writeFileSync(filename, body);
  log('✅ pdf finished!');
}

const url = 'https://cs193p.sites.stanford.edu/sites/g/files/sbiybj16636/files/media/file/l1.pdf';
// Build the target path with path.join so the separator is correct on every platform.
const filename = path.join(folder, 'cs193p-2021-l1.pdf');

// Handle a failed download explicitly instead of leaving the promise floating.
downloadPDF(url, filename).catch((err) => {
  console.error('❌ pdf download failed:', err);
  process.exitCode = 1;
});

Node.js pdf crawler


"use strict";

/**
 *
 * @author xgqfrms
 * @license MIT
 * @copyright xgqfrms
 * @created 2022-04-01
 * @modified
 *
 * @description  Node.js pdf crawler
 * @augments
 * @example
 * @link
 *
 */

// 0. commonjs module using `require` keyword
const fs = require("fs");
const path = require("path");
const { exit } = require("process");
// const process= require("process");
// process.exit(0);

// const request = require("request");
// request 解析 pdf 错误 
// 1. just using `request` instead of `request-promise-native`, which is too slow! 
const request = require("request-promise-native");

const log = console.log;

// 2. custom download folder
const folder = path.resolve(__dirname, '../pdf');

// 3. create the folder if it is missing; `recursive: true` makes the call a
//    no-op when the directory already exists, so no existsSync() check is needed
fs.mkdirSync(folder, { recursive: true });

/**
 * Download the resource at `url`, save it to `filename`, then end the process.
 *
 * @param {string} url - Address of the PDF to download.
 * @param {string} filename - Path of the file to write.
 * @returns {Promise<void>} Rejects if the request or the write fails; on
 *   success the process exits with code 0 before the promise is observed.
 */
async function downloadPDF(url, filename) {
  log('🚧 pdf downloading ...');
  const pdfBuffer = await request.get({
    uri: url,
    // A PDF is binary data: `encoding: null` returns the body as a Buffer.
    // Decoding it as 'utf-8' turns the bytes into a string and corrupts the file.
    encoding: null,
  });
  // 4. write file to local file system
  fs.writeFileSync(filename, pdfBuffer);
  log('✅ pdf download finished!');
  // 5. exit the terminal after download finished
  exit(0);
}

const url = 'https://cs193p.sites.stanford.edu/sites/g/files/sbiybj16636/files/media/file/l1.pdf';
// Build the target path with path.join so the separator is correct on every platform.
const filename = path.join(folder, 'cs193p-2021-l1.pdf');

// Handle a failed download explicitly instead of leaving the promise floating.
downloadPDF(url, filename).catch((err) => {
  console.error('❌ pdf download failed:', err);
  process.exitCode = 1;
});


TypeScript & Node.js crawler All In One

https://www.cnblogs.com/xgqfrms/p/16086580.html

process.exit(0);

//CJS

const process = require('process');

// Log a message whenever this process receives SIGHUP.
const handleSighup = () => {
  console.log('Got SIGHUP signal.');
};
process.on('SIGHUP', handleSighup);

// After 100 ms, log and exit with code 0.
const EXIT_DELAY_MS = 100;
const exitCleanly = () => {
  console.log('Exiting.');
  process.exit(0);
};
setTimeout(exitCleanly, EXIT_DELAY_MS);

// Send SIGHUP to this very process.
process.kill(process.pid, 'SIGHUP');

// ESM

import process, { kill } from 'process';

// Log a message whenever this process receives SIGHUP.
function onHangup() {
  console.log('Got SIGHUP signal.');
}
process.on('SIGHUP', onHangup);

// After 100 ms, log and exit with code 0.
const exitDelayMs = 100;
function logAndExit() {
  console.log('Exiting.');
  process.exit(0);
}
setTimeout(logAndExit, exitDelayMs);

// Send SIGHUP to this very process via the named `kill` import.
kill(process.pid, 'SIGHUP');

https://nodejs.org/api/process.html#processkillpid-signal

https://nodejs.org/api/process.html#event-exit

https://nodejs.org/api/process.html#exit-codes

refs

https://stackoverflow.com/questions/25945714/how-to-download-pdf-file-from-url-in-node-js



©xgqfrms 2012-2020

www.cnblogs.com 发布文章使用:只允许注册用户才可以访问!

原创文章,版权所有©️xgqfrms, 禁止转载 🈲️,侵权必究⚠️!


posted @ 2021-01-07 22:02  xgqfrms  阅读(90)  评论(5)  编辑  收藏  举报