用于抓取vijos所有题目信息的node.js脚本
代码如下:
var superagent = require('superagent');
var fs = require('fs');
/*
fetch_vijos_problems
这个脚本用于获取vijos里面的所有题目的描述和信息。
注意:因为1000至1099已经有信息了,所以题目好顺延100。
*/
var fetchProblem = function (problemId, callback) {
var url = "https://vijos.org/p/" + problemId;
superagent.get(url)
.end( (err, res)=> {
if (err) {
console.log("[error fetch]" + url);
if (callback)
callback();
} else {
// console.log(res.text);
var html = res.text;
var titleIdx1 = html.indexOf('<title>');
var titleIdx2 = html.indexOf('</title>');
var title = html.substr(titleIdx1+7, titleIdx2-titleIdx1-15);
var idx = html.indexOf("section__body typo");
html = html.substr(idx+21);
idx = html.indexOf("</div>");
html = html.substr(0, idx).trim();
var dir = `${__dirname}/../moon-web/resources/problems/${problemId+100}`;
if (fs.existsSync(dir) == false) {
fs.mkdirSync(dir);
}
var info = {
"id": "" + (problemId + 100),
"title": title,
"tags": []
}
var descriptionFile = `${dir}/description.html`;
var infoFile = `${dir}/info.json`;
fs.writeFileSync(descriptionFile, html, 'utf8');
fs.writeFileSync(infoFile, JSON.stringify(info), 'utf8');
if (callback)
callback();
}
} );
}
// 获取vijos所有题目,编号1000 - 2056
var fetchProblems = function (problemId) {
console.log(`fetch ${problemId} ...`);
setTimeout(()=>{
fetchProblem(problemId, ()=>{
console.log(`fetch ${problemId} finished.`);
if (problemId < 2056)
fetchProblems(problemId+1);
});
}, 1000);
}
fetchProblems(1000);