一种通用数据采集的schema定义形式
{
  "name": "凤凰金融",
  "notice": {
    "data": "attribute",
    "matcher": [
      {
        "match": "xpath",
        "pattern": "//*[@id=\"page-financing\"]/div[1]/div[5]/div/div/div[3]"
      }
    ],
    "comments": "网站通告"
  },
  "url": {
    "data": "attribute",
    "value": "http://www.fengjr.com/financing/list?type=cx",
    "comments": "本平台数据的采集URL"
  },
  "project": {
    "data": "url",
    "url": {
      "data": "attribute",
      "matcher": [
        {
          "match": "xpath",
          "pattern": "//*[@id=\"page-financing\"]/div[1]/div[5]/div/div/div[3]"
        }
      ],
      "template": ""
    },
    "title": {
      "data": "attribute",
      "matcher": [
        {
          "match": "xpath",
          "pattern": "//*[@id=\"page-financing\"]/div[1]/div[5]/div/div/div[3]"
        }
      ]
    },
    "detail": {
      "title": {
        "data": "attribute",
        "matcher": [
          {
            "match": "xpath",
            "pattern": "//*[@id=\"page-financing\"]/div[1]/div[5]/div/div/div[3]"
          }
        ]
      },
      "amount": {
        "data": "attribute",
        "matcher": [
          {
            "match": "xpath",
            "pattern": "//*[@id=\"page-financing\"]/div[1]/div[5]/div/div/div[3]"
          }
        ]
      }
    }
  },
  "member": {
    "data": "sub_item",
    "sub_item": {
      "matcher": [
        {
          "match": "xpath",
          "pattern": "//*[@id=\"page-financing\"]/div[1]/div[5]/div/div/div[3]"
        }
      ],
      "src-save": 0,
      "url": {
        "matcher": [
          {
            "match": "xpath",
            "pattern": "//*[@id=\"page-financing\"]/div[1]/div[5]/div/div/div[3]"
          }
        ],
        "template": ""
      }
    },
    "detail": {
      "title": {
        "data": "attribute",
        "matcher": [
          {
            "match": "xpath",
            "pattern": "//*[@id=\"page-financing\"]/div[1]/div[5]/div/div/div[3]"
          }
        ]
      },
      "amount": {
        "data": "attribute",
        "matcher": [
          {
            "match": "xpath",
            "pattern": "//*[@id=\"page-financing\"]/div[1]/div[5]/div/div/div[3]"
          }
        ]
      }
    }
  },
  "src-save": 1
}
补充:
{
  "name": "凤凰金融",
  "notice": {
    "data": "attribute",
    "matcher": [
      {
        "match": "xpath",
        "pattern": "//*[@id=\"page-financing\"]/div[1]/div[5]/div/div/div[3]"
      }
    ]
  },
  "url": {
    "data": "attribute",
    "value": "http://www.fengjr.com/financing/list?type=cx"
  },
  "project": {
    "data": "url",
    "url": {
      "data": "attribute",
      "matcher": [
        {
          "match": "xpath",
          "pattern": "//*[@id=\"page-financing\"]/div[1]/div[5]/div/div/div[3]"
        }
      ],
      "template": ""
    },
    "title": {
      "data": "attribute",
      "matcher": [
        {
          "match": "xpath",
          "pattern": "//*[@id=\"page-financing\"]/div[1]/div[5]/div/div/div[3]"
        }
      ]
    },
    "detail": {
      "name": "网贷列表",
      "title": {
        "data": "attribute",
        "matcher": [
          {
            "match": "xpath",
            "pattern": "//*[@id=\"page-financing\"]/div[1]/div[5]/div/div/div[3]"
          }
        ]
      },
      "amount": {
        "data": "attribute",
        "matcher": [
          {
            "match": "xpath",
            "pattern": "//*[@id=\"page-financing\"]/div[1]/div[5]/div/div/div[3]"
          }
        ]
      }
    }
  },
  "member": {
    "data": "sub_item",
    "sub_item": {
      "matcher": [
        {
          "match": "xpath",
          "pattern": "//*[@id=\"page-financing\"]/div[1]/div[5]/div/div/div[3]"
        }
      ],
      "src-save": 0,
      "url": {
        "data": "attribute",
        "matcher": [
          {
            "match": "xpath",
            "pattern": "//*[@id=\"page-financing\"]/div[1]/div[5]/div/div/div[3]"
          }
        ],
        "template": ""
      }
    },
    "detail": {
      "name": "会员材料",
      "title": {
        "data": "attribute",
        "matcher": [
          {
            "match": "xpath",
            "pattern": "//*[@id=\"page-financing\"]/div[1]/div[5]/div/div/div[3]"
          }
        ]
      },
      "amount": {
        "data": "attribute",
        "matcher": [
          {
            "match": "xpath",
            "pattern": "//*[@id=\"page-financing\"]/div[1]/div[5]/div/div/div[3]"
          }
        ]
      }
    }
  },
  "src-save": 1,
  "crawler": {
    "handler": "httpClient|selenium",
    "results": "html|json|text",
    "next_page": {
      "matcher": [
        {
          "match": "xpath",
          "pattern": "//*[@id=\"page-financing\"]/div[1]/div[5]/div/div/div[3]"
        }
      ],
      "template": ""
    },
    "history": "re-crawl|skip|stop"
  }
}