爬虫配置文件
<?xml version="1.0" encoding="utf-8"?>
<!--
  Crawler template holding one <page> rule per URL pattern.
  NOTE(review): the host names inside the id and pattern values appear to be
  masked placeholders; confirm the real values before use.
-->
<template>
  <id>××××.com</id>

  <!--
    Administrative-region pages (desc="index").
    The XPaths under <enqueue> select href attributes: links in the 'region'
    list, the first link in the first item of each 'detail' list, and links
    inside the 'Pages' block.
  -->
  <page threads="false"
        dynamic="false"
        desc="index"
        setcookie="false"
        pattern="^http://www.×××××.com/search/category/\d+/\d+(/)?(r\d+)?(p\d+)?$">
    <enqueue>
      <xpath>//ul[@data-key='region']/li/a/@href</xpath>
      <xpath>//ul[@class='detail']/li[1]/a[1]/@href</xpath>
      <xpath>//div[@class='Pages']/a/@href</xpath>
    </enqueue>
  </page>

  <!-- Product pages (desc="non-index"). -->
  <page threads="false"
        dynamic="false"
        desc="non-index"
        setcookie="false"
        pattern="^http://www.*****.com/shop/\d+(\?KID=\d+)?$">
    <check>
      <!-- This one is unused. -->
      <xpath>//*[@id="pt1:pgl4"]</xpath>
      <pattern>\d+.00</pattern>
    </check>
  </page>
</template>