Scrapy: crawling an XML feed with XMLFeedSpider

from scrapy.spiders import XMLFeedSpider
from myxml.items import MyxmlItem

class XmlspiderSpider(XMLFeedSpider):
    """Spider that fetches a Sina blog RSS feed and collects its post titles.

    ``itertag`` is set to the feed's root element (``rss``), so the node
    passed to ``parse_node`` is the root and every title underneath it ends
    up in a single item.
    """

    name = 'xmlspider'
    allowed_domains = ['sina.com.cn']
    start_urls = ['http://blog.sina.com.cn/rss/1165656262.xml']
    iterator = 'iternodes'  # node iterator used by XMLFeedSpider
    itertag = 'rss'  # tag name of the nodes handed to parse_node

    def parse_node(self, response, selector):
        """Collect all post titles found under the given feed node.

        Args:
            response: The response the node was extracted from (unused here).
            selector: Selector for the node matched by ``itertag``.

        Returns:
            A ``MyxmlItem`` whose ``title`` field holds the list of title
            strings extracted from ``/rss/channel/item/title``. Only the
            ``title`` field is populated.
        """
        item = MyxmlItem()
        item['title'] = selector.xpath('/rss/channel/item/title/text()').extract()
        # Echo each title to stdout so the crawl result is visible in the console.
        for title in item['title']:
            print(title)
        return item

 

posted @ 2017-05-10 13:35  Erick-LONG  阅读(209)  评论(0)  编辑  收藏  举报