scrapy crawl xmlfeed spider
from scrapy.spiders import XMLFeedSpider

from myxml.items import MyxmlItem


class XmlspiderSpider(XMLFeedSpider):
    """Spider that reads an RSS feed and collects the titles of its items.

    The feed URL is fixed in ``start_urls``; nodes named by ``itertag`` are
    handed to :meth:`parse_node`.
    """

    name = 'xmlspider'
    allowed_domains = ['sina.com.cn']
    start_urls = ['http://blog.sina.com.cn/rss/1165656262.xml']
    iterator = 'iternodes'  # you can change this; see the docs
    itertag = 'rss'  # change it accordingly

    def parse_node(self, response, selector):
        """Build an item holding every item title found in the feed.

        Args:
            response: The response the node was taken from (unused here).
            selector: Selector for the node named by ``itertag``.

        Returns:
            A ``MyxmlItem`` whose ``title`` field is the list of extracted
            title strings (empty when nothing matches).
        """
        item = MyxmlItem()
        # The XPath is absolute, so it matches the title text of every
        # <item> under /rss/channel rather than a single entry.
        item['title'] = selector.xpath('/rss/channel/item/title/text()').extract()
        # Other fields (url, name, description) could be filled the same way
        # with selector.xpath(...), provided MyxmlItem declares them.
        for title in item['title']:
            print(title)
        return item