Scrapy_Request对象dont_filter演示

import scrapy


class BaiduSpider(scrapy.Spider):
    """Demo spider whose callbacks keep re-requesting the same URL.

    Every callback prints the page title and then schedules one more request
    for the identical URL. On paper this never terminates; none of the
    requests set ``dont_filter``, so they stay subject to Scrapy's default
    request de-duplication.
    """

    name = "baidu"
    allowed_domains = ["baidu.com"]
    start_urls = ["https://baidu.com"]

    def parse(self, response):
        """Print the page title, then request the same URL again.

        The follow-up response is handed to ``parse_info``.
        """
        page_title = response.xpath('//title/text()').get()
        print(page_title)
        follow_up = scrapy.Request(
            url='https://baidu.com',
            callback=self.parse_info,
        )
        yield follow_up

    def parse_info(self, response):
        """Print the page title and re-schedule the same URL for itself."""
        page_title = response.xpath('//title/text()').get()
        print(page_title)
        # Same URL, same callback: this is the would-be endless loop.
        repeat = scrapy.Request(
            url='https://baidu.com',
            callback=self.parse_info,
        )
        yield repeat

理论上是死循环:parse_info 每次都会再次请求同一个 URL,并把自己作为回调。

但 Scrapy 默认会对请求去重,重复的请求会被过滤掉,所以实际上不会无限循环。

import scrapy


class BaiduSpider(scrapy.Spider):
    """Demo of the ``dont_filter`` flag on ``scrapy.Request``.

    The start requests are issued with ``dont_filter=True``. The follow-up
    requests yielded by ``parse`` and ``parse_info`` do not set the flag, so
    they keep its default value.
    """

    name = "baidu"
    allowed_domains = ["baidu.com"]
    start_urls = ["https://baidu.com"]

    def start_requests(self):
        """Yield one request per start URL with de-duplication disabled.

        ``dont_filter=True`` turns off duplicate filtering for that request,
        so it is still visited; ``False`` lets a duplicate request be
        dropped.
        """
        for start_url in self.start_urls:
            initial = scrapy.Request(start_url, dont_filter=True)
            yield initial

    def parse(self, response):
        """Print the page title, then request the same URL again.

        The follow-up response is handed to ``parse_info``.
        """
        page_title = response.xpath('//title/text()').get()
        print(page_title)
        follow_up = scrapy.Request(
            url='https://baidu.com',
            callback=self.parse_info,
        )
        yield follow_up

    def parse_info(self, response):
        """Print the page title and re-schedule the same URL for itself."""
        page_title = response.xpath('//title/text()').get()
        print(page_title)
        # No dont_filter here, so this request uses the default setting.
        repeat = scrapy.Request(
            url='https://baidu.com',
            callback=self.parse_info,
        )
        yield repeat

 

posted @   jiang_jiayun  阅读(172)  评论(0)  编辑  收藏  举报
相关博文:
阅读排行:
· 一个费力不讨好的项目,让我损失了近一半的绩效!
· 清华大学推出第四讲使用 DeepSeek + DeepResearch 让科研像聊天一样简单!
· 实操Deepseek接入个人知识库
· CSnakes vs Python.NET:高效嵌入与灵活互通的跨语言方案对比
· Plotly.NET 一个为 .NET 打造的强大开源交互式图表库
点击右上角即可分享
微信分享提示