Selenium接管已经打开的浏览器并爬取数据

    """
    P.S：需要接管现有浏览器
    
    使用步骤：
    1、打开浏览器，设置好远程调试端口，并扫码登录淘宝。
        chrome.exe --remote-debugging-port=9333 --user-data-dir="G:\spider_taobao"
        
    2、运行程序，自动采集数据
    
    """

def main():
    """Attach Selenium to an already-running Chrome and inject a stealth script.

    Chrome must already have been started with
    ``--remote-debugging-port=9333``. This function connects to that browser
    through the ``debuggerAddress`` option, then registers the contents of
    the ``STEALTH_JS`` file via the CDP command
    ``Page.addScriptToEvaluateOnNewDocument``.

    Depends on ``DIRVER_PATH`` (chromedriver executable path) and
    ``STEALTH_JS`` (path to the script file), which are defined outside this
    function.
    """
    service = ChromeService(executable_path=DIRVER_PATH)

    options = webdriver.ChromeOptions()

    # Load an extension at start-up (disabled).
    # options.add_argument(r"--load-extension=C:\Users\Administrator\Desktop\爬虫\XPath-Helper_v2.0.2.crx")
    # options.add_extension(r"C:\Users\Administrator\Desktop\爬虫\XPath-Helper_v2.0.2.crx")

    # These options raise an error when attaching to an existing browser,
    # so do not set them.
    # options.add_experimental_option("excludeSwitches", ["enable-automation"])
    # options.add_experimental_option("useAutomationExtension", False)

    # Take over the browser that is already open on the remote-debugging port.
    options.add_experimental_option("debuggerAddress", "127.0.0.1:9333")

    # Reduce the chance of Selenium being detected.
    options.add_argument("--disable-blink-features")
    options.add_argument("--disable-blink-features=AutomationControlled")

    driver = webdriver.Chrome(service=service, options=options)

    # Read explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    with open(STEALTH_JS, encoding="utf-8") as f:
        js = f.read()

    # Register the script with the browser through the DevTools protocol.
    driver.execute_cdp_cmd(
        cmd="Page.addScriptToEvaluateOnNewDocument",
        cmd_args={
            "source": js,
        },
    )

posted @ 2023-07-17 11:29 蕝戀 阅读(432) 评论(0) 编辑 收藏 举报

会员力量，点亮园子希望

刷新页面返回顶部

绝恋。。

Selenium接管已经打开的浏览器并爬取数据

公告