Selenium接管已经打开的浏览器并爬取数据

    """
    P.S:需要接管现有浏览器
    
    使用步骤:
    1、打开浏览器,设置好远程调试端口,并扫描登录淘宝。
        chrome.exe --remote-debugging-port=9333 --user-data-dir="G:\spider_taobao"
        
    2、运行程序,自动采集数据
    
    """
def main():
    """Attach Selenium to an already-running Chrome and install the stealth script.

    Chrome must have been started beforehand with remote debugging enabled on
    port 9333 (for example
    ``chrome.exe --remote-debugging-port=9333 --user-data-dir=<profile dir>``),
    because this function connects to that instance through the
    ``debuggerAddress`` option.

    After attaching, the contents of ``STEALTH_JS`` are registered through the
    CDP command ``Page.addScriptToEvaluateOnNewDocument``.

    Returns:
        None. The driver is created and configured but not returned.
    """
    # DIRVER_PATH (sic) is the constant name defined elsewhere in this file.
    service = ChromeService(executable_path=DIRVER_PATH)

    options = webdriver.ChromeOptions()

    # Optionally load an extension:
    # options.add_argument(r"--load-extension=C:\Users\Administrator\Desktop\爬虫\XPath-Helper_v2.0.2.crx")
    # options.add_extension(r"C:\Users\Administrator\Desktop\爬虫\XPath-Helper_v2.0.2.crx")

    # These options raise an error when attaching to an existing browser,
    # so they must stay disabled:
    # options.add_experimental_option("excludeSwitches", ["enable-automation"])
    # options.add_experimental_option("useAutomationExtension", False)

    # Take over the browser that is already open on the remote-debugging port.
    options.add_experimental_option("debuggerAddress", "127.0.0.1:9333")

    # Reduce the chance of Selenium being detected.
    # NOTE(review): launch arguments are presumably ignored when attaching to
    # an already-running browser -- confirm whether these have any effect.
    options.add_argument("--disable-blink-features")
    options.add_argument("--disable-blink-features=AutomationControlled")

    driver = webdriver.Chrome(service=service, options=options)

    # Read with an explicit encoding so the script is not decoded with the
    # platform default code page (assumes the file is UTF-8 -- TODO confirm).
    with open(STEALTH_JS, encoding="utf-8") as f:
        js = f.read()

    driver.execute_cdp_cmd(
        cmd="Page.addScriptToEvaluateOnNewDocument",
        cmd_args={
            "source": js,
        },
    )
posted @ 2023-07-17 11:29  蕝戀  阅读(432)  评论(0)  编辑  收藏  举报