selenium单例模式下 docker-chrome 多线程并发代码

最近需要写爬虫,在使用 docker 版 standalone-chrome 时发现同一时刻只能有一个 chrome 会话在执行,所以写了这个多线程并发控制类来统一管理浏览器的获取与释放,在此当作模板记录一下。


#! /usr/bin/env python3
import threading
import traceback

from loguru import logger
from selenium import webdriver
from selenium.common import WebDriverException
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
from selenium.webdriver.common.by import By
import json
from utils.configs import ScreenshotDir, ChromeUserDataDir, REMOTE_DRIVER


def options():
    """Build the default ``ChromeOptions`` used when no options are supplied.

    Returns:
        A ``webdriver.ChromeOptions`` instance with the 'eager' page load
        strategy, the 'enable-automation' switch excluded, and the
        command-line switches listed below.
    """
    chrome_opts = webdriver.ChromeOptions()
    chrome_opts.page_load_strategy = 'eager'
    # Exclude Chrome's 'enable-automation' switch.
    chrome_opts.add_experimental_option('excludeSwitches', ['enable-automation'])

    # Switches are added in this exact order.
    switches = (
        f"user-data-dir={ChromeUserDataDir}",  # profile directory from config
        "--disable-popup-blocking",
        "--blink-settings=imagesEnabled=false",  # images disabled
        "--window-size=1920,1080",  # window size
        "--no-sandbox",  # no sandbox
        "--disable-gpu",
    )
    for switch in switches:
        chrome_opts.add_argument(switch)

    # Optional alternatives, left disabled:
    # chrome_opts.add_argument('--headless')  # headless mode
    # chrome_opts.add_experimental_option('debuggerAddress', '127.0.0.1:9222')  # attach to a given server
    return chrome_opts


class Browser:
    """Convenience wrapper around a single remote Selenium session.

    Not thread-safe: an instance owns exactly one ``driver`` and performs no
    locking of its own.
    """

    def __init__(self, opts=None):
        """Open a remote WebDriver session against ``REMOTE_DRIVER``.

        Args:
            opts: Browser options; when falsy, ``options()`` supplies the
                defaults.
        """
        if not opts:
            opts = options()
        self.driver = webdriver.Remote(command_executor=REMOTE_DRIVER, options=opts)
        # For a local browser instead: webdriver.Chrome(options=opts)

    def quit(self):
        """Close the session; errors are logged, never raised.

        Safe to call more than once. The ``driver`` reference is cleared even
        when the underlying ``quit()`` fails, so a broken session is not
        reused or quit a second time.
        """
        driver = getattr(self, 'driver', None)
        if driver is None or not hasattr(driver, 'quit'):
            return
        try:
            driver.quit()
        except Exception as e:
            logger.error("browser quit err:{e}\ntraceback:{traceback}".format(e=e, traceback=traceback.format_exc()))
        finally:
            self.driver = None

    def send(self, cmd, params):
        """Send a command through the chromium ``send_command_and_get_result`` endpoint.

        Args:
            cmd: Command name placed under the ``cmd`` key of the JSON body.
            params: Parameters placed under the ``params`` key.

        Returns:
            The ``value`` entry of the response (``None`` if absent).
        """
        # NOTE(review): relies on the command executor's private ``_url`` and
        # ``_request`` attributes — confirm they exist in the Selenium version in use.
        resource = "/session/%s/chromium/send_command_and_get_result" % self.driver.session_id
        url = self.driver.command_executor._url + resource
        body = json.dumps({'cmd': cmd, 'params': params})
        response = self.driver.command_executor._request('POST', url, body)
        return response.get('value')

    def get(self, url) -> None:
        """Navigate to ``url``, logging before and after the navigation."""
        logger.info(f'browser starting open url={url}')
        self.driver.get(url)
        logger.info(f'browser open suc url={url}')

    def wait_by_xpath(self, path, timeout=10) -> None:
        """Wait until the element at XPath ``path`` is clickable.

        Polls once per second for up to ``timeout`` seconds. Best-effort: any
        failure (including timeout) is logged and swallowed.
        """
        try:
            WebDriverWait(self.driver, timeout, 1, ignored_exceptions=(WebDriverException,)).until(
                EC.element_to_be_clickable((By.XPATH, path)))
        except Exception:
            # Include the locator so the log line is actionable.
            logger.error(f'browser wait xpath not find, path={path} timeout={timeout}')

    def wait_by_id(self, tid, timeout=10) -> None:
        """Wait until an element with id ``tid`` is present in the DOM.

        Polls once per second for up to ``timeout`` seconds. Best-effort: any
        failure (including timeout) is logged and swallowed.
        """
        try:
            WebDriverWait(self.driver, timeout, 1, ignored_exceptions=(WebDriverException,)).until(
                EC.presence_of_element_located((By.ID, tid)))
        except Exception:
            # Include the locator so the log line is actionable.
            logger.error(f'browser wait id not find, id={tid} timeout={timeout}')

    def save(self, path):
        """Write the current page source to ``path`` as UTF-8 text."""
        with open(path, 'w', encoding='utf-8') as fw:
            fw.write(self.get_page_source())

    def get_page_source(self) -> str:
        """Return the driver's current page source."""
        return self.driver.page_source

    def click(self, path):
        """Click the element at XPath ``path`` and follow it to the last window.

        After clicking, sleeps 2 seconds, switches to the last entry of
        ``window_handles`` (a newly opened tab, if any) and returns its URL.

        Returns:
            The current URL after the click, or ``""`` when the element is
            missing or any step fails (the error is logged).
        """
        try:
            # find_element raises when nothing matches, so no falsy check is needed.
            button = self.driver.find_element(By.XPATH, path)
            button.click()
            time.sleep(2)
            # Jump to the newest tab/window.
            handles = self.driver.window_handles
            self.driver.switch_to.window(handles[-1])
            return self.driver.current_url
        except Exception as e:
            logger.error(f'browser click err={e}')
            # Always hand back a string so callers get one consistent type.
            return ""

    def screenshot(self, name=None):
        """Save a screenshot under ``ScreenshotDir``.

        Args:
            name: File name to use; defaults to ``full_page_screenshot.png``.

        Returns:
            Whatever the driver's ``save_screenshot`` returns.
        """
        full_name = name or 'full_page_screenshot.png'
        screenshot_path = f'{ScreenshotDir}/{full_name}'
        return self.driver.save_screenshot(screenshot_path)


class BrowserManager:
    """Context manager that lets only one thread hold a browser at a time.

    All state is class-level and guarded by a shared ``threading.Condition``,
    so every ``BrowserManager`` instance in the process competes for the same
    single slot.

    Usage:
        with BrowserManager() as browser:
            browser.get(url)
    """
    _browser = None  # browser currently handed out, if any
    _is_used = False  # True while some thread owns the slot
    _condition = threading.Condition()

    def __init__(self, opts=None):
        """Remember the options to pass to ``Browser`` on entry."""
        self.opts = opts

    def __enter__(self):
        """Block until the slot is free, then create and return a ``Browser``.

        Raises:
            Whatever ``Browser(self.opts)`` raises; the slot is released and a
            waiter is woken before the exception propagates.
        """
        with BrowserManager._condition:
            # Re-check the flag after every wake-up; the timeout is only a
            # safety net against a missed notification.
            while BrowserManager._is_used:
                BrowserManager._condition.wait(timeout=10)

            BrowserManager._is_used = True
            try:
                BrowserManager._browser = Browser(self.opts)
            except BaseException:
                # Free the slot on any failure (including KeyboardInterrupt)
                # and wake a waiter right away instead of letting it time out.
                BrowserManager._is_used = False
                BrowserManager._condition.notify()
                raise

            return BrowserManager._browser

    def __exit__(self, exc_type, exc_value, traceback):
        """Quit the browser and release the slot, waking one waiting thread.

        The slot is released even if quitting the browser raises, so a failed
        shutdown cannot block every later caller. Exceptions from the ``with``
        body are not suppressed.
        """
        with BrowserManager._condition:
            try:
                if BrowserManager._browser is not None:
                    BrowserManager._browser.quit()
            finally:
                BrowserManager._browser = None
                BrowserManager._is_used = False
                BrowserManager._condition.notify()

用法就很简单了,例如


def fetch_page_source(url):
    """Usage example: open ``url`` in the managed browser and return its source.

    The ``return`` statements need an enclosing function, so the example is
    wrapped in one and ``url`` becomes its parameter.

    Returns:
        The page source, or ``None`` when loading or reading the page fails
        (the error is logged).
    """
    with BrowserManager() as browser:
        try:
            browser.get(url)
            return browser.get_page_source()
        except Exception as e:
            logger.error(e)
            return None
posted @ 2024-10-23 09:26  happy_codes  阅读(8)  评论(0编辑  收藏  举报