Python Selenium 网页截全图

Python Selenium 网页截全图


代码如下:

from selenium import webdriver
from selenium.webdriver.support.expected_conditions import _find_element
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import StaleElementReferenceException


class text_to_be_present_in_element:
    """Expectation that checks whether a text occurs in an element's text.

    Instances are callables taking a driver, which is the shape this file
    passes to ``WebDriverWait.until``.

    Args:
        locator: ``(by, value)`` pair; it is unpacked into
            ``driver.find_element(by, value)``.
        text_: Substring searched for in the element's ``text``.
    """

    def __init__(self, locator, text_):
        self.locator = locator
        self.text_ = text_

    def __call__(self, driver):
        """Return True if ``text_`` is contained in the located element's text.

        Returns False when the element reference went stale during the
        lookup. Any other exception raised by the lookup propagates to the
        caller.
        """
        try:
            # Use the public lookup API instead of the private
            # ``expected_conditions._find_element`` helper.
            element_text = driver.find_element(*self.locator).text
            return self.text_ in element_text
        except StaleElementReferenceException:
            return False


class text_to_be_present_in_element_value:
    """Expectation that checks whether a text occurs in an element attribute.

    Instances are callables taking a driver, which is the shape this file
    passes to ``WebDriverWait.until``.

    Args:
        locator: ``(by, value)`` pair; it is unpacked into
            ``driver.find_element(by, value)``.
        key: Name of the attribute read with ``get_attribute``.
        text_: Substring searched for in the attribute's value.
    """

    def __init__(self, locator, key, text_):
        self.locator = locator
        self.text = text_
        self.key = key

    def __call__(self, driver):
        """Return True if ``text_`` is contained in the attribute's value.

        Returns False when the attribute value is empty or missing, or when
        the element reference went stale during the lookup. Any other
        exception raised by the lookup propagates to the caller.
        """
        try:
            # Use the public lookup API instead of the private
            # ``expected_conditions._find_element`` helper.
            element = driver.find_element(*self.locator)
            attribute_value = element.get_attribute(self.key)
        except StaleElementReferenceException:
            return False
        # A missing attribute yields a falsy value; treat it as "not present".
        if not attribute_value:
            return False
        return self.text in attribute_value


def chrome_headless(
        executable_path='/Users/dengjiajie/Desktop/mark_book/my_awesome_book/my_tools/chromedriver'):
    """Create a headless Chrome WebDriver.

    Args:
        executable_path: Path of the chromedriver binary handed to
            ``webdriver.Chrome``. The default is the path that used to be
            hard-coded here; pass another path to run on a different machine.

    Returns:
        The ``webdriver.Chrome`` instance built with the options below.
    """
    options = webdriver.ChromeOptions()
    # Command-line switches passed to Chrome, in the original order.
    for argument in ('--headless',
                     '--disable-gpu',
                     '--no-sandbox',
                     'window-size=1920x1080'):
        options.add_argument(argument)
    return webdriver.Chrome(options=options, executable_path=executable_path)


def select_driver(name='chrome_headless'):
    """Build and return the WebDriver registered under ``name``.

    Args:
        name: Key of the driver factory to use.

    Returns:
        Whatever the selected factory returns.

    Raises:
        ValueError: If no factory is registered under ``name``.
    """
    factories = {'chrome_headless': chrome_headless}
    if name not in factories:
        raise ValueError('driver is None, please check driver exist ')
    build = factories[name]
    return build()


class SeHandler():
    """Wrapper around a WebDriver that saves a screenshot of a web page.

    Args:
        driver: WebDriver instance to use. When omitted (or falsy), one is
            created with ``select_driver()``.
    """

    def __init__(self, driver=None):
        self.driver = driver or select_driver()

    def save_img_from_url(self, url, abs_file_path, width=None, height=None, locator=None, text=None, attribute=None):
        """Load ``url``, optionally wait for content, and save a screenshot.

        The window is resized to ``width`` x ``height`` before the screenshot
        is taken; when ``height`` is not given, the page's
        ``document.body.scrollHeight`` is used.

        Args:
            url: Page to open.
            abs_file_path: Target file passed to ``save_screenshot``.
            width: Window width; falls back to 1920 when falsy.
            height: Window height; read from the page when falsy.
            locator: ``(by, value)`` pair of an element to wait for.
            text: If given, additionally wait until the located element's
                text contains it. Requires ``locator``.
            attribute: ``(name, value)`` tuple or list; if given, additionally
                wait until the located element's attribute ``name`` contains
                ``value``. Requires ``locator``. Other types are ignored.

        Returns:
            The truthiness of ``save_screenshot``'s result.

        Raises:
            ValueError: If ``text`` or ``attribute`` is given without
                ``locator``, or ``attribute`` does not hold exactly two items.
        """
        wants_attribute = bool(attribute) and isinstance(attribute, (tuple, list))
        # Fail fast, before loading the page: the text/attribute waits cannot
        # look up an element without a locator.
        if (text or wants_attribute) and not locator:
            raise ValueError('locator is required when text or attribute is given')
        if wants_attribute:
            attribute_name, attribute_value = attribute

        self.driver.get(url)
        if locator:
            # Poll every 0.5s for at most 5s per condition.
            wait = WebDriverWait(self.driver, 5, 0.5)
            print('进入元素显现并可定位')
            wait.until(EC.visibility_of_element_located(locator=locator))
            if text:
                print('进入文本显式等待')
                wait.until(text_to_be_present_in_element(locator=locator, text_=text))
            if wants_attribute:
                print('进入属性值等待')
                wait.until(
                    text_to_be_present_in_element_value(locator=locator, key=attribute_name, text_=attribute_value))

        # Window width: caller's value or the 1920 default.
        width = width or 1920
        print(f'width:{width}')
        # Window height: caller's value or the page's scroll height.
        if not height:
            height = self.driver.execute_script('return document.body.scrollHeight')
        print(f'height:{height}')
        # Resize the window to the target size, then take the screenshot.
        self.driver.set_window_size(width=width, height=height)
        # Propagate the driver's result instead of always reporting success.
        return bool(self.driver.save_screenshot(abs_file_path))

    def quit(self):
        """Quit the underlying driver and drop the reference to it.

        Calling it again after the driver was released does nothing.
        """
        if self.driver is None:
            return
        self.driver.quit()
        self.driver = None


if __name__ == '__main__':
    # Demo: screenshot one page into a timestamped PNG in the working directory.
    import time
    from selenium.webdriver.common.by import By

    se_handler = SeHandler()
    try:
        url = 'https://debugtalk.com/post/use-pyenv-manage-multiple-python-virtualenvs/'
        file_name = f'{int(time.time())}_test-canvas.png'
        print(f'filename:{file_name}')
        locator = (By.ID, '背景')
        se_handler.save_img_from_url(url, file_name, locator=locator, text='背景', attribute=('id', '背景'))
    finally:
        # Always shut the browser down, even if a wait times out, so no
        # driver process is left running.
        se_handler.quit()


posted @ 2021-10-07 23:11  深圳-逸遥  阅读(286)  评论(0)  编辑  收藏  举报