Python Selenium 网页截全图
代码如下:
from selenium import webdriver
from selenium.webdriver.support.expected_conditions import _find_element
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import StaleElementReferenceException
class text_to_be_present_in_element(object):
    """Wait condition: the located element's text contains a given substring.

    Instances are callables that take a driver, which makes them usable as
    the condition argument of ``WebDriverWait.until``.

    Args:
        locator: A ``(by, value)`` pair; it is unpacked into
            ``driver.find_element``.
        text_: Substring expected to occur in the element's ``text``.
    """

    def __init__(self, locator, text_):
        self.locator = locator
        self.text_ = text_

    def __call__(self, driver):
        """Return whether ``text_`` currently occurs in the element's text.

        A stale element reference is reported as ``False``; any other
        lookup error propagates to the caller.
        """
        try:
            # Use the driver's public lookup method rather than the private
            # ``_find_element`` helper of ``expected_conditions``, which is
            # not part of Selenium's public API.
            element_text = driver.find_element(*self.locator).text
            return self.text_ in element_text
        except StaleElementReferenceException:
            return False
class text_to_be_present_in_element_value(object):
    """Wait condition: a named attribute of the located element contains a substring.

    Instances are callables that take a driver, which makes them usable as
    the condition argument of ``WebDriverWait.until``.

    Args:
        locator: A ``(by, value)`` pair; it is unpacked into
            ``driver.find_element``.
        key: Name of the attribute to read from the element.
        text_: Substring expected to occur in that attribute's value.
    """

    def __init__(self, locator, key, text_):
        self.locator = locator
        self.text = text_
        self.key = key

    def __call__(self, driver):
        """Return whether the attribute's current value contains the text.

        A missing or empty attribute value and a stale element reference
        are both reported as ``False``; other lookup errors propagate.
        """
        try:
            # Use the driver's public lookup method rather than the private
            # ``_find_element`` helper of ``expected_conditions``.
            element = driver.find_element(*self.locator)
            # str() keeps the string coercion the former f-string applied.
            attribute_value = element.get_attribute(str(self.key))
        except StaleElementReferenceException:
            return False
        # A falsy value (None or "") can never contain the expected text.
        return bool(attribute_value) and self.text in attribute_value
def chrome_headless(
        executable_path='/Users/dengjiajie/Desktop/mark_book/my_awesome_book/my_tools/chromedriver',
        window_size=(1920, 1080)):
    """Create a headless Chrome WebDriver.

    Args:
        executable_path: Path to the chromedriver binary. Defaults to the
            location that was previously hard-coded in this function.
        window_size: ``(width, height)`` of the initial browser window,
            in pixels.

    Returns:
        The newly created ``webdriver.Chrome`` instance.
    """
    width, height = window_size
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--disable-gpu')
    options.add_argument('--no-sandbox')
    # Chrome's switch takes "<width>,<height>"; the former "1920x1080"
    # value is not in that format, so the intended size was not applied.
    options.add_argument(f'--window-size={width},{height}')
    return webdriver.Chrome(options=options, executable_path=executable_path)
def select_driver(name='chrome_headless'):
    """Build and return the driver registered under ``name``.

    Args:
        name: Key of the driver factory to use.

    Returns:
        Whatever the selected factory returns when called with no arguments.

    Raises:
        ValueError: If no factory is registered under ``name``.
    """
    # Registry of zero-argument factories, keyed by driver name.
    factories = {'chrome_headless': chrome_headless}
    factory = factories.get(name)
    if factory is not None:
        return factory()
    raise ValueError('driver is None, please check driver exist ')
class SeHandler():
    """Small wrapper around a WebDriver for saving page screenshots.

    Args:
        driver: An existing driver to use. When omitted, one is created
            with ``select_driver()``.
    """

    def __init__(self, driver=None):
        self.driver = driver if driver is not None else select_driver()

    def save_img_from_url(self, url, abs_file_path, width=None, height=None,
                          locator=None, text=None, attribute=None):
        """Load ``url``, optionally wait for page content, and save a screenshot.

        Args:
            url: Page to load.
            abs_file_path: Destination path handed to ``save_screenshot``.
            width: Window width in pixels; falls back to 1920.
            height: Window height in pixels. When omitted, the page's
                ``document.body.scrollHeight`` at the target width is used.
            locator: ``(by, value)`` pair of an element that must become
                visible before the screenshot is taken.
            text: Substring that must appear in the located element's text.
            attribute: ``(name, value)`` tuple or list; the located element's
                attribute ``name`` must contain ``value``. Any other type is
                ignored.

        Returns:
            The truthiness of the driver's ``save_screenshot`` result, so a
            failed write is reported as ``False`` instead of ``True``.

        Raises:
            ValueError: If ``text`` or ``attribute`` is given without a
                ``locator`` (both waits need an element to inspect).
        """
        wants_attribute = bool(attribute) and isinstance(attribute, (tuple, list))
        if not locator and (text or wants_attribute):
            # Fail before loading the page rather than deep inside the wait.
            raise ValueError('locator is required when text or attribute is given')

        self.driver.get(url)

        if locator:
            # Poll every 0.5 s for at most 5 s per condition.
            wait = WebDriverWait(self.driver, 5, 0.5)
            print('进入元素显现并可定位')
            wait.until(EC.visibility_of_element_located(locator=locator))
            if text:
                print('进入文本显式等待')
                wait.until(text_to_be_present_in_element(locator=locator, text_=text))
            if wants_attribute:
                print('进入属性值等待')
                attribute_name, attribute_value = attribute
                wait.until(
                    text_to_be_present_in_element_value(
                        locator=locator, key=attribute_name, text_=attribute_value))

        # Resolve the window width.
        width = width or 1920
        print(f'width:{width}')
        # Resolve the window height.
        if not height:
            # Apply the target width first: the page may reflow at a
            # different width, which changes its scroll height.
            current_height = self.driver.get_window_size()['height']
            self.driver.set_window_size(width=width, height=current_height)
            height = self.driver.execute_script('return document.body.scrollHeight')
        print(f'height:{height}')
        # Resize the window so the screenshot covers the resolved area.
        self.driver.set_window_size(width=width, height=height)
        # Take the screenshot and report whether the driver saved it.
        return bool(self.driver.save_screenshot(abs_file_path))

    def quit(self):
        """Shut the driver down; calling this again afterwards is a no-op."""
        if self.driver is None:
            return
        self.driver.quit()
        self.driver = None
if __name__ == '__main__':
    # Demo: capture one page after waiting for a specific element on it.
    import time
    from selenium.webdriver.common.by import By

    se_handler = SeHandler()
    try:
        url = 'https://debugtalk.com/post/use-pyenv-manage-multiple-python-virtualenvs/'
        # A timestamp prefix keeps repeated runs from overwriting each other.
        file_name = f'{int(time.time())}_test-canvas.png'
        print(f'filename:{file_name}')
        locator = (By.ID, '背景')
        ret = se_handler.save_img_from_url(url, file_name, locator=locator, text='背景', attribute=('id', '背景'))
    finally:
        # Always shut the browser down, even if loading or waiting fails,
        # so no driver process is left running.
        se_handler.quit()