Python selenium PIL 全网页滚动截屏 && headless全网页截屏
-
思路
先截取当前屏幕(可视区域)的图片,以其高度作为基准高度 h;再获取整个网页 body 的总高度 H;然后以 h 为步长逐屏向下滚动并循环截图,最后通过 PIL 将各张截图按顺序拼接成一张完整的长图(最后一屏与前面重叠的部分在拼接时去掉)。
-
代码
# -*- coding:utf-8 -*-
# author: donttouchkeyboard@gmail.com
# software: PyCharm
import os
from PIL import Image
from time import sleep


class ScreenShot:
    """Take a full-page screenshot by scrolling and stitching viewport captures.

    The page is captured one viewport at a time; each capture is appended to
    the bottom of the growing result image with PIL.
    """

    # JavaScript snippets executed in the browser.
    __JS__ = {
        'scroll_to_bottom': "window.scroll({top:document.body.clientHeight,left:0,behavior:'auto'});",
        'scroll_to_y': "window.scroll({top:%d,left:0,behavior:'auto'});",
        'pixel_ratio': "return window.devicePixelRatio;",
    }
    __base_end__ = 'tmp_end.png'
    __scroll_bottom__ = 'scroll_to_bottom'
    __scroll_y__ = 'scroll_to_y'
    __pixel_ratio__ = 'pixel_ratio'
    __body__ = '//body'
    __height__ = 'height'
    # Kept only for backward compatibility; it is no longer executed because
    # it would delete every PNG in the working directory.
    __clear_shell__ = 'rm -rf *.png'
    __RGB__ = 'RGB'
    # Ratio assumed when the browser does not report window.devicePixelRatio
    # (this is the value the implementation originally hard-coded).
    __default_ratio__ = 2

    @classmethod
    def screen_shot(cls, driver, title, uploader_url='', delete=False):
        """Capture the whole page by scrolling and save it as ``<title>.png``.

        :param driver: a selenium WebDriver instance with the page already loaded
        :param title: base name of the resulting image (``<title>.png`` in the
            current working directory)
        :param uploader_url: upload target; currently unused because uploading
            is not implemented yet
        :param delete: when true, remove every image file created by this call
            (the temporary captures and the final image)
        :return: URL of the uploaded image; always ``''`` until uploading is
            implemented
        """
        base_image = '{}.png'.format(title)
        driver.save_screenshot(base_image)
        created = [base_image]

        # find_element(by, value) works on Selenium 3 and 4;
        # find_element_by_xpath was removed in Selenium 4.3.
        body = driver.find_element('xpath', cls.__body__)
        body_h = int(body.size.get(cls.__height__))

        # Screenshots are measured in device pixels while scrolling uses CSS
        # pixels, so convert with the real ratio instead of assuming 2.
        ratio = driver.execute_script(cls.__JS__[cls.__pixel_ratio__]) or cls.__default_ratio__
        with Image.open(base_image) as first:
            current_h = first.size[1] / ratio

        # When the page fits into one viewport the first capture is already
        # complete; appending more captures would only duplicate content.
        if body_h > current_h:
            for i in range(1, int(body_h / current_h)):
                tmp_image = f'tmp_{i}.png'
                driver.execute_script(cls.__JS__[cls.__scroll_y__] % (current_h * i))
                sleep(.5)  # give the page time to render after scrolling
                driver.save_screenshot(tmp_image)
                created.append(tmp_image)
                cls.__join_images__(base_image, tmp_image, 0, base_image, scale=ratio)

            # The last capture is taken at the very bottom, so its upper part
            # repeats what the previous capture already contains.
            driver.execute_script(cls.__JS__[cls.__scroll_bottom__])
            driver.save_screenshot(cls.__base_end__)
            created.append(cls.__base_end__)
            overlap = int(current_h - int(body_h % current_h))
            cls.__join_images__(base_image, cls.__base_end__, overlap, base_image, scale=ratio)

        # TODO: upload the image
        url = ''

        # Remove only the files produced by this call.
        if delete:
            for path in created:
                try:
                    os.remove(path)
                except FileNotFoundError:
                    # Best-effort cleanup, same spirit as ``rm -f``.
                    pass
        return url

    @classmethod
    def __join_images__(cls, png1, png2, size=0, output='result.png', scale=2):
        """Stack ``png2`` below ``png1`` and save the result to ``output``.

        :param png1: path of the upper image
        :param png2: path of the lower image
        :param size: vertical overlap between the two images, in CSS pixels
        :param output: path of the stitched image (may be the same as ``png1``)
        :param scale: device-pixel ratio used to convert ``size`` to image pixels
        :return: None
        """
        overlap_px = int(round(size * scale))
        with Image.open(png1) as upper, Image.open(png2) as lower:
            upper_w, upper_h = upper.size
            lower_h = lower.size[1]
            joint = Image.new(cls.__RGB__, (upper_w, upper_h + lower_h - overlap_px))
            joint.paste(upper, (0, 0))
            # Pasted second so the lower image wins inside the overlap area.
            joint.paste(lower, (0, upper_h - overlap_px))
        # Save after the inputs are closed because output may overwrite png1.
        joint.save(output)


if __name__ == '__main__':
    from selenium import webdriver

    driver = webdriver.Chrome()
    try:
        driver.get("https://www.cnblogs.com/worldline/")
        ScreenShot.screen_shot(driver, 'worldline')
    finally:
        # Always shut the browser down, even when the capture fails.
        driver.quit()
-
其他
如果是在 headless(无头)模式下,可以先把浏览器窗口大小设置为整个页面的宽度和高度,然后一次性截取全网页,无需滚动拼接:
def get_image(url, pic_name):
    """Take a full-page screenshot with headless Chrome.

    The window is resized to the document's full scroll width and height so
    that a single screenshot contains the whole page.

    :param url: address of the page to capture
    :param pic_name: path of the image file to write
    :return: None
    """
    # Imported here so the snippet can be used on its own.
    import time

    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options

    chrome_options = Options()
    chrome_options.add_argument('headless')
    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.get(url)
        time.sleep(.5)  # let the page finish rendering before measuring it
        width = driver.execute_script("return document.documentElement.scrollWidth")
        height = driver.execute_script("return document.documentElement.scrollHeight")
        print(width, height)
        driver.set_window_size(width, height)
        time.sleep(.5)  # let the layout settle after the resize
        driver.save_screenshot(pic_name)
    finally:
        # quit() ends the whole session (browser and driver process) and runs
        # even when a step above raises; close() only closed the window.
        driver.quit()