Python: Capture Full Webpage Screenshots with Selenium

 

# Each line below constructs a Selenium WebDriver for a different browser
# (Firefox, Edge, Safari, Chrome). `webdriver` is the module imported with
# `from selenium import webdriver`, as in the examples further down.
browser = webdriver.Firefox()
edge=webdriver.Edge()
safari=webdriver.Safari()
chrome=webdriver.Chrome()

 https://pypi.org/project/selenium/

案例1:

# encoding: utf-8
# 版权所有 2023 ©涂聚文有限公司
# 许可信息查看:
# 描述:堆区Heap Area 栈区 Stack Area
# Author    : geovindu,Geovin Du 涂聚文.
# IDE       : PyCharm 2023.1 python 311
# Datetime  : 2023/10/26 8:54
# User      : geovindu
# python.exe -m pip install --upgrade pip
# pip install Pillow
# pip install selenium
# Product   : PyCharm
# Project   : EssentialAlgorithms
# File      : LongScreenShot.py
# explain   : 学习


import time
from PIL import Image
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import os
from datetime import datetime

class LongScreenShot(object):
    r"""
    Take a full-length screenshot of a web page with headless Chrome.

    Example Chrome location on Windows:
    "C:\Program Files\Google\Chrome\Application\chrome.exe"
    """

    def __init__(self):
        # Neither attribute is read by screenShot(); both start out empty.
        self.url = ""
        # NOTE: "fielname" is a misspelling of "filename". The name is kept
        # unchanged so code that already touches this attribute still works.
        self.fielname = ""

    def screenShot(self, weburl: str, filename: str):
        """
        Save a full-page PNG screenshot of ``weburl``.

        The browser window is resized to the page's full scroll width and
        height before the ``<body>`` element is captured. The image is
        written to ``<YYYYmmddHHMMSS>.png`` (a relative file name built from
        the current local time).

        :param weburl: address of the page to capture
        :param filename: accepted for interface compatibility but currently
            unused; the output name is always derived from the current time
        :return: None. Every exception is caught and printed instead of
            being propagated to the caller.
        """
        # Created before the try block so the finally clause can tell
        # whether a browser was actually started.
        driver = None
        try:
            today = datetime.now()
            image_name = today.strftime("%Y%m%d%H%M%S")
            print(image_name)
            print(os.path.abspath(__file__))

            # Configure Chrome WebDriver options
            options = Options()
            options.add_argument("--window-size=1920,1080")
            options.add_argument("--start-maximized")
            options.add_argument("--headless")  # Run without a visible window
            options.add_argument("--disable-gpu")

            # Initialize the Chrome WebDriver
            driver = webdriver.Chrome(options=options)
            driver.maximize_window()

            # Navigate to the page to capture
            driver.get(weburl)

            # Fixed pause to give the page time to load; adjust as needed
            time.sleep(1)

            # Ask the page for its full scrollable width and height
            width = driver.execute_script(
                "return Math.max( document.body.scrollWidth, document.body.offsetWidth, document.documentElement.clientWidth, document.documentElement.scrollWidth, document.documentElement.offsetWidth );")
            height = driver.execute_script(
                "return Math.max( document.body.scrollHeight, document.body.offsetHeight, document.documentElement.clientHeight, document.documentElement.scrollHeight, document.documentElement.offsetHeight );")

            # Grow the window so the whole page fits in one capture
            driver.set_window_size(width, height)

            # Capture the <body> element, which now spans the entire page
            full_page = driver.find_element(By.TAG_NAME, "body")
            full_page.screenshot(f"{image_name}.png")
        except Exception as e:
            print(e)
        finally:
            # Always shut the browser down, even when a step above failed;
            # otherwise the Chrome/chromedriver processes are left running.
            if driver is not None:
                try:
                    driver.quit()
                except Exception as e:
                    print(e)

  

调用:

    # Usage example. `BLL` is presumably the author's own package holding
    # LongScreenShot.py (not shown here) - confirm before copying.
    # The second argument is ignored by screenShot(): the PNG is always named
    # after the current timestamp.
    lscreen=BLL.LongScreenShot.LongScreenShot()
    lscreen.screenShot("https://www.csdn.net/",r"result.png")  #ok

  

 案例2:

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import time
import sys
import random


def print_hi(name):
    """Print a short greeting addressed to ``name`` on standard output."""
    greeting = f'Hi, {name}'
    print(greeting)


# Press the green button in the gutter to run the script.
# Script entry point: greet, then save a screenshot of the CSDN home page
# to geovindu.png using headless Chrome.
if __name__ == '__main__':
    print_hi('PyCharm,geovindu')

    options = webdriver.ChromeOptions()
    options.add_argument("--window-size=1920,1080")
    options.add_argument("--start-maximized")
    options.add_argument("--headless")  # Use headless mode for running in the background
    options.add_argument("--disable-gpu")

    driver = webdriver.Chrome(options=options)
    try:
        URL = 'https://www.csdn.net/'

        driver.get(URL)
        # Fixed pause to give the page time to load
        time.sleep(1)
        # S('Width') / S('Height') read scrollWidth / scrollHeight from the
        # parent node of <body>.
        S = lambda X: driver.execute_script('return document.body.parentNode.scroll' + X)
        driver.set_window_size(S('Width'),
                               S('Height'))  # May need manual adjustment
        # NOTE(review): maximize_window() runs after the window was sized to
        # the page, which may undo that sizing - confirm. The original
        # author's note on the next line says it "produced the first page".
        driver.maximize_window()
        fullpage = driver.find_element(By.TAG_NAME, 'body')
        fullpage.screenshot('geovindu.png')
    finally:
        # Always close the browser, even if loading or capturing failed, so
        # no Chrome/chromedriver process is left behind.
        driver.quit()

  

封装类:

 

# encoding: utf-8
# 版权所有 2023 ©涂聚文有限公司
# 许可信息查看:网页截图
# 描述:
# Author    : geovindu,Geovin Du 涂聚文.
# IDE       : PyCharm 2023.1 python 311
# Datetime  : 2023/10/26 14:36
# User      : geovindu
# Product   : PyCharm
# Project   : pythonWebScreenShot
# File      : CaptureWeb.py
# explain   : 学习
# python.exe -m pip install --upgrade pip
# pip install Pillow
# pip install selenium
# pip install pywin32
# pip install PyPDF2

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import time
from datetime import datetime
import sys
import random

class CaptureWebFull(object):
    """
    Web page screenshot helper built on headless Chrome.
    """

    def __init__(self):
        # Neither attribute is read by CaptureGetImage(); both start empty.
        # NOTE: "webulr" is a misspelling of "weburl". The name is kept
        # unchanged so code that already touches this attribute still works.
        self.webulr = ""
        self.filename = ""

    def CaptureGetImage(self, weburl: str, filename: str):
        """
        Save a PNG screenshot of the ``<body>`` element of ``weburl``.

        Prints ``ok`` when the screenshot call completed without raising.
        On failure the exception is printed instead and ``ok`` is not
        printed.

        :param weburl: address of the page to capture
        :param filename: output file name without extension; ``.png`` is
            appended. When empty (or None) a ``YYYYmmddHHMMSS`` timestamp of
            the current local time is used instead.
        :return: None. Exceptions are caught and printed, not propagated.
        """
        # Created before the try block so the finally clause can tell
        # whether a browser was actually started.
        driver = None
        try:
            options = webdriver.ChromeOptions()
            options.add_argument("--window-size=1920,1080")
            options.add_argument("--start-maximized")
            options.add_argument("--headless")  # Use headless mode for running in the background
            options.add_argument("--disable-gpu")

            driver = webdriver.Chrome(options=options)

            # Fallback file name, taken before the page is loaded
            imagename = datetime.now().strftime("%Y%m%d%H%M%S")
            driver.get(weburl)
            # Fixed pause to give the page time to load
            time.sleep(2)

            def scroll_size(dimension):
                # Reads scrollWidth / scrollHeight from the parent node of <body>
                return driver.execute_script('return document.body.parentNode.scroll' + dimension)

            driver.set_window_size(scroll_size('Width'),
                                   scroll_size('Height'))  # May need manual adjustment
            # NOTE(review): maximize_window() runs after the window was sized
            # to the page, which may undo that sizing - confirm. The original
            # author's note on the next statement says it "produced the
            # first page".
            driver.maximize_window()
            fullpage = driver.find_element(By.TAG_NAME, 'body')
            if not filename:
                filename = imagename
            fullpage.screenshot(f'{filename}.png')
        except Exception as e:
            print(e)
        else:
            # Report success only when nothing above failed
            print("ok")
        finally:
            # Always shut the browser down, even when a step above failed;
            # otherwise the Chrome/chromedriver processes are left running.
            if driver is not None:
                try:
                    driver.quit()
                except Exception as e:
                    print(e)

  

调用:

    # Usage example. `BLL` is presumably the author's own package holding
    # CaptureWeb.py (not shown here) - confirm before copying.
    # Passing an empty file name makes CaptureGetImage() name the PNG after
    # the current timestamp.
    bl=BLL.CaptureWeb.CaptureWebFull()
    bl.CaptureGetImage("http://www.dusystem.com/","")

  

posted @ 2023-10-26 10:50  ®Geovin Du Dream Park™  阅读(18)  评论(0编辑  收藏  举报