python: Capture Full Webpage Screenshots with Selenium

 

# One WebDriver object per browser: each line constructs the driver class of
# that name from ``webdriver``.
# NOTE(review): assumes ``from selenium import webdriver`` is in scope, as in
# the listings further down — confirm before running this snippet on its own.
browser = webdriver.Firefox()  # Firefox driver
edge=webdriver.Edge()  # Edge driver
safari=webdriver.Safari()  # Safari driver
chrome=webdriver.Chrome()  # Chrome driver

 https://pypi.org/project/selenium/

案例1:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# encoding: utf-8
# 版权所有 2023 ©涂聚文有限公司
# 许可信息查看:
# 描述:堆区Heap Area 栈区 Stack Area
# Author    : geovindu,Geovin Du 涂聚文.
# IDE       : PyCharm 2023.1 python 311
# Datetime  : 2023/10/26 8:54
# User      : geovindu
# python.exe -m pip install --upgrade pip
# pip install Pillow
# pip install selenium
# Product   : PyCharm
# Project   : EssentialAlgorithms
# File      : LongScreenShot.py
# explain   : 学习
 
 
import time
from PIL import Image
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import os
from datetime import datetime
 
class LongScreenShot(object):
    r"""
    Take a long (full-page) screenshot of a web page given its URL.

    Example Chrome location on Windows:
    "C:\Program Files\Google\Chrome\Application\chrome.exe"
    """

    def __init__(self):
        self.url = ""
        # NOTE: the misspelled attribute name is kept so that any existing
        # code reading ``fielname`` keeps working.
        self.fielname = ""

    def screenShot(self, weburl: str, filename: str):
        """
        Capture the whole page at ``weburl`` with headless Chrome.

        The image is written as ``<YYYYmmddHHMMSS>.png``; the name is a
        relative path, so the file lands in the current working directory.

        :param weburl: address of the page to capture
        :param filename: accepted for interface compatibility only; the
            output name is always derived from the current time
        :return: None. Any error is printed instead of being raised.
        """
        try:
            image_name = datetime.now().strftime("%Y%m%d%H%M%S")
            print(image_name)
            print(os.path.abspath(__file__))

            # Configure Chrome to run in the background (headless).
            options = Options()
            options.add_argument("--window-size=1920,1080")
            options.add_argument("--start-maximized")
            options.add_argument("--headless")
            options.add_argument("--disable-gpu")

            driver = webdriver.Chrome(options=options)
            try:
                driver.maximize_window()
                driver.get(weburl)

                # Crude wait for the page to load; adjust as needed.
                time.sleep(1)

                # Ask the page for its full extent, taking the largest of
                # the body / documentElement measurements.
                width = driver.execute_script(
                    "return Math.max( document.body.scrollWidth, document.body.offsetWidth, "
                    "document.documentElement.clientWidth, document.documentElement.scrollWidth, "
                    "document.documentElement.offsetWidth );")
                height = driver.execute_script(
                    "return Math.max( document.body.scrollHeight, document.body.offsetHeight, "
                    "document.documentElement.clientHeight, document.documentElement.scrollHeight, "
                    "document.documentElement.offsetHeight );")

                # Grow the window to the page size so <body> fits in one shot.
                driver.set_window_size(width, height)

                full_page = driver.find_element(By.TAG_NAME, "body")
                full_page.screenshot(f"{image_name}.png")
            finally:
                # Always shut the browser down, even when a step above failed,
                # so no Chrome process is left behind.
                driver.quit()
        except Exception as e:
            # Best-effort helper: report the problem and return normally.
            print(e)

  

调用:

1
2
# Usage: build the helper and capture the CSDN home page.
lscreen=BLL.LongScreenShot.LongScreenShot()
lscreen.screenShot("https://www.csdn.net/",r"result.png")  # ok

  

 案例2:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import time
import sys
import random
 
 
def print_hi(name):
    """Print the greeting ``Hi, <name>`` to standard output."""
    print(f'Hi, {name}')
 
 
# Press the green button in the gutter to run the script.
if __name__ == '__main__':
    # Script entry point: greet, then save a screenshot of the CSDN home page
    # as ``geovindu.png`` using headless Chrome.
    print_hi('PyCharm,geovindu')

    options = webdriver.ChromeOptions()
    options.add_argument("--window-size=1920,1080")
    options.add_argument("--start-maximized")
    options.add_argument("--headless")  # run in the background
    options.add_argument("--disable-gpu")

    driver = webdriver.Chrome(options=options)
    try:
        URL = 'https://www.csdn.net/'

        # Maximise before measuring: doing it after set_window_size() would
        # replace the page-sized window again (the original noted that only
        # the first page was captured).
        driver.maximize_window()
        driver.get(URL)
        time.sleep(1)  # crude wait for the page to load

        # Size the window to the document's scroll extent so <body> fits.
        scroll_width = driver.execute_script('return document.body.parentNode.scrollWidth')
        scroll_height = driver.execute_script('return document.body.parentNode.scrollHeight')
        driver.set_window_size(scroll_width, scroll_height)  # May need manual adjustment

        fullpage = driver.find_element(By.TAG_NAME, 'body')
        fullpage.screenshot('geovindu.png')
    finally:
        # Always close the browser, even if a step above raised.
        driver.quit()

  

封装类:

 

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# encoding: utf-8
# 版权所有 2023 ©涂聚文有限公司
# 许可信息查看:网页截图
# 描述:
# Author    : geovindu,Geovin Du 涂聚文.
# IDE       : PyCharm 2023.1 python 311
# Datetime  : 2023/10/26 14:36
# User      : geovindu
# Product   : PyCharm
# Project   : pythonWebScreenShot
# File      : CaptureWeb.py
# explain   : 学习
# python.exe -m pip install --upgrade pip
# pip install Pillow
# pip install selenium
# pip install pywin32
# pip install PyPDF2
 
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import time
from datetime import datetime
import sys
import random
 
class CaptureWebFull(object):
    """
    Web page screenshot helper built on headless Chrome.
    """

    def __init__(self):
        # NOTE: ``webulr`` is a misspelling kept for backward compatibility.
        self.webulr = ""
        self.filename = ""

    def CaptureGetImage(self, weburl: str, filename: str):
        """
        Capture the page at ``weburl`` and save it as ``<filename>.png``.

        :param weburl: address of the page to capture
        :param filename: base name of the image (``.png`` is appended);
            when empty, a ``YYYYmmddHHMMSS`` timestamp is used instead
        :return: None. Errors are printed rather than raised; ``ok`` is
            printed only when the screenshot step completed.
        """
        try:
            options = webdriver.ChromeOptions()
            options.add_argument("--window-size=1920,1080")
            options.add_argument("--start-maximized")
            options.add_argument("--headless")  # run in the background
            options.add_argument("--disable-gpu")

            driver = webdriver.Chrome(options=options)
            try:
                imagename = datetime.now().strftime("%Y%m%d%H%M%S")

                # Maximise before measuring: doing it after set_window_size()
                # would replace the page-sized window again (the original
                # noted that only the first page was captured).
                driver.maximize_window()
                driver.get(weburl)
                time.sleep(2)  # crude wait for the page to load

                # Size the window to the document's scroll extent.
                scroll_width = driver.execute_script('return document.body.parentNode.scrollWidth')
                scroll_height = driver.execute_script('return document.body.parentNode.scrollHeight')
                driver.set_window_size(scroll_width, scroll_height)  # May need manual adjustment

                fullpage = driver.find_element(By.TAG_NAME, 'body')
                if not filename:
                    filename = imagename
                fullpage.screenshot(f'{filename}.png')
            finally:
                # Always close the browser, even if a step above raised.
                driver.quit()
        except Exception as e:
            # Best-effort helper: report the problem and return normally.
            print(e)
        else:
            print("ok")

  

调用:

1
2
# Usage: the empty file name makes CaptureGetImage fall back to a
# timestamp-based image name.
bl = BLL.CaptureWeb.CaptureWebFull()
bl.CaptureGetImage(weburl="http://www.dusystem.com/", filename="")

  

posted @   ®Geovin Du Dream Park™  阅读(21)  评论(0)  编辑  收藏  举报
相关博文:
阅读排行:
· 25岁的心里话
· 闲置电脑爆改个人服务器(超详细) #公网映射 #Vmware虚拟网络编辑器
· 基于 Docker 搭建 FRP 内网穿透开源项目(很简单哒)
· 零经验选手,Compose 一天开发一款小游戏!
· 一起来玩mcp_server_sqlite,让AI帮你做增删改查!!
历史上的今天:
2022-10-26 Python: State Pattern
2022-10-26 Python: Template Method Pattern
< 2025年3月 >
23 24 25 26 27 28 1
2 3 4 5 6 7 8
9 10 11 12 13 14 15
16 17 18 19 20 21 22
23 24 25 26 27 28 29
30 31 1 2 3 4 5
点击右上角即可分享
微信分享提示