python: Capture Full Webpage Screenshots with Selenium
# One WebDriver instance per supported browser. Each constructor call
# launches a real browser session, so every name is bound exactly once:
# rebinding a name without calling quit() on the old driver would leave
# the previous browser process running.
browser = webdriver.Firefox()
edge = webdriver.Edge()
safari = webdriver.Safari()
chrome = webdriver.Chrome()
https://pypi.org/project/selenium/
案例1:
# encoding: utf-8
# Copyright 2023 © Tu Juwen Co., Ltd. (涂聚文有限公司)
# License information:
# Description: Heap Area / Stack Area
# Author    : geovindu, Geovin Du 涂聚文.
# IDE       : PyCharm 2023.1 python 311
# Datetime  : 2023/10/26 8:54
# User      : geovindu
# python.exe -m pip install --upgrade pip
# pip install Pillow
# pip install selenium
# Product   : PyCharm
# Project   : EssentialAlgorithms
# File      : LongScreenShot.py
# explain   : study notes
import time
from PIL import Image
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import os
from datetime import datetime


class LongScreenShot(object):
    r"""
    Take a full-length screenshot of a web page given its URL.

    Example Chrome location on Windows:
    "C:\Program Files\Google\Chrome\Application\chrome.exe"
    """

    def __init__(self):
        self.url = ""
        # NOTE: the attribute name is misspelled ("fielname"); it is kept
        # as-is so existing code reading it keeps working.
        self.fielname = ""

    @staticmethod
    def _page_extent(driver, dimension: str):
        """
        Return the largest reported page size along one axis.

        :param driver: a WebDriver with the target page already loaded
        :param dimension: "Width" or "Height"
        :return: the maximum of the body/documentElement scroll, offset and
                 client sizes, as returned by the browser
        """
        script = (
            "return Math.max( "
            f"document.body.scroll{dimension}, "
            f"document.body.offset{dimension}, "
            f"document.documentElement.client{dimension}, "
            f"document.documentElement.scroll{dimension}, "
            f"document.documentElement.offset{dimension} );"
        )
        return driver.execute_script(script)

    def screenShot(self, weburl: str, filename: str) -> None:
        """
        Capture a screenshot of the whole page at ``weburl``.

        The image is written to ``<YYYYmmddHHMMSS>.png`` relative to the
        current working directory.

        :param weburl: URL of the page to capture
        :param filename: name for the generated image. Currently unused:
                         the file is always named after the current
                         timestamp.
        :return: None. Any exception is caught and printed, not raised.
        """
        driver = None
        try:
            image_name = datetime.now().strftime("%Y%m%d%H%M%S")
            print(image_name)
            print(os.path.abspath(__file__))

            # Configure Chrome to run headless with a known initial size.
            options = Options()
            options.add_argument("--window-size=1920,1080")
            options.add_argument("--start-maximized")
            options.add_argument("--headless")
            options.add_argument("--disable-gpu")

            driver = webdriver.Chrome(options=options)
            driver.maximize_window()

            driver.get(weburl)
            # Crude wait for the page to finish loading; adjust as needed.
            time.sleep(1)

            # Resize the window to the full document so that the <body>
            # element screenshot covers the entire page.
            width = self._page_extent(driver, "Width")
            height = self._page_extent(driver, "Height")
            driver.set_window_size(width, height)

            full_page = driver.find_element(By.TAG_NAME, "body")
            full_page.screenshot(f"{image_name}.png")
        except Exception as e:
            print(e)
        finally:
            # Always shut the browser down, even when navigation or the
            # capture failed; otherwise the Chrome process is left running.
            if driver is not None:
                driver.quit()
调用:
# Usage: capture the CSDN home page with the LongScreenShot helper.
lscreen = BLL.LongScreenShot.LongScreenShot()
lscreen.screenShot("https://www.csdn.net/", r"result.png")  # ok
案例2:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import time
import sys
import random


def print_hi(name):
    """Print a short greeting for *name*."""
    print(f'Hi, {name}')


if __name__ == '__main__':
    print_hi('PyCharm,geovindu')

    # Headless Chrome with a known initial window size.
    options = webdriver.ChromeOptions()
    options.add_argument("--window-size=1920,1080")
    options.add_argument("--start-maximized")
    options.add_argument("--headless")
    options.add_argument("--disable-gpu")

    driver = webdriver.Chrome(options=options)
    try:
        # NOTE(review): maximize_window() used to run after
        # set_window_size(); that presumably reset the window and is the
        # likely reason only the first screen was captured. It now runs
        # before navigation so the full-page size set below is what the
        # screenshot uses -- confirm against your Chrome/driver version.
        driver.maximize_window()

        URL = 'https://www.csdn.net/'
        driver.get(URL)
        # Crude wait for the page to finish loading.
        time.sleep(1)

        def scroll_size(axis):
            """Return the root element's scrollWidth / scrollHeight."""
            return driver.execute_script(
                'return document.body.parentNode.scroll' + axis)

        # Grow the window to the full document size; may need manual
        # adjustment for some pages.
        driver.set_window_size(scroll_size('Width'), scroll_size('Height'))

        fullpage = driver.find_element(By.TAG_NAME, 'body')
        fullpage.screenshot('geovindu.png')
    finally:
        # Always close the browser, even if loading or capturing failed.
        driver.quit()
封装类:
# encoding: utf-8
# Copyright 2023 © Tu Juwen Co., Ltd. (涂聚文有限公司)
# License information: web page screenshot
# Description:
# Author    : geovindu, Geovin Du 涂聚文.
# IDE       : PyCharm 2023.1 python 311
# Datetime  : 2023/10/26 14:36
# User      : geovindu
# Product   : PyCharm
# Project   : pythonWebScreenShot
# File      : CaptureWeb.py
# explain   : study notes
# python.exe -m pip install --upgrade pip
# pip install Pillow
# pip install selenium
# pip install pywin32
# pip install PyPDF2
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import time
from datetime import datetime
import sys
import random


class CaptureWebFull(object):
    """
    Capture a screenshot of a web page with headless Chrome.
    """

    def __init__(self):
        # NOTE: "webulr" is a misspelling of "weburl"; the attribute name is
        # kept so existing code reading it keeps working.
        self.webulr = ""
        self.filename = ""

    def CaptureGetImage(self, weburl: str, filename: str) -> None:
        """
        Capture the page at ``weburl`` and save it as ``<filename>.png``.

        :param weburl: URL of the page to capture
        :param filename: base name of the image file, without extension
                         (".png" is appended). When empty or None, the
                         current timestamp (YYYYmmddHHMMSS) is used.
        :return: None. Any exception is caught and printed, not raised;
                 "ok" is printed only when the capture succeeded.
        """
        driver = None
        try:
            options = webdriver.ChromeOptions()
            options.add_argument("--window-size=1920,1080")
            options.add_argument("--start-maximized")
            options.add_argument("--headless")
            options.add_argument("--disable-gpu")

            driver = webdriver.Chrome(options=options)
            # NOTE(review): maximize_window() used to run after
            # set_window_size(); that presumably reset the window and is the
            # likely reason only the first screen was captured. It now runs
            # before navigation -- confirm against your Chrome/driver version.
            driver.maximize_window()

            driver.get(weburl)
            # Crude wait for the page to finish loading.
            time.sleep(2)

            def scroll_size(axis):
                """Return the root element's scrollWidth / scrollHeight."""
                return driver.execute_script(
                    'return document.body.parentNode.scroll' + axis)

            # Grow the window to the full document size; may need manual
            # adjustment for some pages.
            driver.set_window_size(scroll_size('Width'), scroll_size('Height'))

            if not filename:
                filename = datetime.now().strftime("%Y%m%d%H%M%S")

            fullpage = driver.find_element(By.TAG_NAME, 'body')
            fullpage.screenshot(f'{filename}.png')
        except Exception as e:
            print(e)
        else:
            print("ok")
        finally:
            # Always close the browser, even if loading or capturing failed;
            # otherwise the Chrome process is left running.
            if driver is not None:
                driver.quit()
调用:
# Usage: an empty filename makes CaptureGetImage fall back to a
# timestamp-based image name.
bl = BLL.CaptureWeb.CaptureWebFull()
bl.CaptureGetImage("http://www.dusystem.com/", "")
哲学管理(学)人生, 文学艺术生活, 自动(计算机学)物理(学)工作, 生物(学)化学逆境, 历史(学)测绘(学)时间, 经济(学)数学金钱(理财), 心理(学)医学情绪, 诗词美容情感, 美学建筑(学)家园, 解构建构(分析)整合学习, 智商情商(IQ、EQ)运筹(学)生存.---Geovin Du(涂聚文)
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 25岁的心里话
· 闲置电脑爆改个人服务器(超详细) #公网映射 #Vmware虚拟网络编辑器
· 基于 Docker 搭建 FRP 内网穿透开源项目(很简单哒)
· 零经验选手,Compose 一天开发一款小游戏!
· 一起来玩mcp_server_sqlite,让AI帮你做增删改查!!
2022-10-26 Python: State Pattern
2022-10-26 Python: Template Method Pattern