Python: Capture Full Webpage Screenshots with Selenium
# Each line starts its own browser session through the matching Selenium
# driver class. They are listed as alternatives: create only the one you need
# and call .quit() on it when you are done. (The original snippet repeated the
# Edge/Safari/Chrome lines, which rebound the names and left the first three
# sessions running with no handle to close them.)
browser = webdriver.Firefox()
edge = webdriver.Edge()
safari = webdriver.Safari()
chrome = webdriver.Chrome()
https://pypi.org/project/selenium/
案例1:
# encoding: utf-8
# Copyright 2023 © Tu Juwen Co., Ltd. (涂聚文有限公司)
# License information:
# Description: Heap Area / Stack Area
# Author    : geovindu, Geovin Du 涂聚文.
# IDE       : PyCharm 2023.1 python 311
# Datetime  : 2023/10/26 8:54
# User      : geovindu
# python.exe -m pip install --upgrade pip
# pip install Pillow
# pip install selenium
# Product   : PyCharm
# Project   : EssentialAlgorithms
# File      : LongScreenShot.py
# explain   : study notes
import time
from PIL import Image
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import os
from datetime import datetime


class LongScreenShot(object):
    r"""
    Take a long (full-page) screenshot of a web page given its URL.

    Chrome location noted by the author:
    "C:\Program Files\Google\Chrome\Application\chrome.exe"
    """
    # The docstring above is a raw string so the backslashes in the Windows
    # path are not parsed as (invalid) escape sequences.

    def __init__(self):
        self.url = ""
        # NOTE: the attribute name is misspelled ("fielname"); it is kept
        # unchanged so any existing code that reads it keeps working.
        self.fielname = ""

    def screenShot(self, weburl: str, filename: str):
        """
        Capture a full-page screenshot of a web page with headless Chrome.

        The PNG is written relative to the current working directory and is
        named after the current local time (``YYYYmmddHHMMSS.png``).

        Any exception raised while driving the browser is caught and printed
        rather than propagated; the browser is shut down in every case.

        :param weburl: URL of the page to capture
        :param filename: requested image file name. Currently unused: the
            output name is always derived from the timestamp.
        :return: None
        """
        driver = None
        try:
            image_name = datetime.now().strftime("%Y%m%d%H%M%S")
            print(image_name)
            print(os.path.abspath(__file__))

            # Configure Chrome WebDriver options
            options = Options()
            options.add_argument("--window-size=1920,1080")
            options.add_argument("--start-maximized")
            options.add_argument("--headless")  # run in the background
            options.add_argument("--disable-gpu")

            driver = webdriver.Chrome(options=options)
            driver.maximize_window()

            driver.get(weburl)
            # Fixed pause to let the page load (adjust as needed)
            time.sleep(1)

            # Ask the page for its full rendered width and height
            width = driver.execute_script(
                "return Math.max( document.body.scrollWidth, "
                "document.body.offsetWidth, "
                "document.documentElement.clientWidth, "
                "document.documentElement.scrollWidth, "
                "document.documentElement.offsetWidth );")
            height = driver.execute_script(
                "return Math.max( document.body.scrollHeight, "
                "document.body.offsetHeight, "
                "document.documentElement.clientHeight, "
                "document.documentElement.scrollHeight, "
                "document.documentElement.offsetHeight );")

            # Grow the window to the page size so the whole <body> is rendered
            driver.set_window_size(width, height)

            full_page = driver.find_element(By.TAG_NAME, "body")
            full_page.screenshot(f"{image_name}.png")
        except Exception as e:
            print(e)
        finally:
            # Always release the browser, including when a step above failed;
            # previously quit() was skipped on any exception.
            if driver is not None:
                driver.quit()
调用:
# Usage example. The second argument is accepted, but screenShot names the
# output file from the current timestamp.
long_shot = BLL.LongScreenShot.LongScreenShot()
long_shot.screenShot("https://www.csdn.net/", r"result.png")  # ok
案例2:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import time
import sys
import random


def print_hi(name):
    """Print a short greeting for *name*."""
    print(f'Hi, {name}')


if __name__ == '__main__':
    print_hi('PyCharm,geovindu')

    options = webdriver.ChromeOptions()
    options.add_argument("--window-size=1920,1080")
    options.add_argument("--start-maximized")
    options.add_argument("--headless")  # run in the background
    options.add_argument("--disable-gpu")

    driver = webdriver.Chrome(options=options)
    try:
        URL = 'https://www.csdn.net/'
        driver.get(URL)
        # Fixed pause to let the page load
        time.sleep(1)

        def scroll_size(axis):
            """Return the document's scrollWidth / scrollHeight ('Width' or 'Height')."""
            return driver.execute_script(
                'return document.body.parentNode.scroll' + axis)

        # Grow the window to the full page size. May need manual adjustment.
        # Do not call maximize_window() after this: it can reset the size
        # chosen here, leaving only the first screenful in the image.
        driver.set_window_size(scroll_size('Width'), scroll_size('Height'))

        fullpage = driver.find_element(By.TAG_NAME, 'body')
        fullpage.screenshot('geovindu.png')
    finally:
        # Shut the browser down even if a step above raised.
        driver.quit()
封装类:
# encoding: utf-8
# Copyright 2023 © Tu Juwen Co., Ltd. (涂聚文有限公司)
# License information: web page screenshot
# Description:
# Author    : geovindu, Geovin Du 涂聚文.
# IDE       : PyCharm 2023.1 python 311
# Datetime  : 2023/10/26 14:36
# User      : geovindu
# Product   : PyCharm
# Project   : pythonWebScreenShot
# File      : CaptureWeb.py
# explain   : study notes
# python.exe -m pip install --upgrade pip
# pip install Pillow
# pip install selenium
# pip install pywin32
# pip install PyPDF2
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import time
from datetime import datetime
import sys
import random


class CaptureWebFull(object):
    """
    Capture a screenshot of a web page with headless Chrome.
    """

    def __init__(self):
        # NOTE: "webulr" is a misspelling of "weburl"; the name is kept so
        # existing code that reads the attribute keeps working.
        self.webulr = ""
        self.filename = ""

    def CaptureGetImage(self, weburl: str, filename: str):
        """
        Capture a full-page screenshot of ``weburl`` and save it as a PNG.

        The image is written relative to the current working directory as
        ``<filename>.png``. When ``filename`` is empty (or None) a timestamp
        of the form ``YYYYmmddHHMMSS`` is used as the base name instead.

        Any exception raised while driving the browser is caught and printed
        rather than propagated; "ok" is printed only when the capture
        succeeded. The browser is shut down in every case.

        :param weburl: URL of the page to capture
        :param filename: base name of the output image, without the ``.png``
            extension (it is always appended)
        :return: None
        """
        driver = None
        try:
            options = webdriver.ChromeOptions()
            options.add_argument("--window-size=1920,1080")
            options.add_argument("--start-maximized")
            options.add_argument("--headless")  # run in the background
            options.add_argument("--disable-gpu")
            driver = webdriver.Chrome(options=options)

            imagename = datetime.now().strftime("%Y%m%d%H%M%S")

            driver.get(weburl)
            # Fixed pause to let the page load
            time.sleep(2)

            def scroll_size(axis):
                """Return the document's scrollWidth / scrollHeight ('Width' or 'Height')."""
                return driver.execute_script(
                    'return document.body.parentNode.scroll' + axis)

            # Grow the window to the full page size. May need manual
            # adjustment. maximize_window() must not follow this call: it can
            # reset the size chosen here, leaving only the first screenful in
            # the image.
            driver.set_window_size(scroll_size('Width'), scroll_size('Height'))

            fullpage = driver.find_element(By.TAG_NAME, 'body')
            if not filename:
                filename = imagename
            fullpage.screenshot(f'{filename}.png')
        except Exception as e:
            print(e)
        else:
            # Report success only when no step failed.
            print("ok")
        finally:
            # Always release the browser, including when a step above failed;
            # previously quit() was skipped on any exception.
            if driver is not None:
                driver.quit()
调用:
# Usage example. An empty file name makes CaptureGetImage fall back to a
# timestamp-based name.
capture = BLL.CaptureWeb.CaptureWebFull()
capture.CaptureGetImage("http://www.dusystem.com/", "")
哲学管理(学)人生, 文学艺术生活, 自动(计算机学)物理(学)工作, 生物(学)化学逆境, 历史(学)测绘(学)时间, 经济(学)数学金钱(理财), 心理(学)医学情绪, 诗词美容情感, 美学建筑(学)家园, 解构建构(分析)整合学习, 智商情商(IQ、EQ)运筹(学)生存.---Geovin Du(涂聚文)