python: Capture Full Webpage Screenshots with Selenium
# Start a Firefox session through `webdriver.Firefox()` and keep the handle in `browser`.
# NOTE(review): no import of `webdriver` is visible above this line - presumably
# `from selenium import webdriver`; confirm before running this snippet on its own.
browser = webdriver.Firefox()
# encoding: utf-8
# Copyright 2023 © 涂聚文 Co., Ltd.
# License information: see
# Description: Heap Area / Stack Area
# Author    : geovindu, Geovin Du 涂聚文.
# IDE       : PyCharm 2023.1 python 311
# Datetime  : 2023/10/26 8:54
# User      : geovindu
# python.exe -m pip install --upgrade pip
# pip install Pillow
# pip install selenium
# Product   : PyCharm
# Project   : EssentialAlgorithms
# File      :
# explain   : study notes
import time
from PIL import Image
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import os
from datetime import datetime


class LongScreenShot(object):
    r"""
    Take a long (full-page) screenshot of a web page given its URL.

    "C:\Program Files\Google\Chrome\Application\chrome.exe"
    """

    def __init__(self):
        self.url = ""
        # NOTE: the attribute name is misspelled ("fielname"); it is kept as-is
        # so that any existing caller reading it keeps working.
        self.fielname = ""

    def screenShot(self, weburl: str, filename: str) -> None:
        """
        Capture a full-page screenshot of a web page with headless Chrome.

        The window is resized to the page's full scroll width/height and the
        ``<body>`` element is saved as ``<YYYYmmddHHMMSS>.png``. The relative
        name means the file lands in the current working directory.

        :param weburl: address of the page to capture
        :param filename: accepted for interface compatibility but currently
            unused - the output name is generated from the current timestamp
        :return: None. Any exception is caught and printed, not raised.
        """
        try:
            today = datetime.now()
            image_name = today.strftime("%Y%m%d%H%M%S")
            print(image_name)
            print(os.path.abspath(__file__))

            # Configure Chrome WebDriver options
            options = Options()
            options.add_argument("--window-size=1920,1080")
            options.add_argument("--start-maximized")
            options.add_argument("--headless")  # run in the background
            options.add_argument("--disable-gpu")

            # Initialize the Chrome WebDriver
            driver = webdriver.Chrome(options=options)
            try:
                driver.maximize_window()

                # Navigate to the URL to capture
                driver.get(weburl)

                # Crude wait for the page to load; adjust the delay as needed
                time.sleep(1)

                # Ask the page for its full width and height via JavaScript
                width = driver.execute_script(
                    "return Math.max( document.body.scrollWidth, "
                    "document.body.offsetWidth, "
                    "document.documentElement.clientWidth, "
                    "document.documentElement.scrollWidth, "
                    "document.documentElement.offsetWidth );"
                )
                height = driver.execute_script(
                    "return Math.max( document.body.scrollHeight, "
                    "document.body.offsetHeight, "
                    "document.documentElement.clientHeight, "
                    "document.documentElement.scrollHeight, "
                    "document.documentElement.offsetHeight );"
                )

                # Make the window as large as the whole page so that the
                # element screenshot below covers everything
                driver.set_window_size(width, height)

                # Capture the full-page element (the document body)
                full_page = driver.find_element(By.TAG_NAME, "body")
                full_page.screenshot(f"{image_name}.png")
            finally:
                # Always release the browser, even when a step above fails
                driver.quit()
        except Exception as e:
            # Best-effort API: report the problem instead of raising
            print(e)
# Usage example: build the long-screenshot helper and capture one page.
# NOTE(review): the URL argument is an empty string in the source - supply the
# address to capture. `BLL` is not imported in the visible code.
lscreen = BLL.LongScreenShot.LongScreenShot()
lscreen.screenShot(weburl="", filename=r"result.png")  # ok
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import time
import sys
import random


def print_hi(name):
    """Print a greeting for *name* to standard output."""
    print(f'Hi, {name}')


if __name__ == '__main__':
    print_hi('PyCharm,geovindu')

    # Headless Chrome with a fixed initial viewport
    options = webdriver.ChromeOptions()
    options.add_argument("--window-size=1920,1080")
    options.add_argument("--start-maximized")
    options.add_argument("--headless")  # run in the background
    options.add_argument("--disable-gpu")

    driver = webdriver.Chrome(options=options)
    try:
        # Maximize BEFORE measuring the page. The original called
        # maximize_window() after set_window_size(), which discards the
        # full-page size again - the author noted that only the first page
        # was captured.
        driver.maximize_window()

        # NOTE(review): the URL is an empty string in the source - fill in the
        # address to capture.
        URL = ''
        driver.get(URL)
        time.sleep(1)  # crude wait for the page to finish loading

        # Full scroll size of the document's root element
        page_width = driver.execute_script('return document.body.parentNode.scrollWidth')
        page_height = driver.execute_script('return document.body.parentNode.scrollHeight')
        driver.set_window_size(page_width, page_height)  # may need manual adjustment

        # Selenium 4 API; replaces the old find_element_by_tag_name('body')
        fullpage = driver.find_element(By.TAG_NAME, 'body')
        fullpage.screenshot('geovindu.png')
    finally:
        # Always release the browser, even when a step above fails
        driver.quit()
# encoding: utf-8
# Copyright 2023 © 涂聚文 Co., Ltd.
# License information: web page screenshot
# Description:
# Author    : geovindu, Geovin Du 涂聚文.
# IDE       : PyCharm 2023.1 python 311
# Datetime  : 2023/10/26 14:36
# User      : geovindu
# Product   : PyCharm
# Project   : pythonWebScreenShot
# File      :
# explain   : study notes
# python.exe -m pip install --upgrade pip
# pip install Pillow
# pip install selenium
# pip install pywin32
# pip install PyPDF2
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import time
from datetime import datetime
import sys
import random


class CaptureWebFull(object):
    """
    Web page screenshot helper.
    """

    def __init__(self):
        # NOTE: the attribute name is misspelled ("webulr"); it is kept as-is
        # so that any existing caller reading it keeps working.
        self.webulr = ""
        self.filename = ""

    def CaptureGetImage(self, weburl: str, filename: str) -> None:
        """
        Capture a screenshot of a web page with headless Chrome.

        The window is resized to the scroll width/height of the document's
        root element and the ``<body>`` element is saved as
        ``<filename>.png`` (relative to the current working directory).

        :param weburl: address of the page to capture
        :param filename: output file name without the ``.png`` suffix; when
            empty, a ``YYYYmmddHHMMSS`` timestamp is used instead
        :return: None. Any exception is caught and printed, not raised.
        """
        try:
            options = webdriver.ChromeOptions()
            options.add_argument("--window-size=1920,1080")
            options.add_argument("--start-maximized")
            options.add_argument("--headless")  # run in the background
            options.add_argument("--disable-gpu")

            driver = webdriver.Chrome(options=options)
            try:
                # Maximize BEFORE measuring the page. The original called
                # maximize_window() after set_window_size(), which discards
                # the full-page size again - the author noted that only the
                # first page was captured.
                driver.maximize_window()

                today = datetime.now()
                imagename = today.strftime("%Y%m%d%H%M%S")

                driver.get(weburl)
                time.sleep(2)  # crude wait for the page to finish loading

                # Full scroll size of the document's root element
                page_width = driver.execute_script(
                    'return document.body.parentNode.scrollWidth')
                page_height = driver.execute_script(
                    'return document.body.parentNode.scrollHeight')
                driver.set_window_size(page_width, page_height)  # may need manual adjustment

                # Selenium 4 API; replaces find_element_by_tag_name('body')
                fullpage = driver.find_element(By.TAG_NAME, 'body')

                # Fall back to the timestamp when no name was supplied
                if not filename:
                    filename = imagename
                fullpage.screenshot(f'{filename}.png')
            finally:
                # Always release the browser, even when a step above fails
                driver.quit()
        except Exception as e:
            # Best-effort API: report the problem instead of raising
            print(e)
        else:
            # Report success only when no exception was raised
            print("ok")
# Usage example: build the capture helper and take one screenshot.
# NOTE(review): both arguments are empty strings in the source - supply the
# address to capture. `BLL` is not imported in the visible code.
bl = BLL.CaptureWeb.CaptureWebFull()
bl.CaptureGetImage(weburl="", filename="")
