python + selenium 实现快照 (保存整个网页为图片)
研究了好久,起初只能保存页面可见部分;
后来采用 js 操作才保存成功,代码如下:
import os.path
import time

from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# Default chromedriver location used when the caller does not supply one.
DEFAULT_CHROMEDRIVER_PATH = r"C:\Users\Shuai\AppData\Local\Google\Chrome\Application\chromedriver.exe"

# Number of pixels scrolled per step while walking down the page.
SCROLL_STEP = 500


def webshot(url, saveImgName, chromedriver_path=DEFAULT_CHROMEDRIVER_PATH):
    """Save a full-page screenshot of *url* as ``saveImgName + ".png"``.

    The page is opened in headless Chrome and scrolled down step by step,
    re-reading the body height after every step so that the loop keeps going
    if the page grows while scrolling. The window is then resized to the
    document's full scroll size and a screenshot is taken.

    Args:
        url: Address of the page to capture.
        saveImgName: Output path without the ``.png`` extension.
        chromedriver_path: Path to the chromedriver executable. Defaults to
            ``DEFAULT_CHROMEDRIVER_PATH``.

    Errors raised while loading or capturing the page are printed together
    with the picture name and are not re-raised.
    """
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--disable-gpu')
    options.add_argument('--no-sandbox')
    driver = webdriver.Chrome(options=options, executable_path=chromedriver_path)

    # JavaScript snippet that returns the current height of the page body.
    js_height = "return document.body.clientHeight"
    try:
        driver.maximize_window()
        driver.get(url)

        # Scroll down one step at a time; the height is re-read after each
        # step because it may change while scrolling.
        k = 1
        height = driver.execute_script(js_height)
        while k * SCROLL_STEP < height:
            js_move = "window.scrollTo(0,{})".format(k * SCROLL_STEP)
            print(js_move)
            driver.execute_script(js_move)
            time.sleep(0.2)
            height = driver.execute_script(js_height)
            k += 1

        # Resize the window to the whole document so the screenshot covers
        # the full page rather than only the visible viewport.
        scroll_width = driver.execute_script('return document.body.parentNode.scrollWidth')
        scroll_height = driver.execute_script('return document.body.parentNode.scrollHeight')
        driver.set_window_size(scroll_width, scroll_height)

        # get_screenshot_as_file reports a failed write through its return
        # value instead of raising, so check it before announcing success.
        if driver.get_screenshot_as_file(saveImgName + ".png"):
            print("Process {} get one pic !!!".format(os.getpid()))
        else:
            print(saveImgName, "screenshot could not be written")
        time.sleep(0.1)
    except Exception as e:
        # Best-effort: report the failure and let the caller carry on.
        print(saveImgName, e)
    finally:
        # Always shut the browser down; otherwise every call leaves a
        # headless Chrome / chromedriver process running.
        driver.quit()


if __name__ == '__main__':
    t = time.time()
    # Two arguments: the URL first, then the output path (without extension).
    webshot('http://ybj.fujian.gov.cn/zfxxgkzl/zfxxgkml/zcwj/202006/t20200611_5300786.htm', 'F:\\tstImg1')
    print("操作结束,耗时:{:.2f}秒".format(float(time.time() - t)))
以上代码就实现了整页截图的保存。
下面这个函数用于判断文件夹是否存在,不存在则创建:
def get_dir():
    """Ensure the ``../pics`` directory exists and return its path.

    The directory is created (including missing parents) when it does not
    exist yet. The path is relative to the current working directory.

    Returns:
        The relative directory path ``"../pics"``.
    """
    filename = "../pics"
    # exist_ok=True avoids the check-then-create race of a separate
    # isdir() test followed by makedirs().
    os.makedirs(filename, exist_ok=True)
    return filename