python + selenium 实现快照 (保存整个网页为图片)

研究了好久,起初只能保存页面可见部分;  

后来采用 js 操作才保存成功,代码如下:

from selenium import webdriver
import time
import os.path
from selenium.webdriver.chrome.options import Options

def _scroll_through_page(driver, step=500, pause=0.2):
    """Scroll the page downwards in ``step``-pixel increments until the bottom.

    The body height is re-read after every scroll, so the loop keeps going
    if the page grows while scrolling (presumably lazy-loaded content).
    """
    # JS snippet returning the current height of the page body.
    js_height = "return document.body.clientHeight"
    k = 1
    height = driver.execute_script(js_height)
    while k * step < height:
        js_move = "window.scrollTo(0,{})".format(k * step)
        print(js_move)
        driver.execute_script(js_move)
        time.sleep(pause)
        height = driver.execute_script(js_height)
        k += 1


def webshot(url, saveImgName,
            chromedriver=r"C:\Users\Shuai\AppData\Local\Google\Chrome\Application\chromedriver.exe"):
    """Save a screenshot of the whole page at ``url`` as ``saveImgName + ".png"``.

    A headless Chrome is started, the page is scrolled from top to bottom,
    the window is resized to the document's full scroll width/height, and a
    screenshot is written.

    Args:
        url: Address of the page to capture.
        saveImgName: Output path without the ``.png`` suffix.
        chromedriver: Path to the chromedriver executable. Defaults to the
            location that was previously hard-coded in this function.

    Returns:
        None. Errors raised while loading or capturing the page are printed
        together with ``saveImgName`` rather than propagated.
    """
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--disable-gpu')
    options.add_argument('--no-sandbox')
    driver = webdriver.Chrome(options=options, executable_path=chromedriver)
    try:
        driver.maximize_window()
        try:
            driver.get(url)
            _scroll_through_page(driver)
            # Resize the window to the full document so one screenshot covers
            # the entire page instead of only the visible part.
            scroll_width = driver.execute_script('return document.body.parentNode.scrollWidth')
            scroll_height = driver.execute_script('return document.body.parentNode.scrollHeight')
            driver.set_window_size(scroll_width, scroll_height)
            saved = driver.get_screenshot_as_file(saveImgName + ".png")
            if saved:
                print("Process {} get one pic !!!".format(os.getpid()))
            else:
                # get_screenshot_as_file reports write failures by returning
                # False, so do not announce success in that case.
                print(saveImgName, "screenshot could not be written")
            # NOTE(review): short pause kept from the original; purpose unclear.
            time.sleep(0.1)
        except Exception as e:
            print(saveImgName, e)
    finally:
        # Always shut the browser down; otherwise every call leaves a
        # headless Chrome process running.
        driver.quit()
 
 
if __name__ == '__main__':
    t = time.time()
    # Two arguments: the page URL first, then the output path
    # (".png" is appended by webshot).
    webshot(
        'http://ybj.fujian.gov.cn/zfxxgkzl/zfxxgkml/zcwj/202006/t20200611_5300786.htm',
        'F:\\tstImg1',
    )
    print("操作结束,耗时:{:.2f}秒".format(float(time.time() - t)))

以上代码就实现了整页截图的功能。

 

下面这个函数用于判断文件夹是否存在,不存在则创建:

def get_dir():
    """Make sure the ``../pics`` directory exists and return its path.

    Returns:
        The relative path ``"../pics"``.
    """
    filename = "../pics"
    # exist_ok=True creates the directory only when it is missing, in one
    # call, so there is no gap between checking and creating.
    os.makedirs(filename, exist_ok=True)
    return filename

 

posted @ 2020-06-22 15:07  岳帅超  阅读(5015)  评论(0)  编辑  收藏  举报