一、Selenium 操作谷歌浏览器(Chrome)
# Drive a Chrome window with Selenium: open Baidu, run a search, then
# click an image in the results.
import time

from selenium import webdriver
from selenium.webdriver.common.by import By

# Create a browser object; the browser is controlled through this object.
browser = webdriver.Chrome()
print(browser)

# Alternative: point Selenium at an explicit chromedriver binary.
# path = r'D:\googlediver\chromedriver.exe'
# browser = webdriver.Chrome(executable_path=path)

try:
    url = 'https://www.baidu.com/'
    browser.get(url)
    time.sleep(3)  # fixed pause before looking up elements

    # Find the search input box and type the query into it.
    # (Named search_box rather than `input` to avoid shadowing the builtin.)
    search_box = browser.find_element(By.ID, 'kw')
    search_box.send_keys('菊花')

    # Find the search button and click it.
    button = browser.find_element(By.ID, 'su')
    button.click()
    time.sleep(3)

    # Find the element with the given image class and click it.
    img = browser.find_element(By.CLASS_NAME, 'op-img-address-link-imgs')
    img.click()
    time.sleep(5)
finally:
    # Quit the browser even if a step above fails, so the driver process
    # is not left running.
    browser.quit()
二、Selenium 操作 PhantomJS
# Start a PhantomJS browser through Selenium. The remaining tutorial
# steps (open Baidu, take screenshots, run a search) are kept below as
# commented-out code, exactly as disabled in the original listing.
import time

from selenium import webdriver

# Create a browser object; the browser is controlled through this object.
browser = webdriver.PhantomJS()
try:
    time.sleep(3)

    # Open Baidu.
    # url = 'https://www.baidu.com/'
    # browser.get(url)
    # time.sleep(3)

    # Take a screenshot.
    # browser.save_screenshot(r'image/baidu.png')
    # time.sleep(2)

    # Find the search input box.
    # from selenium.webdriver.common.by import By
    # search_box = browser.find_element(By.ID, 'kw')
    # Type the query.
    # search_box.send_keys('菊花')
    # browser.save_screenshot(r'image/ju.png')
    # time.sleep(2)

    # Find the search button and click it.
    # button = browser.find_element(By.ID, 'su')
    # button.click()
    # time.sleep(3)
    # browser.save_screenshot(r'image/hua.png')
finally:
    # Always shut the browser down so its process is not left running.
    browser.quit()
三、PhantomJS 下拉滚动条
# Open a page in PhantomJS, take a screenshot, scroll down with
# JavaScript, take a second screenshot, and save the page source.
import time

from selenium import webdriver

# Create a browser object; the browser is controlled through this object.
browser = webdriver.PhantomJS()
try:
    time.sleep(3)

    url = 'https://dig.chouti.com/all/man/'
    browser.get(url)  # open the Chouti page
    time.sleep(3)
    browser.save_screenshot(r'image/chouti.png')  # screenshot before scrolling

    # Scroll down by assigning the body's scrollTop from JavaScript.
    js = 'document.body.scrollTop=10000'
    browser.execute_script(js)
    time.sleep(3)
    browser.save_screenshot(r'image/chouti2.png')  # screenshot after scrolling

    # Get the page source and save it to a file.
    html = browser.page_source
    with open(r'image/chouti.html', 'w', encoding='utf8') as fp:
        fp.write(html)
finally:
    # Always shut the browser down so its process is not left running.
    browser.quit()
四、例子-下拉式动态加载
# Save a page's source before and after scrolling down in PhantomJS.
# Because the page loads content dynamically, the source saved before
# scrolling differs from the source saved after scrolling.
import time

from selenium import webdriver

# Create the browser object.
browser = webdriver.PhantomJS()
try:
    time.sleep(3)

    # Architecture pictures listing page.
    url = 'http://sc.chinaz.com/tupian/tesejianzhutupian.html'

    # Open the page and save its source.
    browser.get(url)
    time.sleep(3)
    with open(r'html/jianzhu1.html', 'w', encoding='utf8') as fp:
        fp.write(browser.page_source)

    # Scroll down, then save the source again.
    js = 'document.body.scrollTop=10000'
    browser.execute_script(js)
    time.sleep(3)
    with open(r'html/jianzhu2.html', 'w', encoding='utf8') as fp:
        fp.write(browser.page_source)
finally:
    # Always shut the browser down so its process is not left running.
    browser.quit()
五、例子-点击加载更多
# Save a page's source before and after clicking its "load more"
# button in PhantomJS.
import time

from selenium import webdriver
from selenium.webdriver.common.by import By

# Create the browser object.
browser = webdriver.PhantomJS()
try:
    time.sleep(3)

    # Douban classic movies.
    url = 'https://movie.douban.com/explore#!type=movie&tag=%E7%BB%8F%E5%85%B8&sort=recommend&page_limit=20&page_start=20'

    # Open the page and save its source.
    browser.get(url)
    time.sleep(3)
    with open(r'html/dianying1.html', 'w', encoding='utf8') as fp:
        fp.write(browser.page_source)

    # Click "load more", then save the source again.
    show_more = browser.find_element(By.CLASS_NAME, 'more')  # the "load more" button
    show_more.click()
    time.sleep(3)
    with open(r'html/dianying2.html', 'w', encoding='utf8') as fp:
        fp.write(browser.page_source)
finally:
    # Always shut the browser down so its process is not left running.
    browser.quit()
六、Headless Chrome 的使用
# Open a page in Chrome started with the --headless argument and save
# a screenshot of it.
import time

from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# Build an options object so the browser starts in no-UI (headless) mode.
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')

# Path to the Chrome driver. A raw string keeps the backslashes literal
# instead of relying on the unrecognized escape sequences \g and \c.
path = r'D:\googlediver\chromedriver.exe'

# Create the browser object.
browser = webdriver.Chrome(
    executable_path=path,
    chrome_options=chrome_options,
)
try:
    # Visit the page, pause, then take the screenshot.
    url = 'http://www.baidu.com/'
    browser.get(url)
    time.sleep(3)
    browser.save_screenshot('image/wu.png')
finally:
    # Quit even when loading or the screenshot fails, so the driver
    # process is not left running.
    browser.quit()