selenium框架的使用
selenium是浏览器自动化测试框架,使用前可直接pip install selenium安装
需要选择与当前使用浏览器匹配的驱动下载调用。本次采用Chrome 74版,Chrome驱动下载地址:http://chromedriver.storage.googleapis.com/index.html
使用小案例:
from selenium import webdriver
from time import sleep
# Demo: open hao123, type a keyword into the search box and submit it.
# NOTE(review): executable_path and find_element_by_* are the Selenium 3 API
# this note was written against (Chrome 74); confirm against the installed
# Selenium version before reusing.
bower = webdriver.Chrome(executable_path=r'C:\Users\asaxh\Desktop\chromedriver.exe')
try:
    bower.get(url='https://www.hao123.com/')  # navigate to the target URL
    sleep(2)  # fixed pause so the page can finish rendering
    text_input = bower.find_element_by_name('word')
    text_input.send_keys('胡歌')  # the keyword to search for
    sleep(2)
    bower.find_element_by_class_name('submitInput').click()  # click the search button
    sleep(3)  # keep the result page visible for a moment
finally:
    # Always close the browser and exit the driver, even when one of the
    # element lookups above raises; otherwise the browser is left running.
    bower.quit()
使用普通的有界面Chrome浏览器,通过执行JavaScript把页面滚动到底部(模拟下拉滚动条):
from selenium import webdriver
from time import sleep
# Demo: open hao123 in a regular (visible) Chrome window, scroll to the bottom
# of the page several times, then save the resulting HTML to ./hao123.html.
url = 'https://www.hao123.com/'
bower = webdriver.Chrome(executable_path=r'C:\Users\asaxh\Desktop\chromedriver.exe')
try:
    bower.get(url)
    sleep(5)  # fixed pause so the initial page can finish loading

    # Scroll to the current bottom of the document six times, pausing after
    # each jump. document.body.scrollHeight is re-read on every call, so each
    # round scrolls to wherever the bottom is at that moment.
    for _ in range(6):
        bower.execute_script('window.scrollTo(0,document.body.scrollHeight)')
        sleep(5)

    # Persist the page source as it stands after the scrolling.
    page_html = bower.page_source
    with open('./hao123.html', 'w', encoding='utf-8') as f:
        f.write(page_html)
    sleep(3)
finally:
    # Close the browser even if navigation, scrolling or the file write fails.
    bower.quit()
推荐使用Chrome的无界面浏览器:
from selenium.webdriver.chrome.options import Options
from selenium import webdriver
from time import sleep
# Demo: same scroll-and-save flow as a visible browser would do, but with
# Chrome started in headless mode.
chrome_options = Options()
chrome_options.add_argument('--headless')  # run Chrome without a visible window
# NOTE(review): --disable-gpu turns off GPU acceleration; it is the --headless
# flag above, not this one, that hides the browser UI.
chrome_options.add_argument('--disable-gpu')
url = 'https://www.hao123.com/'
bower = webdriver.Chrome(executable_path=r'C:\Users\asaxh\Desktop\chromedriver.exe', chrome_options=chrome_options)
try:
    bower.get(url)
    sleep(5)  # fixed pause so the initial page can finish loading

    # Scroll to the current bottom of the document six times, pausing after
    # each jump. document.body.scrollHeight is re-read on every call, so each
    # round scrolls to wherever the bottom is at that moment.
    for _ in range(6):
        bower.execute_script('window.scrollTo(0,document.body.scrollHeight)')
        sleep(5)

    # Persist the page source as it stands after the scrolling.
    page_html = bower.page_source
    with open('./hao123.html', 'w', encoding='utf-8') as f:
        f.write(page_html)
    sleep(3)
    print('ok')  # progress marker: there is no window to watch in headless mode
finally:
    # Close the browser even if navigation, scrolling or the file write fails.
    bower.quit()
实现QQ空间(qzone.qq.com)的模拟登录:
from selenium import webdriver
from time import sleep
from lxml import etree
# Demo: log in to Qzone with account + password, then extract image URLs from
# the friend feed and append them to ./q.txt.
web = webdriver.Chrome(executable_path=r'C:\Users\asaxh\Desktop\chromedriver.exe')
try:
    url = 'https://qzone.qq.com/'
    web.get(url)
    sleep(2)

    # The login form lives inside an iframe, so the driver has to switch into
    # that frame before the form elements can be located.
    web.switch_to.frame('login_frame')
    web.find_element_by_id('switcher_plogin').click()
    sleep(3)

    # NOTE(review): hard-coded credentials (the password is the same string as
    # the account number, so presumably placeholders). Replace with your own
    # and never commit real ones.
    web.find_element_by_id('u').send_keys("1159402553")
    web.find_element_by_id('p').send_keys('1159402553')
    web.find_element_by_id('login_button').click()
    sleep(3)  # wait 3 seconds so the post-login content can finish loading

    q_page = web.page_source
    tree = etree.HTML(q_page)
    src_img = tree.xpath('//ul[@id="feed_friend_list"]/li//div[@class="user-pto"]/a/img/@src')

    # Append one image URL per line. The file is opened once for the whole
    # batch, and only when there is something to write, so an empty result
    # does not create the file.
    if src_img:
        with open('./q.txt', 'a', encoding='utf-8') as f:
            for src in src_img:
                f.write(src + '\n')

    # Optional: persist the whole profile page as well.
    # with open('./q.html', 'w', encoding='utf-8') as f:
    #     f.write(q_page)
    sleep(2)
finally:
    # Close the browser even if the login flow or the parsing step fails.
    web.quit()