selenium

把解压后的浏览器驱动 chromedriver 放在 Python 解释器所在的文件夹

不确定解释器在哪个文件夹时，运行一下 import sys; print(sys.executable)，输出路径所在的文件夹就是

原理：模拟人的一些操作，动态获取网页内容，所见即所得

打开网址：

# Start a Chrome session; `web` is the driver object the later snippets reuse.
web=Chrome()
web.get("https://www.lagou.com/")    # open a URL
print(web.title)  # print the title of the loaded page

获取网页实时代码（不是网页源代码）

# Reads the page's live markup from the driver (per the heading of this
# section: the current rendered code, not the originally served source).
web.page_source

点击事件：

from selenium.webdriver.common.by import By

# Locate the link with an XPath copied from the browser's dev tools.
# find_element(By.XPATH, ...) is used because the find_element_by_* helper
# methods were removed in Selenium 4.3.
el = web.find_element(By.XPATH, '//*[@id="changeCityBox"]/p[1]/a')
el.click()  # click the located element

输入文本 & 模拟按键：

from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys  # constants for special keys such as ENTER

# Type "python" into the search box, then send the Enter key.
# find_element(By.XPATH, ...) replaces find_element_by_xpath, which was
# removed in Selenium 4.3.
web.find_element(By.XPATH, '//*[@id="search_input"]').send_keys('python', Keys.ENTER)

根据标签名拿文本

from selenium.webdriver.common.by import By

# Text of the <h3> found under the element `li`.
# By.TAG_NAME replaces find_element_by_tag_name, removed in Selenium 4.3.
li.find_element(By.TAG_NAME, 'h3').text

xpath拿文本：

from selenium.webdriver.common.by import By

# Text of the <a> reached by a relative XPath starting at the element `li`.
# By.XPATH replaces find_element_by_xpath, removed in Selenium 4.3.
li.find_element(By.XPATH, './div[1]/div[2]/div[1]/a').text

切换浏览器标签页：

假设浏览器点击了一个按钮，另外打开了一个网页，要把目标移到新网页
web.switch_to.window(web.window_handles[-1])
# window_handles holds one handle per open tab; [-1] is the last one
# the driver now targets the new window, so content can be extracted from it

web.close()  # close the newly opened window

web.switch_to.window(web.window_handles[0]) 
# switch back to the original window (index 0)

切换页面视角（遇到iframe如何处理）

from selenium.webdriver.common.by import By

# Locate the <iframe> element, then move the driver's context into it.
# By.XPATH replaces find_element_by_xpath, removed in Selenium 4.3.
iframe = web.find_element(By.XPATH, '//*[@id="playleft"]/iframe')
web.switch_to.frame(iframe)
# ... work with the content inside the iframe here ...
web.switch_to.default_content()  # return to the original (top-level) page

处理下拉列表select

from selenium.webdriver.common.by import By
from selenium.webdriver.support.select import Select

# Locate the <select> node.
# By.XPATH replaces find_element_by_xpath, removed in Selenium 4.3.
selectt = web.find_element(By.XPATH, '//*[@id="Area"]')

# Wrap the element so it can be driven as a drop-down menu.
sel = Select(selectt)

# Three alternative ways to choose an option:
sel.select_by_index(i)  # by index position
sel.select_by_value(f'{i}')  # by the option's value attribute
# By the visible text, i.e. the text between <option> and </option>.
# The method requires that text as its argument; 'text' is a placeholder.
sel.select_by_visible_text('text')


举例：
# Example: select several options in turn by their value attribute.
gnum = [50, 37, 1, 25, 30]
for i in gnum:  # iterates over the values in the list (not over indexes)
    print(i)
    # The value passed on is the number padded with one space on each side.
    i = ' ' + str(i) + ' '
    # sel.select_by_index(...) would choose by position instead.
    sel.select_by_value(i)
# At this point the option with that value is selected; per the original
# note, the page refreshes and the data can then be read.

无头浏览器（不显示过程，在后台运行）

from selenium.webdriver.chrome.options import Options 

# Collect Chrome command-line switches, then start the driver with them.
opt=Options()
opt.add_argument('--headless')  # headless: run without showing a browser window
opt.add_argument('--disable-gpu')  # passes Chrome's disable-gpu switch (the original note glossed this as "not displayed")
# used in place of the plain web=Chrome()
web=Chrome(options=opt)

应对自动化检测（chrome的版本大于等于88）

# Start Chrome with the AutomationControlled Blink feature disabled; the
# heading of this section says this counters automation detection on
# Chrome >= 88.
option = Options()
# Alternative left disabled by the author:
# option.add_experimental_option('excludeSwitches', ['enable-automation'])
option.add_argument('--disable-blink-features=AutomationControlled')
web=Chrome(options=option)

实现浏览器下拉

# Build a scrollTo call: the first argument is the horizontal (x) position,
# the second the vertical (y) position, here i * 800.
js = f'window.scrollTo(0,{i*800})'
web.execute_script(js)  # run the JavaScript in the page
time.sleep(0.5)  # pause 0.5 s after the scroll

带上cookie，模拟登录状态

 1 from selenium.webdriver import Chrome
 2 import json
 3 
 4 #第一步，自动打开网页后先手动登录一次，并且获取cookies
 5 web=Chrome()
 6 
 7 web.get("https://baidu.com/")  
 8 
 9 time.sleep(20)
10 print('还有5秒就开始记录cookie')
11 time.sleep(5)
12 with open('txt/cookies.txt','w') as f:
13     f.write(json.dumps(web.get_cookies()))
14 
15 #第二步，带着cookie登录
16 
17 with open('txt/cookies.txt', 'r', encoding='utf8') as f:
18    listCookies = json.loads(f.read())
19 
20 
21 for cookie in listCookies:
22   web.add_cookie(cookie)
23 
24 web.get('https://.*')
25  # 读取完cookie刷新页面
26 web.refresh()

View Code

拖拽滑块

from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By

# Locate the slider handle.
# By.XPATH replaces find_element_by_xpath, removed in Selenium 4.3.
btn = web.find_element(By.XPATH, '//*[@id="nc_1_n1z"]')
# Drag the handle by an offset of 300 horizontally and 0 vertically;
# perform() executes the queued action.
ActionChains(web).drag_and_drop_by_offset(btn, 300, 0).perform()

过验证码