4.复杂操作以及特殊情况处理

selenium电脑模式和手机模式

# Launch a specific Chrome build instead of the default installation
options = webdriver.ChromeOptions()
# Location of the Chrome browser executable
location = r"F:\All_python_code\scrapy\chrome-win\chrome.exe"
# Make ChromeOptions start that executable
options.binary_location = location
# Raw string: the path contains backslash sequences (\A, \s, \c) that are not
# valid escapes and raise invalid-escape warnings in a normal string literal
driver = webdriver.Chrome(r"F:\All_python_code\scrapy\chromedriver.exe", options=options)

# Open the site with get(), then pause for 5 seconds
start_url = "http://www.4399dmw.com/donghua/"
driver.get(start_url)
time.sleep(5)

# Close the webdriver
driver.quit()

# Locate the search input by its id and type the keyword into it
search_box = driver.find_element_by_id("j-input")
search_box.send_keys("蜡笔")
# Locate the search button and click it
search_button = driver.find_element_by_xpath("//button[@class='banner__btn']")
search_button.click()
# Print the URL of the current page. Author's note: the search opens a new
# tab that the driver does not follow, so this is still the original URL.
print(driver.current_url)

# Print the page source
print(driver.page_source)

# Print the cookies of the current page
print(driver.get_cookies())

# Wait 2 seconds, then refresh the page
time.sleep(2)
driver.refresh()

一个注意点

这里 print 只会输出第一段文字：find_element_by_xpath 只返回第一个匹配的元素，无法批量爬取（批量获取要用 find_elements_by_xpath）。这种方式的 xpath 和之前获取的不太一样，建议爬取全部页面源码后改用 BS（BeautifulSoup）解析
```
# find_element_* yields a single element, so only one title is printed here
first_title = driver.find_element_by_xpath("//div[@class='lst-item']/a/div/p")
print(first_title.text)
```

# Click the "next page" link. Author's note: this navigates within the same
# tab instead of opening a new one, so current_url is the URL after the jump.
next_page_link = driver.find_element_by_xpath("//a[contains(text(),'下一页')]")
next_page_link.click()
print(driver.current_url)

如果程序中途出错、没有执行到 driver.quit()，chromedriver 和浏览器进程会残留在后台，需要手动结束，否则会越堆越多；建议把操作放在 try/finally 中，在 finally 里调用 driver.quit()

手机运行chrome

# Pick the phone model to emulate
mobileEmulation = dict(deviceName='iPhone 6/7/8')
# Browse as that phone by passing the setting to ChromeOptions
options.add_experimental_option(
    'mobileEmulation',
    mobileEmulation,
)

# Describe the emulated phone with explicit metrics instead of a device name
mobileEmulation = {
    "deviceMetrics": {
        "width": 350,
        "height": 200,
        "pixelRatio": 3.0,
        "touch": False,
    }
}
# The dict has no effect until it is handed to ChromeOptions; this replaces
# any 'mobileEmulation' value set on the same options object earlier
options.add_experimental_option('mobileEmulation', mobileEmulation)

# Headless mode: operate the browser without showing a window
options.add_argument("headless")

# Use a proxy (the author lists http, https, socks4 and socks5)
proxy_server = 'socks5://127.0.0.1:9999'
options.add_argument('--proxy-server=%s' % proxy_server)

# Change the browser language
options.add_argument("--lang=en-US")

以上设置都要在创建 driver（调用 webdriver.Chrome(...)）之前添加到 options 里

# Crawl several listing pages in a row
total_pages = 14
for page in range(total_pages):
    print("现在爬取第" + str(page + 1) + "页")
    # Query the titles again on every page and print the text of each one
    for title in driver.find_elements_by_xpath("//div[@class='lst']/a/div/p"):
        print(title.text)
    # Move on to the next page; skipped after the final page so the loop
    # does not click a "next page" link once the last page has been printed
    if page < total_pages - 1:
        driver.find_element_by_xpath("//a[contains(text(),'下一页')]").click()

可以之后换成while循环对href进行判断，如果没有href就停止

# XPath of the "next page" link, shared by both lookups below
next_page_xpath = "//a[contains(text(),'下一页')]"

# Get the HTML of the target element, including the element's own tag
next_page_html = driver.find_element_by_xpath(next_page_xpath).get_attribute("outerHTML")
print(next_page_html)

# Get a CSS property of the target element
background_image = driver.find_element_by_xpath(next_page_xpath).value_of_css_property("background-image")
print(background_image)

键盘鼠标模拟人类操作入门

键盘按键包
from selenium.webdriver.common.keys import Keys
鼠标按键包
from selenium.webdriver.common.action_chains import ActionChains

# Send the Ctrl+A key combination to the search input
search_input = driver.find_element_by_id("j-input")
search_input.send_keys(Keys.CONTROL, 'a')

打开尺子

切换到手机模式，点击竖着的三个点，Show rulers；电脑模式在检查元素右上角的小齿轮点开，在Preferences的Elements的Show rulers on hover打勾，在选取页面元素时就会出现

# Queue a mouse move by (70, 120) followed by a click
offset_click = ActionChains(driver)
offset_click.move_by_offset(70, 120)
offset_click.click()
# Run the queued actions
offset_click.perform()
# Move the mouse back by the same offset and run it immediately
move_back = ActionChains(driver)
move_back.move_by_offset(-70, -120)
move_back.perform()

# Locate the login entry: a link element whose text is "登陆"
# NOTE(review): the second click example matches '登录' instead — confirm
# which spelling the page actually uses
login_link = driver.find_element_by_link_text("登陆")
# Hover the mouse over it
hover = ActionChains(driver)
hover.move_to_element(login_link)
hover.perform()

鼠标点击的第一种方法

# Locate the logo link inside the banner
logo = driver.find_element_by_xpath("//div[@class='banner__main']/a")
# Build the click on the logo as an action chain, then run it
logo_click = ActionChains(driver)
logo_click.click(logo)
logo_click.perform()

鼠标点击的第二种方法：

# Locate the logo and the login link
logo = driver.find_element_by_xpath("//div[@class='banner__main']/a")
log = driver.find_element_by_xpath("//a[contains(text(),'登录')]")
# Click the logo, wait 2 seconds, then click the login link.
# The wait must be part of the chain: queued actions only run when perform()
# is called, so a time.sleep() between the two click() calls would delay
# building the chain and both clicks would still fire back-to-back.
action = ActionChains(driver)
action.click(logo)
action.pause(2)
action.click(log)
action.perform()

driver 的操作位置一直停留在原来的标签页，并没有跟进到新打开的标签页；要操作新标签页，需要先用 driver.switch_to.window(driver.window_handles[-1]) 切换过去

posted @ 2022-04-16 18:49 icui4cu 阅读(41) 评论(0) 编辑收藏举报

刷新页面返回顶部

icui4cu

4.复杂操作以及特殊情况处理

selenium电脑模式和手机模式

键盘鼠标模拟人类操作入门

公告