Python基于Selenium实现自动打开百度,京东

chromedriver与chrome的对应版本整理：

这个链接比较新一点

chromedriver版本 chrome版本
v2.9 v31-v34
v2.10 v33-v36
v2.11 v36-v40
v2.12 v38-v41
v2.14 v39-v42
v2.15 v40-v43
v2.16 v42-v45
v2.17 v42-v43
v2.18 v43-v46
v2.19 v43-v47
v2.20 v43-v48
v2.21 v46-v50
v2.22 v49-v52
v2.23 v51-v53
v2.24 v52-v54
v2.25 v53-v55
v2.26 v53-v55
v2.27 v54-v56
v2.28 v55-v57
v2.29 v56-v58
v2.30 v58-v60
v2.31 v58-v60
v2.32 v59-v61
v2.33 v60-v62
v2.34 v61-v63
v2.35 v62-v64
v2.36 v63-v65
v2.37 v63-v66
v2.38 v65-v67
v2.39 v66-v68
v2.40 v66-v68
chromedriver驱动下载地址：

http://chromedriver.storage.googleapis.com/index.html

同时提供geckodriver和iedriverserver的下载地址：

geckodriver：

https://github.com/mozilla/geckodriver/releases

iedriverserver：

http://selenium-release.storage.googleapis.com/index.html

msedgedriver:

Microsoft Edge WebDriver - Microsoft Edge Developer：https://developer.microsoft.com/en-us/microsoft-edge/tools/webdriver/

注意：

firefox47以下的版本不需要安装webdriver驱动，firefox47及以上版本需要安装geckodriver（例如firefox57可使用geckodriver-v0.19.1-win32/64）。

IEDriverServer的版本号和Selenium的版本号一定要一致

使用selenium去调用浏览器，需要一个驱动，浏览器的webdriver需要独立安装，
如果是chrome在浏览器输入框输入chrome://version/ 查看相应版本，

然后到 http://npm.taobao.org/mirrors/chromedriver/ 下载相应版本的驱动即可

百度

# _*_ coding:utf-8 _*_
import time
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

# Create the browser driver object.
# Constructing it opens a blank Chrome window.
driver = webdriver.Chrome()

# Navigate to the target site, then maximise the browser window.
driver.get("https://www.baidu.com")
driver.maximize_window()
time.sleep(1)

# The search input box on the page has id="kw".
# A located element can be bound to a variable and then operated through it.
# (Older code used driver.find_element_by_id("kw"); newer code uses
# find_element together with By.)
ele = driver.find_element(By.ID, "kw")
ele.send_keys("韦神")  # type the query into the text input

# The search button has id="su"; locate it, then click it.
search_button = driver.find_element(By.ID, "su")
search_button.click()

# Do NOT bind the result of an action such as click() to a variable:
#     ele = driver.find_element_by_id("su").click()
#     ele.click()  # equivalent to None.click()

# Uncomment to shut the browser down when finished:
# driver.quit()

京东：

# _*_ coding:gbk _*_

# 爬取 https://www.jd.com/ 京东图书
import csv
import time

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By


# Create the browser driver object; this opens a blank Chrome window.
driver = webdriver.Chrome()


# Navigate to the JD home page.
driver.get("https://www.jd.com")



# Type the search keyword into the search input (id="key").
p_input = driver.find_element(By.ID, 'key')
p_input.send_keys('python编程')
time.sleep(1)

# Click the search button. click() returns None, so its result is
# intentionally not bound to a name.
driver.find_element(By.CLASS_NAME, "button").click()
time.sleep(1)

# Module-level settings and state used by the scraping code in this script.
path = './book.csv'      # path and name of the CSV file to write
head = ['书名', '价格']   # CSV header row (book title, price)
num = 200                # number of items still to collect
all_book_info = list()   # collected rows, one [title, price] list per item
def write_csv(head, all_book_info, path, encoding='gbk'):
    """Write a header row followed by data rows to a CSV file.

    Args:
        head: Sequence of column titles, written as the first row.
        all_book_info: Iterable of rows; each row is a sequence of cell values.
        path: Destination file path. An existing file is overwritten.
        encoding: Text encoding of the output file. Defaults to ``'gbk'``,
            which the original author recommended because utf-8 output
            appeared garbled for them.

    Characters that cannot be represented in ``encoding`` are written as
    ``?`` instead of raising ``UnicodeEncodeError``, so a single unusual
    character in one row does not discard the whole result set.
    """
    # newline='' lets the csv module control line endings itself.
    with open(path, 'w', newline='', encoding=encoding, errors='replace') as file:
        writer = csv.writer(file)
        writer.writerow(head)
        writer.writerows(all_book_info)
# Scrape a single page of search results.
def get_onePage_info(num):
    """Collect title and price from the items on the current result page.

    Scrolls the page to the bottom, pauses for two seconds, then walks every
    ``li`` element found under the element with id ``J_goodsList``. For each
    one a ``[title, price]`` row is appended to the module-level
    ``all_book_info`` list.

    Args:
        num: Number of items still wanted before this page is processed.

    Returns:
        ``num`` reduced by one for every item visited on this page.
        Iteration stops early as soon as the counter reaches 0.
    """
    driver.execute_script('window.scrollTo(0, document.body.scrollHeight);')
    time.sleep(2)

    # Container holding the list of goods on the page.
    goods_container = driver.find_element(By.ID, "J_goodsList")
    remaining = num
    for item in goods_container.find_elements(By.TAG_NAME, "li"):
        remaining -= 1
        title_box = item.find_element(By.CLASS_NAME, "p-name")
        title = title_box.find_element(By.TAG_NAME, "em").text
        price_box = item.find_element(By.CLASS_NAME, "p-price")
        price = price_box.find_element(By.TAG_NAME, "i").text
        # NOTE: the original author left a disabled sketch for also reading
        # the author and store from the item's "p-bookdetails" element
        # ("p-bi-name" / "p-bi-store", each followed by an "a" tag); those
        # columns are not collected here.
        all_book_info.append([title, price])
        if remaining == 0:
            break
    return remaining

# Collect results page by page until the requested number of items is reached.
try:
    while num > 0:
        num = get_onePage_info(num)
        if num <= 0:
            # Quota reached: do not navigate to (and wait for) another page.
            break
        driver.find_element(By.CLASS_NAME, 'pn-next').click()  # go to the next page
        time.sleep(2)
finally:
    # Save whatever was collected, even if scraping stopped early with an
    # error (for example, no "next page" link could be found).
    write_csv(head, all_book_info, path)
# Uncomment to close the browser window when finished:
# driver.close()

posted on 2022-05-13 15:54 Carl_99 阅读(499) 评论(0) 编辑收藏举报

刷新页面返回顶部