04Selenium剩余部分及练习:爬取京东商品信息

昨日回顾

一、爬取豆瓣电影top250

1.爬取电影页

2.解析提取电影信息

3.保存数据

二、Selenium请求库

驱动浏览器往目标网站发送请求,获取响应数据

-不需要分析复杂的通信流程

-执行js代码

-获取动态数据

三、Selenium使用

driver = webdriver.Chrome()

隐式等待

driver.get('网站') 往某个网站发送请求

显式等待

driver.close()

四、选择器

element:查找一个

elements:查找多个

by_id

by_class_name

by_name

by_link_text

by_partial_link_text

by_css_selector

今日内容

一、Selenium剩余部分

1.元素交互操作

1.1 点击,清除

click

clear

示例:

from selenium import webdriver
from selenium.webdriver.common.by import By #按照什么方式查找,By.Id,By.CSS_SELECTOR
from selenium.webdriver.common.keys import Keys #键盘按键操作
from selenium.webdriver.support import expected_conditions as EC #和下面WebDriverWait一起用的
from selenium.webdriver.support.wait import WebDriverWait #等待页面加载某些元素

import time

# Demo: type into JD's search box, click the search button, then clear the
# box and run a second search by pressing ENTER.
driver = webdriver.Chrome(r'C:\Program Files (x86)\chromedriver.exe')

try:
    driver.implicitly_wait(10)
    driver.get("https://www.jd.com/")
    time.sleep(5)

    # First search: fill in the keyword and click the search button.
    # (Named search_box rather than `input` so the builtin is not shadowed.)
    search_box = driver.find_element_by_id('key')
    search_box.send_keys('围城')

    search_button = driver.find_element_by_class_name('button')
    search_button.click()

    time.sleep(3)

    # Second search: the element is looked up again after the click, then
    # cleared before the new keyword is typed and submitted with ENTER.
    search_box = driver.find_element_by_id('key')
    search_box.clear()
    time.sleep(1)
    search_box.send_keys('墨菲定律')
    search_box.send_keys(Keys.ENTER)
    time.sleep(10)
finally:
    # quit() ends the whole WebDriver session (all windows plus the driver
    # process); close() would only close the current window.
    driver.quit()
1.2 Action Chains

ActionChains 是一个动作链对象,可以把多个鼠标、键盘动作依次串联起来,最后调用 perform() 统一执行。

from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By #按照什么方式查找,By.Id,By.CSS_SELECTOR
from selenium.webdriver.common.keys import Keys #键盘按键操作
from selenium.webdriver.support import expected_conditions as EC #和下面WebDriverWait一起用的
from selenium.webdriver.support.wait import WebDriverWait #等待页面加载某些元素

import time

# Demo: drag the #draggable box onto the #droppable box on the runoob
# jQuery-UI demo page, moving it a few pixels at a time.
driver = webdriver.Chrome(r'C:\Program Files (x86)\chromedriver.exe')

try:
    driver.implicitly_wait(10)
    driver.get('http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable')
    time.sleep(5)

    # The two boxes are looked up after switching into the 'iframeResult' frame.
    driver.switch_to.frame('iframeResult')
    time.sleep(1)

    source = driver.find_element_by_id('draggable')
    target = driver.find_element_by_id('droppable')

    # Option 1 (instant move): queue the whole gesture, then run it with
    # .perform():
    #     ActionChains(driver).drag_and_drop(source, target).perform()

    # Option 2 (used here): move step by step over the horizontal distance
    # between the two elements.
    print(source.tag_name)
    print(source.text)
    print(source.size)

    print(target.location)
    print(source.location)

    x = target.location['x'] - source.location['x']

    # Press and hold the draggable box. Every step below builds a fresh
    # ActionChains object instead of reusing a single chain.
    ActionChains(driver).click_and_hold(source).perform()

    s = 0
    while s < x:
        # Move 2 px to the right per step, pausing briefly between steps.
        ActionChains(driver).move_by_offset(xoffset=2, yoffset=0).perform()
        s += 2

        time.sleep(0.1)

    # Drop the box.
    ActionChains(driver).release(source).perform()
    time.sleep(10)

finally:
    # quit() ends the whole WebDriver session; close() would only close the
    # current window.
    driver.quit()
1.3 frame切换
driver.switch_to.frame(frame 的id名)
1.4 执行js代码
# Open the Baidu home page, then run a JavaScript snippet in it that pops
# up an alert dialog.
driver.get("https://www.baidu.com/")

alert_js = '''
            alert("你好")

    '''
driver.execute_script(alert_js)

# Pause so the dialog stays on screen for a few seconds.
time.sleep(5)
1.5 其他
#模拟浏览器的前进后退
import time
from selenium import webdriver

browser = webdriver.Chrome()
try:
    # Visit three sites in order so the browser has a history to move through.
    for site in ('https://www.baidu.com',
                 'https://www.taobao.com',
                 'http://www.sina.com.cn/'):
        browser.get(site)

    # Go back one entry in the history.
    browser.back()
    time.sleep(10)
    # Go forward again.
    browser.forward()
finally:
    # Always end the WebDriver session, even if a navigation step fails.
    # quit() shuts down the whole session; close() would only close the
    # current window.
    browser.quit()

二、练习:爬取京东商品信息

简单版本

from selenium import webdriver
from selenium.webdriver.common.keys import Keys #键盘按键操作

# time.sleep() is used below, but this example's import header does not
# import `time`, so it is imported here to keep the script runnable.
import time

# Demo: search JD for a keyword and append every product on the first result
# page (name, price, link, review count) to jd.txt.
driver = webdriver.Chrome(r'C:\Program Files (x86)\chromedriver.exe')
try:
    driver.implicitly_wait(10)

    driver.get('https://www.jd.com/')

    # Type the keyword into the search box on the JD home page and submit it.
    input_tag = driver.find_element_by_id('key')
    input_tag.send_keys('墨菲定律')
    input_tag.send_keys(Keys.ENTER)

    time.sleep(2)

    goods = driver.find_elements_by_class_name('gl-item')
    for good in goods:
        # Product name
        name = good.find_element_by_css_selector('.p-name em').text

        # Product price
        price = good.find_element_by_class_name('p-price').text

        # Product link
        url = good.find_element_by_css_selector('.p-name a').get_attribute('href')

        # Review summary text
        comments = good.find_element_by_class_name('p-commit').text

        good_content = f'''
        商品名称:{name}
        商品价格:{price}
        商品链接:{url}
        商品评价:{comments}
        \n
        '''
        print(good_content)

        # Append mode: every record is added to the end of jd.txt.
        with open('jd.txt','a',encoding='utf-8') as f:
            f.write(good_content)

    print("写入成功")

finally:
    # quit() ends the whole WebDriver session; close() would only close the
    # current window.
    driver.quit()

改良版本

(加入了自动下拉加载商品与点击下一页):

from selenium import webdriver
from selenium.webdriver.common.keys import Keys #键盘按键操作



def get_goods(driver, max_pages=None):
    """Scrape JD search-result pages and append every product to ``jd.txt``.

    For each result page the window is scrolled down, every ``.gl-item``
    element is read (name, price, link, review text), the record is printed
    and appended to ``jd.txt``, and then the "next page" button is clicked.
    Pages are processed in a loop rather than by recursion, so the number of
    pages is not limited by Python's recursion depth.

    The driver is NOT closed here: the caller created it and is responsible
    for shutting it down.

    Args:
        driver: A Selenium WebDriver already showing a JD search-result page.
        max_pages: Optional upper bound on the number of pages to scrape.
            ``None`` (the default) means "until there is no usable next-page
            button". At least one page is always scraped.

    Returns:
        None.
    """
    # Imported locally because this example's import header does not bring
    # `time` into scope.
    import time

    scroll_js = '''
    window.scrollTo(0,5000)
    '''
    page = 1
    while True:
        # Scroll down so the rest of the product list gets loaded.
        driver.execute_script(scroll_js)
        goods = driver.find_elements_by_class_name('gl-item')

        # Open the output file once per page instead of once per product.
        with open('jd.txt', 'a', encoding='utf-8') as f:
            # The counter restarts at 1 on every page.
            for num, good in enumerate(goods, start=1):
                # Product name
                name = good.find_element_by_css_selector('.p-name em').text
                # Product price
                price = good.find_element_by_class_name('p-price').text
                # Product link
                url = good.find_element_by_css_selector('.p-name a').get_attribute('href')
                # Review summary text
                commit = good.find_element_by_class_name('p-commit').text

                good_content = f'''
                    num:{num}
                    商品名称:{name}
                    商品价格:{price}
                    商品链接:{url}
                    商品评价:{commit}
                    '''
                print(good_content)
                f.write(good_content)
        print("写入成功")

        if max_pages is not None and page >= max_pages:
            break

        # find_elements returns an empty list (instead of raising) when the
        # button is absent, which gives a clean stop condition.
        next_tags = driver.find_elements_by_class_name('pn-next')
        if not next_tags:
            break
        next_tag = next_tags[0]
        # NOTE(review): assumes the last page keeps the button but adds a
        # `disabled` class to it - confirm against the live page.
        if 'disabled' in (next_tag.get_attribute('class') or '').split():
            break

        next_tag.click()
        # Give the next result page time to load before scraping it.
        time.sleep(5)
        page += 1


if __name__ == '__main__':
    # Needed for the pause after submitting the search; this example's
    # import header does not import `time`.
    import time

    driver = webdriver.Chrome(r'C:\Program Files (x86)\chromedriver.exe')
    try:
        driver.implicitly_wait(10)

        driver.get('https://www.jd.com/')

        # Type the keyword into the search box on the JD home page and submit it.
        input_tag = driver.find_element_by_id('key')
        input_tag.send_keys('墨菲定律')
        input_tag.send_keys(Keys.ENTER)

        # Pause before scraping so the result page has started loading;
        # otherwise the first scroll in get_goods may run too early.
        time.sleep(2)

        get_goods(driver)
    finally:
        # quit() ends the whole WebDriver session; close() would only close
        # the current window.
        driver.quit()

posted @ 2019-07-03 15:24  Crystal_Zh  阅读(280)  评论(0编辑  收藏  举报