爬取京东

爬取京东

"""
########
# 爬取京东商品信息
########
from selenium import webdriver
import time
from selenium.webdriver.common.keys import Keys
# Module-level Chrome WebDriver shared by the script below; it is started
# from the chromedriver binary located at ./chromedriver.
bro = webdriver.Chrome(executable_path='./chromedriver')


def _print_page_goods(bro):
    """Print the details of every ``gl-item`` element on the current page."""
    # find_elements_* returns every match; find_element_* returns a single one.
    for li in bro.find_elements_by_class_name('gl-item'):
        url = li.find_element_by_css_selector('.p-img>a').get_attribute('href')

        # Look the image element up once and reuse it for both attributes.
        img = li.find_element_by_css_selector('.p-img img')
        url_img = img.get_attribute("src")
        if not url_img:
            # No "src" value: fall back to the "data-lazy-img" attribute,
            # which the original code prefixes with the scheme.  Guard
            # against that attribute being absent as well, instead of
            # failing on 'https:' + None.
            lazy_img = img.get_attribute("data-lazy-img")
            url_img = 'https:' + lazy_img if lazy_img else ''

        price = li.find_element_by_css_selector('.p-price i').text
        name = li.find_element_by_css_selector('.p-name em').text
        commit = li.find_element_by_css_selector('.p-commit a').text

        # Argument order must follow the labels in the template:
        # name, price, image URL, product URL, comment count.
        print('''
        商品名字:%s
        商品价格:%s
        商品图片地址:%s
        商品地址:%s
        商品评论数:%s
        ''' % (name, price, url_img, url, commit))


def get_goods(bro):
    """Print the goods on the current result page, then keep paging forward.

    For each page, every element with class ``gl-item`` has its name, price,
    image URL, product URL and comment count printed.  The link whose text
    contains "下一页" is then clicked and the following page is processed
    the same way.

    Pages are walked with a loop rather than by recursion, so the call
    stack does not grow with the number of pages visited.

    Args:
        bro: Driver-like object exposing the ``find_element(s)_by_*`` lookup
            methods used here, already showing a result page.

    Raises:
        Any exception raised by the driver's element lookups or by the
        click.  The loop has no other exit, so a failed next-page lookup is
        what ends paging; the exception propagates to the caller.
    """
    while True:
        _print_page_goods(bro)

        # Locate the next-page link, pause briefly, then move on.
        next_button = bro.find_element_by_partial_link_text('下一页')
        time.sleep(1)
        next_button.click()


# Script entry: open jd.com, search for a keyword and print every result page.
try:
    bro.get('https://www.jd.com')
    # Implicit wait: element lookups retry for up to 10 seconds before failing.
    bro.implicitly_wait(10)
    input_search = bro.find_element_by_id('key')
    input_search.send_keys("精品内衣")
    # Simulate pressing Enter in the search box to submit the query.
    input_search.send_keys(Keys.ENTER)
    get_goods(bro)

except Exception as e:
    # Top-level boundary: report whatever stopped the crawl.
    print(e)

finally:
    # quit() ends the whole WebDriver session and shuts the driver process
    # down; close() would only close the current window.
    bro.quit()
"""
posted @ 2020-04-09 20:45  alen_zhan  阅读(180)  评论(0)  编辑  收藏  举报
返回顶部