爬取京东商品信息

爬取京东商品信息

 

from selenium import webdriver
from selenium.webdriver import ChromeOptions
from selenium.webdriver import ActionChains
from selenium.webdriver.common.keys import Keys
import time

# Chrome launch options shared by the whole script.
option = ChromeOptions()
# Passes the 'disable-infobars' switch to Chrome (presumably to hide the
# automation info bar -- confirm it is still honoured by the Chrome in use).
option.add_argument('disable-infobars')

# `options=` is the supported keyword; `chrome_options=` is deprecated and
# rejected by newer Selenium releases.
driver = webdriver.Chrome(options=option)

def _scroll_through_page(driver):
    """Scroll the current page downwards in 20 steps of 500 pixels.

    Starts at offset 400 and pauses 0.2 s after each step (presumably so the
    page has time to load additional items -- confirm against the site).
    """
    number = 400
    for _ in range(20):
        js = '''
                window.scrollTo(0, %s)
            ''' % number
        number += 500
        driver.execute_script(js)
        time.sleep(0.2)


def _format_good(good):
    """Return the formatted text record for a single product element.

    The record contains the product name, price, link, image URL and the
    review-count text, each read from a child element of ``good``.
    """
    # Product name
    good_name = good.find_element_by_css_selector('.p-name em').text.replace('\n', '')

    # Product price
    good_price = good.find_element_by_css_selector('.p-price').text.replace('\n', '')

    # Product link
    good_link = good.find_element_by_css_selector('.p-img a').get_attribute('href')

    # Product image
    good_img = good.find_element_by_css_selector('.p-img img').get_attribute('src')

    # Number of reviews
    good_commit = good.find_element_by_css_selector('.p-commit').text.replace('\n', ' ')

    return '''
            商品名称: %s
            商品价格: %s
            商品链接: %s
            商品图片: %s
            评价人数: %s
            ''' % (good_name, good_price, good_link, good_img, good_commit)


def get_goods(driver):
    """Scrape every result page reachable from the current search page.

    For each page: scroll through it, read every ``gl-item`` element inside
    ``#J_goodsList``, print its record and append it to
    '京东女士内衣数据爬去.txt'. Then click the ``pn-next`` control and repeat.

    The crawl runs as a loop instead of recursing once per page, so it cannot
    exhaust the interpreter's recursion limit, and it returns normally when
    there is no usable next-page control.

    Args:
        driver: a Selenium WebDriver already showing a search result page.

    Raises:
        Whatever the driver raises when the goods list or one of the
        per-product child elements cannot be found.
    """
    while True:
        _scroll_through_page(driver)

        # Parent element that holds all products
        good_div = driver.find_element_by_id('J_goodsList')
        # All product elements on this page
        good_list = good_div.find_elements_by_class_name('gl-item')

        # Open the output file once per page rather than once per product.
        with open('京东女士内衣数据爬去.txt', 'a', encoding='utf-8') as f:
            for good in good_list:
                goods = _format_good(good)
                print(goods)
                f.write(goods + '\n')

        # find_elements_* returns an empty list instead of raising when the
        # control is missing, which gives a clean stop condition.
        next_tags = driver.find_elements_by_class_name('pn-next')
        if not next_tags:
            break
        next_tag = next_tags[0]
        # NOTE(review): assumes the last page marks the control with a
        # 'disabled' class -- confirm against the live page markup.
        if 'disabled' in (next_tag.get_attribute('class') or '').split():
            break

        next_tag.click()
        # Fixed pause after the click before reading the next page.
        time.sleep(3)



# Script entry: open the JD home page, search for the keyword, then scrape
# the result pages with get_goods().
try:
    driver.get('https://www.jd.com/')
    # Element lookups will wait up to 10 seconds before failing.
    driver.implicitly_wait(10)
    input_tag = driver.find_element_by_id('key')
    input_tag.send_keys('女士内衣')
    search_button = driver.find_element_by_class_name('button')
    search_button.click()

    get_goods(driver)

    # Long pause after scraping (presumably to keep the browser open for
    # manual inspection -- confirm it is still wanted).
    time.sleep(1000)

finally:
    # quit() ends the whole WebDriver session and its driver process;
    # close() would only close the current window and could leave the
    # browser/driver process running.
    driver.quit()

  

posted @ 2019-06-25 21:02  优寒凌  阅读(245)  评论(0)  编辑  收藏  举报