爬取京东商品信息
爬取京东商品信息
import time

from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver import ChromeOptions
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

option = ChromeOptions()
option.add_argument('disable-infobars')
# `options=` is the supported keyword; `chrome_options=` is deprecated.
driver = webdriver.Chrome(options=option)


def _scroll_page(driver):
    """Scroll the current page downwards in 20 steps of 500 px.

    Presumably this gives lazily loaded product entries time to render
    before they are read -- the short sleep between steps paces the scroll.
    """
    number = 400
    for _ in range(20):
        js = ''' window.scrollTo(0, %s) ''' % number
        number += 500
        driver.execute_script(js)
        time.sleep(0.2)


def _describe_good(good):
    """Build the text record (name, price, link, image, reviews) for one item.

    Args:
        good: The WebElement of a single product entry (class ``gl-item``).

    Returns:
        The formatted record string for that product.
    """
    # Product name
    good_name = good.find_element(
        By.CSS_SELECTOR, '.p-name em').text.replace('\n', '')
    # Product price
    good_price = good.find_element(
        By.CSS_SELECTOR, '.p-price').text.replace('\n', '')
    # Product link
    good_link = good.find_element(
        By.CSS_SELECTOR, '.p-img a').get_attribute('href')
    # Product image
    good_img = good.find_element(
        By.CSS_SELECTOR, '.p-img img').get_attribute('src')
    # Number of reviews
    good_commit = good.find_element(
        By.CSS_SELECTOR, '.p-commit').text.replace('\n', ' ')
    return ''' 商品名称: %s 商品价格: %s 商品链接: %s 商品图片: %s 评价人数: %s ''' % (
        good_name, good_price, good_link, good_img, good_commit)


def get_goods(driver):
    """Scrape every page of the current search results.

    For each result page the function scrolls the page, reads every product
    entry, prints its record and appends it to the output text file, then
    clicks the "next page" button. It returns once no usable "next page"
    button is left.

    Pages are walked with a loop rather than by recursion, so the call depth
    stays constant no matter how many result pages there are.

    Args:
        driver: A Selenium WebDriver already showing a search result page.
    """
    while True:
        _scroll_page(driver)

        # Parent element that contains all product entries.
        good_div = driver.find_element(By.ID, 'J_goodsList')
        # All product entries on this page.
        good_list = good_div.find_elements(By.CLASS_NAME, 'gl-item')

        # Open the output file once per page instead of once per product.
        with open('京东女士内衣数据爬去.txt', 'a', encoding='utf-8') as f:
            for good in good_list:
                goods = _describe_good(good)
                print(goods)
                f.write(goods + '\n')

        # find_elements returns an empty list instead of raising when the
        # button is missing, which gives a clean stop condition.
        next_tags = driver.find_elements(By.CLASS_NAME, 'pn-next')
        if not next_tags:
            break
        next_tag = next_tags[0]
        # NOTE(review): the site appears to keep the button on the last page
        # and mark it with a "disabled" class -- confirm against the live page.
        if 'disabled' in (next_tag.get_attribute('class') or '').split():
            break
        next_tag.click()
        # Give the next result page time to load.
        time.sleep(3)


try:
    driver.get('https://www.jd.com/')
    driver.implicitly_wait(10)
    input_tag = driver.find_element(By.ID, 'key')
    input_tag.send_keys('女士内衣')
    search_button = driver.find_element(By.CLASS_NAME, 'button')
    search_button.click()
    get_goods(driver)
    # Keep the browser open for a while after scraping finishes.
    time.sleep(1000)
finally:
    # quit() ends the whole WebDriver session (all windows and the driver
    # process); close() would only close the current window.
    driver.quit()