# 用 selenium 爬取某东上小米手机的信息(共 20 页)

import time
#
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By
from selenium.webdriver import ActionChains
from lxml import etree
# Author: 小辉
# Scrape Xiaomi phone information from JD.com.
class jingdo():
    """Selenium-driven scraper for Xiaomi phone listings on JD.com.

    Walks the search-result pages, collects the ``data-sku`` of every listed
    item, opens each product page and prints the product name, price and
    comment count that were found on it.
    """

    def __init__(self):
        # Raw string: the path contains backslashes that are not valid escape
        # sequences ("\自", "\c"); the resulting value is unchanged.
        self.sertd = Service(r"\自动\chromedriver.exe")  # chromedriver service
        self.mko = webdriver.Chrome(service=self.sertd)  # browser driven through that service
        self.html_top = 'https://item.jd.com/'
        self.html_huo = '.html'

    @staticmethod
    def _search_url(page_index):
        """Return the search-results URL for the 1-based result page *page_index*."""
        if page_index == 1:
            return 'https://search.jd.com/Search?keyword=%E5%B0%8F%E7%B1%B3%E6%89%8B%E6%9C%BA&enc=utf-8&pvid=98f95d796dc64a139bb211652a371657'
        if page_index == 2:
            return 'https://search.jd.com/Search?keyword=%E5%B0%8F%E7%B1%B3%E6%89%8B%E6%9C%BA&qrst=1&ev=exbrand_%E5%B0%8F%E7%B1%B3%EF%BC%88MI%EF%BC%89%5E&pvid=98f95d796dc64a139bb211652a371657&page=3&s=57&click=0'
        # From the third page on, the "page" parameter advances by 2 per page
        # (5, 7, 9, ...) and the "s" offset by 60 (116, 176, ...).
        page = 2 * page_index - 1
        offset = 116 + 60 * (page_index - 3)
        return 'https://search.jd.com/Search?keyword=%E5%B0%8F%E7%B1%B3%E6%89%8B%E6%9C%BA&qrst=1&ev=exbrand_%E5%B0%8F%E7%B1%B3%EF%BC%88MI%EF%BC%89%5E&pvid=98f95d796dc64a139bb211652a371657&page=' + str(page) + '&s=' + str(offset) + '&click=1'

    @staticmethod
    def _clean_texts(texts):
        """Remove spaces and surrounding whitespace from each text; drop empty results."""
        cleaned = (str(text).replace(' ', '').strip() for text in texts)
        return [text for text in cleaned if text != '']

    def _scrape_product(self, sku, delay):
        """Open the product page for *sku* and return (names, prices, comment counts)."""
        self.mko.get(self.html_top + str(sku) + self.html_huo)
        time.sleep(delay)
        page = etree.HTML(self.mko.page_source)
        name = page.xpath('//*[@class="sku-name"]/text()')  # phone name
        moung = page.xpath('//*[@class="summary-price J-summary-price"]/div[2]/span[1]/span[2]/text()')  # phone price
        pj = page.xpath('//*[@id="comment-count"]/a/text()')  # phone comment count
        return self._clean_texts(name), self._clean_texts(moung), self._clean_texts(pj)

    def mko111(self, pages=20, delay=5):
        """Scrape *pages* search-result pages and print the data of every product.

        Args:
            pages: number of search-result pages to visit (default 20).
            delay: seconds to wait after each page load and after scrolling
                (default 5).

        A failure on one product page is reported and skipped so the remaining
        products on the same results page are still scraped.
        """
        for page_index in range(1, pages + 1):
            self.mko.get(self._search_url(page_index))
            time.sleep(delay)
            # Scroll the document down to offset 9000 px before reading the
            # page source (presumably so the rest of the list gets loaded).
            self.mko.execute_script("var q=document.documentElement.scrollTop=9000")
            time.sleep(delay)
            nji_1 = BeautifulSoup(self.mko.page_source, 'lxml')
            xxh = nji_1.select('.ml-wrap')
            nji_name = etree.HTML(str(xxh))
            # SKU id of every product listed on this results page.
            hao_url = nji_name.xpath('//*[@id="J_goodsList"]/ul/li/@data-sku')
            for sku in hao_url:
                try:
                    name_1, moung_1, pj_1 = self._scrape_product(sku, delay)
                    print(name_1, moung_1, pj_1)
                except Exception as exc:
                    # Best-effort scraping: report which product failed and why,
                    # then carry on with the next one.
                    print('错误', sku, exc)

    def close(self):
        """Quit the browser and stop the chromedriver process."""
        self.mko.quit()


if __name__ == "__main__":
    htmkl = jingdo()
    try:
        htmkl.mko111()
    finally:
        # Always release the browser, even if scraping aborts.
        htmkl.close()

  

posted @ 2022-09-24 20:45  python,菜鸟  阅读(54)  评论(0)    收藏  举报