用 selenium 爬取某东的小米手机信息（共 20 页）

import time
#
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By
from selenium.webdriver import ActionChains
from lxml import etree
作者:小辉

#爬取京东的小米的手机信息

class jingdo():
    """Selenium-driven scraper for Xiaomi phone listings on JD.com search.

    Creating an instance starts a Chrome session through the chromedriver
    executable at ``DRIVER_PATH``.  ``mko111`` then walks the search result
    pages, opens every listed product and prints its name, price and comment
    count.

    Attributes:
        sertd: selenium ``Service`` wrapping the chromedriver executable.
        mko: the ``webdriver.Chrome`` session used for every page load.
        html_top: prefix of a product detail URL.
        html_huo: suffix of a product detail URL.
    """

    # Raw string: the previous "\自动\chromedriver.exe" only worked because
    # "\自" and "\c" are invalid escapes that Python currently keeps verbatim
    # (with a warning). The resulting value is unchanged.
    DRIVER_PATH = r"\自动\chromedriver.exe"

    # Number of search result pages visited by mko111.
    PAGE_COUNT = 20

    # Seconds to wait after each navigation / scroll so the page can render.
    WAIT_SECONDS = 5

    _FIRST_PAGE_URL = (
        'https://search.jd.com/Search?keyword=%E5%B0%8F%E7%B1%B3%E6%89%8B%E6%9C%BA'
        '&enc=utf-8&pvid=98f95d796dc64a139bb211652a371657'
    )
    _PAGED_URL_PREFIX = (
        'https://search.jd.com/Search?keyword=%E5%B0%8F%E7%B1%B3%E6%89%8B%E6%9C%BA'
        '&qrst=1&ev=exbrand_%E5%B0%8F%E7%B1%B3%EF%BC%88MI%EF%BC%89%5E'
        '&pvid=98f95d796dc64a139bb211652a371657&page='
    )

    def __init__(self):
        """Start the chromedriver service and open a Chrome session."""
        self.sertd = Service(self.DRIVER_PATH)  # chromedriver process wrapper
        self.mko = webdriver.Chrome(service=self.sertd)  # browser session
        self.html_top = 'https://item.jd.com/'
        self.html_huo = '.html'

    @classmethod
    def _search_url(cls, index):
        """Return the search URL for the 1-based result page ``index``.

        Page 1 uses the plain keyword URL and page 2 a fixed
        ``page=3&s=57&click=0`` query.  From page 3 on, ``page`` grows by 2
        and ``s`` by 60 per step, starting at ``page=5&s=116``, with
        ``click=1``.

        Raises:
            ValueError: if ``index`` is smaller than 1.
        """
        if index < 1:
            raise ValueError('page index must be >= 1, got %r' % (index,))
        if index == 1:
            return cls._FIRST_PAGE_URL
        if index == 2:
            page, offset, click = 3, 57, 0
        else:
            page, offset, click = 2 * index - 1, 116 + 60 * (index - 3), 1
        return (cls._PAGED_URL_PREFIX + str(page)
                + '&s=' + str(offset) + '&click=' + str(click))

    @staticmethod
    def _clean_texts(texts):
        """Drop spaces and surrounding whitespace from each text, skipping empties."""
        cleaned = (str(text).replace(' ', '').strip() for text in texts)
        return [text for text in cleaned if text != '']

    def _sku_ids(self):
        """Return the ``data-sku`` values of the products on the loaded search page."""
        soup = BeautifulSoup(self.mko.page_source, 'lxml')
        wrappers = soup.select('.ml-wrap')
        # The stringified result list is handed to lxml so XPath can be used
        # on just the matched ".ml-wrap" markup.
        tree = etree.HTML(str(wrappers))
        return tree.xpath('//*[@id="J_goodsList"]/ul/li/@data-sku')

    def _print_product(self, sku):
        """Open the detail page for ``sku`` and print its name, price and comments."""
        self.mko.get(self.html_top + str(sku) + self.html_huo)
        time.sleep(self.WAIT_SECONDS)
        page = etree.HTML(self.mko.page_source)
        names = page.xpath('//*[@class="sku-name"]/text()')  # product name
        prices = page.xpath(
            '//*[@class="summary-price J-summary-price"]/div[2]/span[1]/span[2]/text()'
        )  # product price
        comments = page.xpath('//*[@id="comment-count"]/a/text()')  # comment count
        print(self._clean_texts(names),
              self._clean_texts(prices),
              self._clean_texts(comments))

    def mko111(self):
        """Visit every search page and print the details of each listed product.

        A failure while scraping one product is reported and skipped so the
        remaining products on that page are still processed.
        """
        for index in range(1, self.PAGE_COUNT + 1):
            self.mko.get(self._search_url(index))
            time.sleep(self.WAIT_SECONDS)

            # Scroll the document down to 9000 px before reading the page
            # source (presumably so lazily rendered items get loaded).
            self.mko.execute_script("var q=document.documentElement.scrollTop=9000")
            time.sleep(self.WAIT_SECONDS)

            for sku in self._sku_ids():
                try:
                    self._print_product(sku)
                except Exception as exc:
                    # Best-effort scraping: report which product failed and
                    # why, then carry on with the next one.
                    print('错误', sku, exc)

# Run the scraper, then always shut the browser down: without quit() the
# Chrome window and the chromedriver process stay alive after the script ends
# or fails part-way through.
htmkl=jingdo()
try:
    htmkl.mko111()
finally:
    htmkl.mko.quit()

posted @ 2022-09-24 20:45 python，菜鸟 阅读(54) 评论(0) 收藏 举报

刷新页面返回顶部

xxh12

爬取某东的小米的手机信息20页用selenium来爬取

公告

xxh12

爬取某东的小米的手机信息20页用selenium来爬取

公告

爬取某东的小米的手机信息20页用selenium来爬取