1 import requests
2 from lxml import etree
3 from selenium import webdriver
4 import time
5 import pymongo
6
7
# MongoDB handles: scraped listings are written to the 'shop_info'
# collection of the '闲鱼' database on the local server.
client = pymongo.MongoClient('localhost', 27017)
DB = client['闲鱼']
shop_info = DB['shop_info']

# Listing page whose category filter block holds the per-category links.
url = "https://s.2.taobao.com/list/list.htm?spm=2007.1000337.0.0.735ad9c1MhZfTa&st_trust=1&ist=0"
broswer = webdriver.PhantomJS()
try:
    broswer.get(url)
    # Click the link inside the category filter block before taking the
    # snapshot -- presumably this expands the hidden category list that
    # labelparse() reads; confirm against the live page.
    # NOTE: there is no explicit wait around the click (earlier sleeps were
    # disabled), so the snapshot relies on the page updating synchronously.
    button = broswer.find_element_by_xpath('//*[@id="J_CategoryFilters"]/div/a')
    button.click()
    # Snapshot of the rendered HTML; this is the only browser output used
    # by the rest of the script.
    r = broswer.page_source
finally:
    # Always shut the headless browser down so its process does not outlive
    # the snapshot, including when loading or clicking fails.
    broswer.quit()
21
22
def labelparse(r):
    """Extract the category link targets from a rendered listing page.

    Args:
        r: HTML source of the listing page as a string.

    Returns:
        A list with the ``href`` value of every ``<a>`` found directly under
        an ``<li>`` of the ``<ul class="J_HiddenAreaContent clearfix">``
        element(s), in document order.
    """
    html = etree.HTML(r)
    # Only the hrefs are returned; the link captions were extracted before
    # but never used, so that query is gone.
    return html.xpath('//ul[@class="J_HiddenAreaContent clearfix"]/li/a/@href')
28
29
def shopparse(url, page, timeout=30):
    """Fetch one result page of a category and store its listings in MongoDB.

    Args:
        url: Category link as returned by ``labelparse``. ``'https:'`` is
            prepended and the last five characters are replaced by the
            paging query (presumably the link is protocol-relative and ends
            in ``ist=0`` -- confirm against the live markup).
        page: Page number placed in the ``page`` query parameter.
        timeout: Seconds to wait for the HTTP response before giving up.

    Every listing found is written to the module-level ``shop_info``
    collection as a document with the keys ``shopname``, ``shopprice`` and
    ``shoplocation``. Pages answering with an HTTP error status, or whose
    body yields no parse tree, are skipped.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    fulurl = '{0}{1}page={2}&ist=0'.format('https:', url[:-5], str(page))
    headers = {'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36'}
    # Without a timeout a single stalled connection would block the whole
    # crawl indefinitely.
    r = requests.get(fulurl, headers=headers, timeout=timeout)
    if not r.ok:
        # An error page carries no listings; skip it instead of parsing it.
        return

    html = etree.HTML(r.text)
    if html is None:
        # NOTE(review): lxml appears to return None for a body without any
        # parsable markup; guard so .xpath() is never called on None.
        return

    shopname = html.xpath('//h4[@class="item-title"]/a/text()')
    shopprice = html.xpath('//span[@class="price"]/em/text()')
    shoplocation = html.xpath('//div[@class="seller-location"]/text()')

    # The three lists are extracted independently and may differ in length
    # when a listing lacks a price or location. zip() stops at the shortest
    # list instead of raising IndexError part-way through the page.
    documents = [
        {
            'shopname': name,
            'shopprice': price,
            'shoplocation': location,
        }
        for name, price, location in zip(shopname, shopprice, shoplocation)
    ]
    # insert_many() rejects an empty batch, so only call it when the page
    # actually produced listings. One batch replaces one round trip per item.
    if documents:
        shop_info.insert_many(documents)
47
48
49
50
def main(page):
    """Scrape result page ``page`` of every category in the page snapshot.

    The category links are read from the module-level HTML snapshot ``r``
    via ``labelparse``; each one is handed to ``shopparse`` together with
    ``page``.
    """
    for category_url in labelparse(r):
        shopparse(category_url, page)
56
57
58
if __name__ == "__main__":
    # Walk result pages 0 through 99; each main() call covers every
    # category for that page number.
    for page in range(100):
        main(page)