# Macbookpro — Xianyu (2.taobao.com) second-hand listing scraper

from bs4 import BeautifulSoup
import time
import pymongo
import requests
# import random
from multiprocessing import Pool
# MongoDB connection: scraped items are stored in db ``xy_mac``,
# collection ``database``. MongoClient connects lazily on first use.
client = pymongo.MongoClient('localhost', 27017)
xy_mac = client['xy_mac']
database = xy_mac['database']

# Search-result pages 0-99 for "macbookpro", price band 4000-7000 CNY
# (start/end query params). One URL per result page.
urls = ['https://s.2.taobao.com/list/list.htm?spm=2007.1000337.0.0.61cda5d6wpTvq1&st_trust=1&start=4000&end=7000&page={}&q=macbookpro&ist=0'.format(i) for i in range(100)]

# Request headers sent with every page fetch.
# NOTE(review): the cookie is a captured login session — it is account-specific
# and will expire; refresh it before re-running the crawler.
headers = {
'user-agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64)AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36',
'cookie':'swfstore=111401; t=d20536d2d10e77e461ec4bc996fd40cc; cna=8wTQEtrZnTgCAXb++cRqruJd; hng=CN%7Czh-CN%7CCNY%7C156; thw=cn; UM_distinctid=160bd29d63a618-079696740e5ea9-393d5c04-1fa400-160bd29d63dba6; uc3=sg2=U%2BIqukFv1b19qQt2BN86I91H6qp0HZN8vNPTF56Km7U%3D&nk2=1H%2B8vouo7yusY7sS&id2=W80oUNNWQwOL&vt3=F8dBzLeIsHCnzJYvucQ%3D&lg2=U%2BGCWk%2F75gdr5Q%3D%3D; uss=Vy%2BXL1eQV5fgBg%2BfNX5l2bzR6eFWENnBVyNLhaQuegZBZ05vc8Hyk6P6VA%3D%3D; lgc=%5Cu72AF%5Cu8D30%5Cu4EB2%5Cu5E741314; tracknick=%5Cu72AF%5Cu8D30%5Cu4EB2%5Cu5E741314; _cc_=Vq8l%2BKCLiw%3D%3D; tg=0; x=e%3D1%26p%3D*%26s%3D0%26c%3D0%26f%3D0%26g%3D0%26t%3D0%26__ll%3D-1%26_ato%3D0; mt=ci=-1_0; l=Are3W6ZACrJtXH8C-ZQZ6saJx6ABfIve; cookie2=1c56d5e35de05dbc06fbe1784f70885a; v=0; _tb_token_=3ee3173339e33; CNZZDATA1252911424=1260735152-1515001779-%7C1516030505; CNZZDATA30058275=cnzz_eid%3D1592397953-1514999307-%26ntime%3D1516030227; uc1=cookie14=UoTdfYXyhHqXhQ%3D%3D;isg=Alxc67ZPajPvFR5v-kyJamacLXrOfQCbMNlPczZda8cqgfwLXuXQj9Izld-C',
}
def get_product_info(url):
    """Fetch one listing page and store every item found on it.

    Parses price, title, description and seller nick out of the listing
    HTML and inserts one document per item into the ``database``
    collection, echoing each document to stdout.

    :param url: full search-result page URL.
    :returns: None — results go to MongoDB as a side effect.
    """
    # Throttle so the crawler is less likely to be rate-limited/banned.
    time.sleep(2.5)
    # timeout keeps a dead connection from hanging this pool worker forever
    response = requests.get(url, headers=headers, timeout=30)
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'lxml')
        prices = soup.select('div.item-info > div.item-price.price-block > span > em')
        titles = soup.select('div.item-info > h4 > a')
        details = soup.select('div.item-info > div.item-description')
        sellers_id = soup.select('div.seller-info-wrapper > div.seller-info > div.seller-nick > a')
        # zip() stops at the shortest list, so items whose four fields did
        # not all match the selectors are silently dropped.
        for price, title, detail, seller_id in zip(prices, titles, details, sellers_id):
            data = {
                'price': price.get_text(),
                'title': title.get_text(),
                'detail': detail.get_text(),
                'seller_id': seller_id.get_text(),
            }
            print(data)
            # Collection.insert() is deprecated (removed in pymongo 4.x);
            # insert_one() is the supported single-document API.
            database.insert_one(data)
    else:
        print('request failed with status {}'.format(response.status_code))
if __name__ == '__main__':
    # Fan the 100 page URLs out across worker processes. The ``with``
    # block guarantees the pool is terminated and joined on exit, even if
    # a worker raises — the original never closed the pool.
    with Pool() as pool:
        pool.map(get_product_info, urls)

# ------------------ Export (run in a Windows shell, not Python) ------------------ #
# C:\MongoDB\bin\mongoexport -d xy_mac -c database --type=csv -f price,title,detail,seller_id -o c:\database.dat





# posted @ 2018-01-16 01:46 by hyolyn (blog footer: 528 reads, 0 comments)