Python 爬取酒店信息

一、代码是跟着网上一个视频教学敲的,还有一部分待优化

二、全部源码

import requests#网络请求
import re#正则 
import time
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Real endpoint behind the hotel list page: the AJAX search URL that the
# Referer page below posts to.
url = 'http://hotel.elong.com/ajax/list/asyncsearch'

# HTTP headers sent with every search POST.
# 'Content-Length' is deliberately NOT hard-coded: the form body's length
# changes with the page index, and Requests computes the correct value
# from the encoded body when it prepares the request.
header = {
    'Accept': 'application/json, text/javascript, */*; q=0.01',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'Host': 'hotel.elong.com',
    'Origin': 'http://hotel.elong.com',
    'Pragma': 'no-cache',
    'Referer': 'http://hotel.elong.com/beijing/',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.79 Safari/537.36',
    # Marks the request as an XMLHttpRequest (AJAX) call.
    'X-Requested-With': 'XMLHttpRequest',
}
# Form payload posted to the search endpoint. Every field is constant
# except 'listRequest.pageIndex', which is overwritten per page in the loop
# below, so the dict is built once instead of on every iteration.
dat = {
    'code': '9254658',
    'listRequest.areaID': '',
    'listRequest.bookingChannel': '1',
    'listRequest.cardNo': '192928',
    'listRequest.checkInDate': '2018-06-19 00:00:00',
    'listRequest.checkOutDate': '2018-06-20 00:00:00',
    'listRequest.cityID': '0101',
    'listRequest.cityName': '北京市',
    'listRequest.customLevel': '11',
    'listRequest.distance': '20',
    'listRequest.endLat': '0',
    'listRequest.endLng': '0',
    'listRequest.facilityIds': '',
    'listRequest.highPrice': '0',
    'listRequest.hotelBrandIDs': '',
    'listRequest.isAdvanceSave': 'false',
    'listRequest.isAfterCouponPrice': 'true',
    'listRequest.isCoupon': 'false',
    'listRequest.isDebug': 'false',
    'listRequest.isLimitTime': 'false',
    'listRequest.isLogin': 'false',
    'listRequest.isMobileOnly': 'true',
    'listRequest.isNeed5Discount': 'true',
    'listRequest.isNeedNotContractedHotel': 'false',
    'listRequest.isNeedSimilarPrice': 'false',
    'listRequest.isReturnNoRoomHotel': 'true',
    'listRequest.isStaySave': 'false',
    'listRequest.isTrace': 'false',
    'listRequest.isUnionSite': 'false',
    'listRequest.keywords': '',
    'listRequest.keywordsType': '0',
    'listRequest.language': 'cn',
    'listRequest.listType': '0',
    'listRequest.lowPrice': '0',
    'listRequest.orderFromID': '50',
    'listRequest.pageIndex': 0,  # placeholder; set per page in the loop
    'listRequest.pageSize': '20',
    'listRequest.payMethod': '0',
    'listRequest.personOfRoom': '0',
    'listRequest.poiId': '0',
    'listRequest.promotionChannelCode': '0000',
    'listRequest.proxyID': 'ZD',
    'listRequest.rankType': '0',
    'listRequest.returnFilterItem': 'true',
    'listRequest.sellChannel': '1',
    'listRequest.seoHotelStar': '0',
    'listRequest.sortDirection': '1',
    'listRequest.sortMethod': '1',
    'listRequest.starLevels': '',
    'listRequest.startLat': '0',
    'listRequest.startLng': '0',
    'listRequest.taRecommend': 'false',
    'listRequest.themeIds': '',
    'listRequest.ctripToken': 'c3502aec-c095-4f09-b122-5d5d6dfb6a8f',
    'listRequest.elongToken': 'a7af9982-c0fb-4bcf-ba63-b9f70e801680',
}

# Patterns that pull the price and the hotel name out of the returned HTML
# fragment; compiled once because they are reused for every page.
price_pattern = re.compile(r'n class="h_pri_num ">(.*?)</span')
name_pattern = re.compile(r' target="_blank" title="(.*?)"><span')

# Output file; each page's rows are appended to it.
csv_path = 'C:\\Users\\你若成风618\\Desktop\\aa\\1.csv'

# Fetch 20 result pages (page indexes 0..19) and append (name, price) rows.
for n in range(20):
    dat['listRequest.pageIndex'] = n

    # timeout: do not hang forever on a stalled connection.
    html = requests.post(url, data=dat, headers=header, timeout=10)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    html.raise_for_status()
    content = html.json()['value']['hotelListHtml']

    hotel_pri = price_pattern.findall(content)
    hotel_name = name_pattern.findall(content)

    # Pair names with prices by position. zip() stops at the shorter list,
    # so a page with fewer than 20 hotels no longer raises IndexError.
    data = list(zip(hotel_name, hotel_pri))
    if not data:
        # Nothing was matched on this page; skip writing an empty frame.
        continue

    data2 = pd.DataFrame(data)
    data2.to_csv(csv_path, header=False, index=False, mode='a+')



posted @ 2018-06-18 16:51  HPUGIS  阅读(273)  评论(1)  编辑  收藏  举报