# -*- coding: GBK -*-
# Scrape Ctrip flight-status data for the city pairs listed in
# D:\hotline.xlsx and write one row per flight to a new .xls workbook.
# Python 2 script (urllib2 / cookielib / print statements).
from lxml import etree
import xlwt       # write the output .xls workbook
import xlrd       # read the input .xlsx city-pair list
import time       # sleep between city pairs
import random     # randomized politeness delay
import urllib
import urllib2
import cookielib
import json
import sys
# Make GBK the process-wide default codec so the Chinese strings below
# round-trip through xlwt/json without explicit encode/decode calls.
reload(sys)
sys.setdefaultencoding('gbk')
# Input workbook: each data row holds one (origin, destination) city pair.
hotline = xlrd.open_workbook('D:\hotline.xlsx')
sheet1 = hotline.sheet_by_name('Sheet1')

# Output workbook: a single sheet whose first row is the title row below.
wb = xlwt.Workbook(encoding = 'GBK')
ws = wb.add_sheet(u'热门航班动态')

# Column titles for output columns 0..34, in order.
COLUMN_TITLES = (
    '航班号',
    '航司名称',
    '航司简称',
    '出发城市',
    '到达城市',
    '出发机场三字码',
    '出发航站楼',
    '到达机场三字码',
    '到达航站楼',
    '飞行时长',
    '计划出发日期',
    '计划出发时间',
    '计划到达时间',
    '计划出发日期时间',
    '计划到达日期时间',
    '计划到达时间预测',
    '预计出发时间',
    '飞常准预计出发时间',
    '预计到达时间',
    '实际出发时间',
    '实际到达时间',
    '更新时间',
    '准点率',
    '状态',
    '出发时区',
    '到达时区',
    '值机柜台',
    '登机口',
    '行李转盘',
    '飞行里程',
    '出发地天气',
    '出发地温度',
    '到达地天气',
    '到达地温度',
    '登机方式',
)
for col, title in enumerate(COLUMN_TITLES):
    ws.write(0, col, title)

# Data rows start at 1; row 0 is the title row written above.
n = 1
for r in range(1 , sheet1.nrows):
rows = sheet1.row_values(r)
url = "http://flights.ctrip.com/Process/FlightStatus/FindByCityWithJson?from="+ rows[0] + "&to=" +rows[1]+ "&date=20161220"
cj = cookielib.LWPCookieJar()
cookie_support = urllib2.HTTPCookieProcessor(cj)
opener = urllib2.build_opener(cookie_support, urllib2.HTTPHandler)
urllib2.install_opener(opener)
HEADER = {
'User-Agent' : 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:31.0) Gecko/20100101 Firefox/31.0',
'Referer' : 'http://202.206.1.163/logout.do'
}
req = urllib2.Request(url,headers = HEADER)
try:
res_data = urllib2.urlopen(req)
except urllib2.HTTPError, e:
print e.code
print e.reason
res = res_data.read()
ress = unicode(res)
decoded = json.loads(ress)
content = decoded['List']
#循环体,写一行值
for i in range(0,len(content)):
ws.write(n , 0 , content[i]['FlightNo'])
ws.write(n , 1 , content[i]['FlightCompany'])
ws.write(n , 2 , content[i]['CompanyShortName'])
ws.write(n , 3 , content[i]['DCityName'])
ws.write(n , 4 , content[i]['ACityName'])
ws.write(n , 5 , content[i]['DAirportName'])
ws.write(n , 6 , content[i]['DTerminal'])
ws.write(n , 7 , content[i]['AAirportCode'])
ws.write(n , 8 , content[i]['ATerminal'])
ws.write(n , 9 , content[i]['FlightDuration'])
ws.write(n , 10 , content[i]['PlanDData'])
ws.write(n , 11 , content[i]['PlanDTime'])
ws.write(n , 12 , content[i]['PlanATime'])
ws.write(n , 13 , content[i]['PlanDDateTime'])
ws.write(n , 14 , content[i]['PlanADateTime'])
ws.write(n , 15 , content[i]['PlanATimePre'])
ws.write(n , 16 , content[i]['ExpDDateTime'])
ws.write(n , 17 , content[i]['VeryZhunExpDTime'])
ws.write(n , 18 , content[i]['ExpADateTime']) ws.write(n , 19 , content[i]['ActDDateTime']) ws.write(n , 20 , content[i]['ActADateTime']) ws.write(n , 21 , content[i]['UpdateTime']) ws.write(n , 22 , content[i]['OnTimeRate']) ws.write(n , 23 , content[i]['Status']) ws.write(n , 24 , content[i]['DTimeZone']) ws.write(n , 25 , content[i]['ATimeZone'])# time.sleep(random.randint(5, 15)) url2 = "http://flights.ctrip.com/actualtime/fno--"+content[i]['FlightNo']+"-20161220-"+rows[0]+"-"+rows[1]+".html" cj = cookielib.LWPCookieJar() cookie_support = urllib2.HTTPCookieProcessor(cj) opener = urllib2.build_opener(cookie_support, urllib2.HTTPHandler) urllib2.install_opener(opener) detailurl = urllib2.Request(url2,headers = HEADER) try: detailurl_data = urllib2.urlopen(detailurl) except urllib2.HTTPError, e: print e.code print e.reason detail = detailurl_data.read() detailcontent = etree.HTML(detail) desk = detailcontent.xpath('//div[@class="strong"]/text()') cost = detailcontent.xpath('//div[@class="inl between"]/p[@class="gray"]/text()') weather = detailcontent.xpath('//div[@class="f12"]/span[@class="gray"]/text()') weather2 = detailcontent.xpath('//div[@class="f12"]/span[@class="gray ml5"]/text()') way = detailcontent.xpath('//span[@class="gray middle"]/text()') try: ws.write(n , 26 , desk[0].strip()) except: pass try: ws.write(n , 27 , desk[1].strip()) except: pass try: ws.write(n , 28 , desk[2].strip()) except: pass try: ws.write(n , 29 , cost[1].strip()) except: pass try: ws.write(n , 30 , weather2[0].strip()) except: pass try: ws.write(n , 31 , weather[0].strip()) except: pass try: ws.write(n , 32 , weather2[1].strip()) except: pass try: ws.write(n , 33 , weather[1].strip()) except: pass try: ws.write(n , 34 , way[0].strip()) except: pass#n+1,写下一行 n = n + 1#一个城市对写完,歇一会,一会找另一个城市对 time.sleep(random.randint(3, 10))wb.save('D:\\ctrip\\20161220.xls')
# Reference: http://blog.chinaunix.net/uid-21961132-id-2915452.html