百度API之路线规划

近期参加一个课题,聊到路线规划问题,需要查询两个地点之间的最短线路距离以及最短用时等情况,于是想借用百度API做个参考。

环境:

  python 3.6

主要问题:

1. 分析百度官方路线规划API了解到,路线规划需要提供经纬度信息,于是先借用百度地理编码服务获取坐标。地理编码是指将地址或地名等位置描述转换为经纬度坐标的过程,得到的坐标信息可以用于制图或空间分析操作。(出于国家安全考虑,公布出来的坐标信息一般是经过加偏的。)参考文档:http://lbsyun.baidu.com/index.php?title=webapi/guide/webservice-geocoding
2. 根据经纬度信息,现有起点和终点坐标值(经纬度lng、lat),目的是通过百度地图开发者平台的路线规划功能获取起点终点路线规划距离和预估时长,百度地图开发者平台路线规划使用说明网址为:http://lbsyun.baidu.com/index.php?title=webapi/direction-api-abroad
3. 爬取过程中可能会由于服务器异常或者参数不满足要求导致爬虫中断,需要注意捕获并处理这类异常。

4.一是注意源文件的数据格式要转utf-8;二是修改文件路径;三是AK需要自行去开发者平台申请。

代码如下(由于文件不方便上传,只需稍作修改,即可使用):

# -*- coding:utf-8 -*-
# ------------------------------
# @Time     :2019/5/9 13:32
# @Author   :jonie
# @Email    :
# @File     :code_get.py
# Description:
# ------------------------------
import csv
import json
import time
import requests
from bs4 import BeautifulSoup
from urllib.request import urlopen, quote
import json
import requests
# [113.63095213159264, 34.74830559988335]#
# Geocode every address in the source CSV through the Baidu geocoding web API
# and append the resulting [lng, lat, label] rows to a text log and a CSV file.
origin_path = 'data/赛点.csv'  # source data; column 0 is a label, column 1 the address
new_path = 'data/地址对应坐标.txt'  # text file receiving one "[lng, lat, label]" list per line

# Request constants do not change between rows, so they are built once.
ak = 'FA8atAaqd1wajikD56lPqtiaNCldeya'  # replace with the AK issued for your own application
url = 'http://api.map.baidu.com/geocoder/v2/?address='
output = 'json'

with open(origin_path, 'r', encoding='utf-8') as source:  # closed automatically when done
    machine_data = csv.reader(source)

    for addr in machine_data:  # one geocoding request per input row
        if len(addr) < 2:
            # Blank or truncated row: there is no address column to geocode.
            continue

        address = addr[1]
        add = quote(address)  # addresses are Chinese text, so percent-encode them for the URL
        url2 = url + add + '&output=' + output + "&ak=" + ak

        try:
            with urlopen(url2) as req:
                res = req.read().decode()
            temp = json.loads(res)
        except (OSError, ValueError) as err:
            # Network failure or a non-JSON reply: report it and keep crawling.
            print('请求失败:', address, err)
            continue

        try:
            location = temp['result']['location']
            lng = "%.5f" % location['lng']  # longitude, 5 decimal places
            lat = "%.5f" % location['lat']  # latitude, 5 decimal places
        except (KeyError, TypeError):
            # The reply carries no coordinates (e.g. unknown address, bad AK, quota).
            print('地理编码失败:', address, temp)
            continue

        list1 = [lng, lat, addr[0]]
        print('百度坐标为:', list1)

        # Files are re-opened in append mode for every row so that results
        # already fetched survive an interrupted run.
        with open(new_path, 'a', encoding='utf-8') as f:
            f.write(str(list1))
            f.write('\n')

        # newline='' stops the csv module from emitting blank lines between rows.
        with open("data/赛点信息.csv", 'a', newline='', encoding='utf-8') as t:
            writer = csv.writer(t)
            writer.writerow(list1)

调用百度地图API获取起点终点路线规划距离和预估时长的代码:

  1 import csv
  2 import re
  3 import time
  4 import json
  5 from urllib.request import urlopen
  6 import urllib
  7 
  8 # 原数据文件格式csv: 起点纬度 + 起点经度 + 索引 + 终点纬度 + 终点经度
  9 origin_path = 'data/b.csv'  # 原始数据文件路径
 10 result_path = 'data/result122901.txt'  # 爬取数据文件保存路径
 11 
 12 # 百度地图提供的api服务网址
 13 url_drive = r"http://api.map.baidu.com/direction/v2/driving"  # 驾车(routematrix 批量算路)
 14 url_ride = r'http://api.map.baidu.com/routematrix/v2/riding?output=json'  # 骑行
 15 url_walk = r'http://api.map.baidu.com/routematrix/v2/walking?output=json'  # 步行
 16 url_bus = r'http://api.map.baidu.com/direction/v2/transit?output=json'  # bus(direction路线规划)
 17 cod = r"&coord_type=bd09ll"
 18 # 声明坐标格式,bd09ll(百度经纬度坐标);bd09mc(百度摩卡托坐标);gcj02(国测局加密坐标),wgs84(gps设备获取的坐标)
 19 # AK为从百度地图网站申请的秘钥,额度不够的时候直接在list后面新增AK就行
 20 AK = ['FA8atAaqd1wajikD56lPqtiasdfleCeyz']
 21 # 把变量名先写入文件
 22 colnames = '设备序列号 起点 终点 状态码  步行路程(米) 步行耗时(秒)'
 23 with open(result_path, 'a', encoding='utf-8') as f:
 24     f.write(colnames)
 25     f.write('\n')
 26     f.close()
 27 
 28 address = csv.reader(open(origin_path, 'r', encoding='utf-8'))  # 读取原始文件数据
 29 
 30 # for ad in address:
 31 #     # print(ad)
 32 #     print(ad[0])
 33 #     print(ad[1])
 34 #     print(ad[2])
 35 #     print(ad[3])
 36 #     print(ad[4])
 37 n = 0
 38 akn1 = 0
 39 akn2 = 0
 40 a = 0
 41 while True:
 42     try:  # 避开错误:文件编码问题、服务器响应超时、
 43         for ad in address:
 44             if (akn1 < len(AK)) and (akn2 < len(AK)):  # 配额是否够
 45                 mac_code = str(ad[2])  # 设备序列号
 46                 try:
 47                     ori = str(ad[0]) + ',' + str(ad[1])  # 起点
 48                     des = str(ad[3]) + ',' + str(ad[4])  # 终点
 49                     ak_drive = AK[akn1]
 50                     ak_bus = AK[akn2]
 51                     ak_drive2 = r'&ak=' + ak_drive
 52                     ak_bus2 = r'&ak=' + ak_bus
 53                     ori1 = r"?origin=" + ori
 54                     des1 = r"&destination=" + des
 55                     # 以下是自驾车
 56                     tac_type = r'&tactics=11'  # 驾车路径:常规路线
 57                     # 10不走高速;11常规路线;12距离较短;13距离较短(不考虑路况)   只对驾车有效
 58                     aurl_drive = url_drive + ori1 + des1 + cod + tac_type + ak_drive2  # 驾车规划网址
 59                     res_drive = urlopen(aurl_drive)  # 打开网页
 60                     cet_drive = res_drive.read()  # 解析内容
 61                     res_drive.close()  # 关闭
 62                     result_drive = json.loads(cet_drive)  # json转dict
 63                     status = result_drive['status']
 64                     print('驾车码', status)
 65                     if status == 0:  # 状态码为0:无异常
 66                         m_drive = result_drive['result']["routes"][0]['distance']  # 里程(米)
 67                         m_drive2 = float(m_drive)  # str转float
 68                         timesec_drive = result_drive['result']["routes"][0]['duration']  # 耗时(秒)
 69                         diss_drive = '状态' + str(status) + ' ' + str(m_drive) + ' ' + str(timesec_drive)  # 驾车总
 70                     elif status == 302 or status == 210 or status == 201:  # 302:额度不足;210:IP验证未通过
 71                         m_drive2 = 10000  # 赋值(大于5km),即不爬取步行规划
 72                         akn1 += 1
 73                         diss_drive = '状态' + str(status) + ' break break'
 74                     else:
 75                         m_drive2 = 10000  # 赋值(大于5km),即不爬取步行规划
 76                         diss_drive = '状态' + str(status) + ' na na'
 77                     try:  # 当驾车规划m_drive2为空的时候,if语句发生错误
 78                         if 0 < m_drive2 < 5000:  # 里程低于5公里则爬取步行规划
 79                             aurl_walk = url_walk + ori1 + des1 + cod + ak_drive2  # 步行规划网址
 80                             res_walk = urlopen(aurl_walk)  # 打开网页
 81                             cet_walk = res_walk.read()  # 解析内容
 82                             result_walk = json.loads(cet_walk)  # json转dict
 83                             res_walk.close()  # 关闭网页
 84                             status_walk = result_walk['status']  # 状态码
 85                             if status_walk == 0:  # 状态正常
 86                                 m_walk = result_walk['result']["routes"][0]['distance']  # 步行距离
 87                                 time_walk = result_walk['result']["routes"][0]['duration']  # 步行时间
 88                                 diss_walk = str(m_walk) + ' ' + str(time_walk)  # 步行总
 89                             else:  # 状态异常
 90                                 diss_walk = 'na na'
 91                         else:  # 里程大于5km则不爬取步行规划
 92                             diss_walk = 'na na'
 93                     except:  # 发生错误时步行数据也赋值为na
 94                         diss_walk = 'na na'
 95                         pass
 96                     # 以下是乘车规划
 97                     tac_bus = r'&tactics_incity=0'
 98                     # 市内公交换乘策略 可选,默认为0      0推荐;1少换乘;2少步行;3不坐地铁;4时间短;5地铁优先
 99                     city_bus = r'&tactics_intercity=0'
100                     # 跨城公交换乘策略  可选,默认为0    0时间短;1出发早;2价格低;
101                     city_type = r'&trans_type_intercity=2'
102                     # 跨城交通方式策略  可选,默认为0  0火车优先;1飞机优先;2大巴优先;
103                     ori2 = r"&origin=" + ori
104                     des2 = r"&destination=" + des
105                     aurl_bus = url_bus + ori2 + des2 + tac_bus + city_bus + city_type + ak_bus2
106                     res_bus = urlopen(aurl_bus)
107                     cet_bus = res_bus.read()
108                     res_bus.close()
109                     result_bus = json.loads(cet_bus)
110                     status = result_bus['status']
111                     print('乘车码', status)
112                     # --------------------------------------
113                     # if status == 0:
114                     #     rsls = result_bus['result']['routes']
115                     #     if rsls == []:  # 无方案时状态也为0,但只返回一个空list
116                     #         diss_bus = '状态' + str(status) + ' ' + '无公交方案'
117                     #     else:
118                     #         m_bus = result_bus['result']['routes'][0]['distance']  # 乘车路线距离总长(米)
119                     #         time_bus = result_bus['result']['routes'][0]['duration']  # 乘车时间(秒)
120                     #         cost_bus = result_bus['result']['routes'][0]['price']  # 乘车费用(元)
121                     #         diss_bus = '状态' + str(status) + ' ' + str(m_bus) + ' ' + str(time_bus) + ' ' + str(cost_bus)
122                     # elif status == 302 or status == 210 or status == 201:
123                     #     akn2 = akn2 + 1
124                     #     diss_bus = '状态' + str(status) + ' ' + '更换AK断点'
125                     # else:  # 其他类型状态码(服务器错误)
126                     #     diss_bus = '状态' + str(status) + ' ' + '服务器错误'
127                     #     -----------------------------------------------
128                     # 汇总数据
129                     diss = mac_code + ' ' + str(ori) + ' ' + str(
130                         des) + ' ' + diss_drive + ' ' + diss_walk #+ ' ' + diss_bus
131                     with open(result_path, 'a', encoding='utf-8') as f:
132                         f.write(diss)
133                         f.write('\n')
134                         f.close()
135                     n += 1
136                     print('' + str(n) + '条已完成')
137                 except:
138                     time.sleep(3)
139                     diss_wrong = str(mac_code) + '未知错误'
140                     with open(result_path, 'a', encoding='utf-8') as f:
141                         f.write(diss_wrong)
142                         f.write('\n')
143                         f.close()
144                     continue
145             else:
146                 print('配额不足!')
147                 break
148     except:
149         time.sleep(3)
150         print('未知错误')
151         with open(result_path, 'a', encoding='utf-8') as f:
152             f.write('未知错误')
153             f.write('\n')
154             f.close()
155         continue
156     print('程序已停止运行')
157     break  # 跑完数时break打断while循环,for循环的话这里不好定义循环条件

 最终根据生成的数据作图如下:

附录:

1.数据获取(借助携程网,以好评度优先的方式爬取郑州市所有星级酒店信息)
  1 import requests
  2 import random
  3 from bs4 import BeautifulSoup
  4 import time
  5 import csv
  6 import json
  7 import re
  8 import pandas as pd
  9 import numpy as np
 10 
 11 pd.set_option('display.max_columns', 10000)
 12 pd.set_option('display.max_rows', 10000)
 13 pd.set_option('display.max_colwidth', 10000)
 14 pd.set_option('display.width',1000)
 15 
 16 # Beijing 5 star hotel list url
 17 five_star_url = "http://hotels.ctrip.com/Domestic/Tool/AjaxHotelList.aspx"
 18 filename = "data/star hotel list.csv"
 19 
 20 def Scrap_hotel_lists():
 21     """
 22     It aims to crawl the 5 star hotel lists in Beijing and save in a csv file.
 23     """
 24     headers = {
 25         "Connection": "keep-alive",
 26         "origin": "http://hotels.ctrip.com",
 27         "Host": "hotels.ctrip.com",
 28         "referer": "https://hotels.ctrip.com/hotel/zhengzhou559",
 29         "user-agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 Safari/537.36",
 30         "Content-Type":"application/x-www-form-urlencoded; charset=utf-8"
 31     }
 32 
 33 
 34 
 35     id = []
 36     name = []
 37     hotel_url = []
 38     address = []
 39     score = []
 40 
 41     # 8 pages
 42     for page in range(1,8):
 43 
 44         data = {
 45             "StartTime": "2019-09-08",  # The value depends on the date you want to scrap.
 46             "DepTime": "2019-09-18",
 47             "RoomGuestCount": "0,1,2",
 48             "cityId": 559,
 49             "cityPY": " zhengzhou",
 50             "cityCode": "0371",
 51             "cityLat": 34.758044,
 52             "cityLng": 113.673121,
 53             "page": page,
 54             "star": "3",
 55             "orderby": 3
 56         }
 57         html = requests.post(five_star_url, headers=headers, data=data)
 58 
 59         # print(html.text)
 60         j= json.loads(html.text.replace("\洛阳","洛阳"))
 61         #hotel_list = html.json()["totalMsg"]
 62         hotel_list = j["hotelPositionJSON"]
 63 
 64         for item in hotel_list:
 65             id.append(item['id'])
 66             name.append(item['name'])
 67             hotel_url.append(item['url'])
 68             address.append(item['address'])
 69             score.append(item['score'])
 70 
 71         time.sleep(random.randint(3,5))
 72     hotel_array = np.array((id, name, score, hotel_url, address)).T
 73     list_header = ['id', 'name', 'score', 'url', 'address']
 74     array_header = np.array((list_header))
 75     hotellists = np.vstack((array_header, hotel_array))
 76     with open(filename, 'a', encoding="utf-8-sig", newline="") as f:
 77         csvwriter = csv.writer(f, dialect='excel')
 78         csvwriter.writerows(hotellists)
 79 
 80 
 81 def hotel_detail(hotel_id):
 82     """
 83     It aims to scrap the detailed information of a specific hotel.
 84     """
 85     headers = {"Connection": "keep-alive",
 86                "Accept-Language": "zh-CN,zh;q=0.9",
 87                "Cache-Control": "max-age=0",
 88                "Content-Type": "application/x-www-form-urlencoded; charset=utf-8",
 89                "Host": "hotels.ctrip.com",
 90                "If-Modified-Since": "Thu, 01 Jan 1970 00:00:00 GMT",
 91                "Referer": "http://hotels.ctrip.com/hotel/2231618.html",
 92                "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
 93                              "Chrome/69.0.3497.92 Safari/537.36"
 94                }
 95 
 96     basic_url = "http://hotels.ctrip.com/Domestic/tool/AjaxHote1RoomListForDetai1.aspx?hotel="
 97     url = basic_url + str(hotel_id)
 98 
 99     r = requests.get(url, headers=headers)
100     # Response is a json object.
101     html = r.json()['html']
102     soup = BeautifulSoup(html, "lxml")
103     rooms = soup.findAll('td', attrs={"class": "child_name J_Col_RoomName"})
104 
105     RoomID = []
106     RoomName = []
107     LowPrice = []
108     RoomSize = []
109     RoomLevel = []
110     IsAddBed = []
111     BedSize = []
112     CustomerNum = []
113 
114     # Regex Pattern
115     baseroom_pattern = re.compile(r'<[^>]+>')  # r'<[^>]+>'
116 
117     for idx in range(len(rooms)):
118         if rooms[idx].has_attr(key='data-baseroominfo'):
119             room_info_str = rooms[idx]['data-baseroominfo']
120             room_info_json = json.loads(room_info_str)
121             RoomID.append(str(room_info_json["RoomID"]))
122             RoomName.append(room_info_json["RoomName"])
123             LowPrice.append(room_info_json["LowPrice"])
124 
125             baseroom_info = room_info_json["BaseRoomInfo"]
126             # print(type(baseroom_info))
127             # <class 'str'>
128             remove_tag = baseroom_pattern.sub("", baseroom_info)
129             RoomDetailInfo = remove_tag.split("|")
130             if len(RoomDetailInfo) == 4:
131                 RoomDetailInfo.insert(3, None)
132 
133             RoomSize.append(RoomDetailInfo[0])
134             RoomLevel.append(RoomDetailInfo[1])
135             BedSize.append(RoomDetailInfo[2])
136             IsAddBed.append(RoomDetailInfo[3])
137             CustomerNum.append(RoomDetailInfo[4])
138         else:
139             continue
140 
141     RoomInfo = np.array((RoomID, RoomName, LowPrice, RoomSize, RoomLevel, BedSize, IsAddBed, CustomerNum)).T
142     # Create a DataFrame object
143     # print(RoomInfo)
144     column_name = ['RoomID', 'RoomName', 'LowPrice', 'RoomSize', 'RoomLevel', 'BedSize', 'IsAddBed', 'CustomerNum']
145     df = pd.DataFrame(data=RoomInfo, columns=column_name)
146     print(df)
147 
148 
149 if __name__ == "__main__":
150 
151     # # 1. Scrap 5 star hotel list in Beijing
152     Scrap_hotel_lists()
153 
154     # 2. Scrap the detailed hotel information
155     df = pd.read_csv(filename, encoding='utf8')
156     print("1. Beijing 5 Star Hotel Lists")
157     print(df)
158     hotelID = df["id"]
159     print('\n')
160 
161     while True:
162         print("2.1 If you find to search the detail hotel information, please input the hotel index in the DataFrame.")
163         print("2.2 If you want to quit, input 'q'.")
164 
165         print("Please input the Parameter: ")
166         input_param = input()
167         if input_param.isnumeric():
168             hotel_index = int(input_param)
169             if 0 <= hotel_index <= 170:
170                 print("3. The detail information of the Hotel:")
171                 hotel_detail(hotelID[hotel_index])
172             else:
173                 print('Hotel Index out of range! ')
174                 print('Remember: 0 <= Hotel Index <= 170')
175                 print('Please input again.')
176                 continue
177         elif input_param == 'q':
178             print('See you later!')
179             break
180         else:
181             print('Invalid Input!')
182             print('\n')
183             continue

 

2.根据生成数据绘制酒店信息云图
from pyecharts import WordCloud
import random

# Draw a word cloud of hotel names.
# hotel_list2 is not defined in this snippet; it must already hold the list
# of hotel names before this code runs.
name1 = hotel_list2

# One weight per name. The original post pasted a fixed list here
# (296, 630, ...) with the rest elided, which is not valid Python, so the
# weights are drawn at random instead: integers from 300 to 800 inclusive.
random_list = [random.randint(300, 800) for _ in name1]
# print('生成的随机整数列表为:\n', random_list)

value = random_list
wordcloud = WordCloud(width=1300, height=800)
wordcloud.add("酒店信息", name1, value, word_size_range=[10, 20], shape='pentagon')
wordcloud.show_config()
wordcloud.render()

Note:以上纯属娱乐学习之用。

posted @ 2019-05-10 10:07  泊月居  阅读(4114)  评论(2)  编辑  收藏  举报