“城市公交线路站点数据爬取 + csv站点数据转ShapeFile矢量数据”(二)配置 html 页面以及抓包 json 数据
(一)在高德开放平台上申请 API 开发 Key,然后配置 html 页面,我这里直接用了官方的 demo 页。
<!DOCTYPE html>
<!--
  Minimal AMap (Gaode) JS API demo page: creates a map in #container and runs
  one bus-line keyword search, logging the status and result to the console.
  Replace the key placeholder in the script URL with your own API key.
-->
<html>
<head>
  <!-- The inline script contains non-ASCII literals, so declare the encoding. -->
  <meta charset="utf-8">
  <style type="text/css">
    #container { width: 800px; height: 500px; }
  </style>
  <script type="text/javascript" src="https://webapi.amap.com/maps?v=1.4.15&key=你的Key"></script>
</head>
<body>
  <div id="container"></div>

  <script type="text/javascript">
    var map = new AMap.Map('container');
    /*
    // Alternative constructor call with explicit view options:
    var map = new AMap.Map('container', {
      zoom: 11,                        // zoom level
      center: [116.397428, 39.90923],  // centre coordinates
      viewMode: '3D'                   // use the 3D view
    });
    */
  </script>

  <script type="text/javascript">
    AMap.plugin(["AMap.LineSearch"], function() {
      // Instantiate the bus-line search class.
      var linesearch = new AMap.LineSearch({
        pageIndex: 1,      // page number (default 1)
        pageSize: 1,       // results per page (default 20, maximum 50)
        city: "北京",      // restrict the search to a city: name (Chinese or pinyin) or city code; default is nationwide
        extensions: "all"  // return detailed line information (default is 'base')
      });

      // Run a keyword search for the bus line.
      linesearch.search('536', function(status, result) {
        // Log the status and the result object.
        console.log(status, result);
      });
    });
  </script>
</body>
</html>
(二)抓包,解析,并存储为csv
这里需要把py文件和城市公交xlsx放在同一文件夹下。
"""Crawl bus-stop data from the AMap bus-line REST endpoint and save it as CSV.

For every city in ``listC`` the script reads ``<city>.xlsx`` (bus-line names in
the first column, no header row), queries the endpoint once per line name,
keeps the first returned line and writes all of its stops to one CSV file.
"""
import json
import os
import random
import time
import urllib
import urllib.parse
import urllib.request
from urllib import request

import pandas as pd

# Request settings. The key and User-Agent are placeholders that must be
# replaced with real values before running the crawl.
BASE_URL = "https://restapi.amap.com/v3/bus/linename?s=rsv3&extensions=all&key=你弟Key&output=json&pageIndex=1&offset=1&"
REQUEST_HEADERS = {'User-Agent': "你弟UA"}
REQUEST_TIMEOUT = 30  # seconds; stops one stalled connection from hanging the crawl

# Column order of the output CSV; matches the row layout built by extratStations.
STATION_COLUMNS = ["ID", "BusName", "StationName", "LON", "LAT"]


def extratStations(busListSlt):
    """Flatten one bus-line record into a list of station rows.

    Args:
        busListSlt: dict for a single bus line. Reads ``"name"`` and
            ``"busstops"``; each stop must provide ``"id"``, ``"name"`` and a
            ``"location"`` string of the form ``"<lon>,<lat>"``.

    Returns:
        A list of ``[stop id, line name, stop name, lon, lat]`` rows, with the
        coordinates kept as strings. Empty when the line has no stops.
    """
    busName = busListSlt["name"]
    stationSet = []
    # A missing or null stop list is treated as "no stops".
    for bs in busListSlt.get("busstops") or []:
        cor = bs["location"].split(",")
        stationSet.append([bs["id"], busName, bs["name"], cor[0], cor[1]])
    return stationSet


def writeStation(listData, fileName, outputDir=None):
    """Write station rows to a CSV file with a fixed five-column header.

    Args:
        listData: list of five-element rows as produced by ``extratStations``.
        fileName: name of the CSV file to create.
        outputDir: directory to write into. When omitted, the original
            hard-coded Windows folder is used.
    """
    if outputDir is None:
        targetPath = "E:\\公交线路\\江苏省\\江苏点总量\\{}".format(fileName)
    else:
        targetPath = os.path.join(outputDir, fileName)
    # Naming the columns at construction keeps the header valid even when
    # listData is empty; passing header aliases to to_csv for a zero-column
    # frame raises ValueError.
    data1 = pd.DataFrame(listData, columns=STATION_COLUMNS)
    data1.to_csv(targetPath, index=False, na_rep="NULL")


def _fetchBusLines(cityName, keyword):
    """Query the endpoint for one line name and return its ``buslines`` list."""
    params = {'city': cityName, 'keywords': keyword}
    targetUrl = BASE_URL + urllib.parse.urlencode(params)
    req = urllib.request.Request(url=targetUrl, headers=REQUEST_HEADERS)
    # The context manager closes the HTTP response even if decoding fails.
    with urllib.request.urlopen(req, timeout=REQUEST_TIMEOUT) as res:
        jsonData = json.loads(res.read())
    # Payloads without a "buslines" key are treated as "no result".
    return jsonData.get("buslines") or []


def main(cityName):
    """Crawl every bus line listed in ``<cityName>.xlsx`` and save its stops.

    Args:
        cityName: city identifier; used as the workbook name, as the ``city``
            query parameter and as the prefix of the output CSV name.
    """
    df = pd.read_excel("{}.xlsx".format(cityName), header=None)
    busStations = []
    for bus in df[0]:
        print(bus)
        try:
            busList = _fetchBusLines(cityName, bus)
        except (OSError, ValueError) as err:
            # Network failures (URLError/timeouts are OSError) and malformed
            # JSON (ValueError) skip this line only, so the rows collected so
            # far are still written at the end.
            print("skip {}: {}".format(bus, err))
            continue
        if not busList:
            continue
        # Only the first match is kept.
        busStations += extratStations(busList[0])
        # Optional throttle between requests:
        # time.sleep(random.random() * random.randint(0, 7) + random.randint(0, 5))
    writeStation(busStations, "{}点总量.csv".format(cityName))


# Cities to process; each needs a "<name>.xlsx" workbook next to this script.
listC = ["yancheng", "yangzhou", "zhenjiang"]

if __name__ == "__main__":
    # Guarded so importing this module does not start a crawl.
    for i in listC:
        main(i)
最终结果如图: