Python——使用高德API获取指定城市指定类别POI并实现XLSX文件合并

# 以下内容为原创,转载请注明出处
import glob  # list the files inside a folder
import http.client  # HTTPException, raised by urlopen on protocol-level failures
import json  # decode the JSON response
import os  # create the output folder, build paths
import time  # timestamps for the progress messages
import urllib.parse  # build a correctly escaped query string
import urllib.request  # HTTP requests (Python 3 stdlib; for the Python 2/3 urllib differences see http://blog.csdn.net/Jurbo/article/details/52313636)

import xlrd  # read xlsx. NOTE: xlrd >= 2.0 dropped .xlsx support, so this script needs xlrd < 2.0
import xlsxwriter  # write xlsx

# Endpoint of the Amap keyword-search web service,
# see http://lbs.amap.com/api/webservice/guide/api/search/
AMAP_TEXT_SEARCH_URL = "http://restapi.amap.com/v3/place/text"
# POIs requested per page (the original author notes 25 is the official maximum).
PAGE_SIZE = 25
# Pages 1..MAX_PAGES are requested at most for one city / POI-type pair.
MAX_PAGES = 100

# Fields copied verbatim from every POI record, in output-column order.
_POI_FIELDS = ["id", "name", "type", "typecode", "biz_type", "address",
               "location", "tel", "pname", "cityname", "adname"]
# Fields read from the nested "biz_ext" object; they follow _POI_FIELDS.
_BIZ_EXT_FIELDS = ["rating", "cost"]


def _timestamp():
    """Return the current local time formatted for the progress messages."""
    return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())


def _cell_text(value):
    """Return a spreadsheet cell value as text.

    xlrd hands numeric cells back as floats, so a code typed as 110000 arrives
    as 110000.0; whole-number floats are rendered without the trailing ".0".
    """
    if isinstance(value, float) and value.is_integer():
        return str(int(value))
    return str(value)


def _poi_row(poi):
    """Flatten one POI dict into a list of strings matching the output header."""
    row = [str(poi.get(field, "")) for field in _POI_FIELDS]
    biz_ext = poi.get("biz_ext")
    # Assumption (confirm against the API docs): biz_ext may come back as a
    # non-dict placeholder when the POI has no business data.
    if not isinstance(biz_ext, dict):
        biz_ext = {}
    row.extend(str(biz_ext.get(field, "")) for field in _BIZ_EXT_FIELDS)
    return row


def _fetch_pois(url):
    """Download one result page and return its list of POI dicts.

    Returns an empty list (after printing the payload) when the response has
    no usable "pois" list. Network and JSON errors propagate to the caller.
    """
    with urllib.request.urlopen(url) as response:
        payload = json.load(response)
    pois = payload.get("pois") if isinstance(payload, dict) else None
    if not isinstance(pois, list):
        print(_timestamp() + ":接口返回异常:" + str(payload))
        return []
    return pois


def _crawl_to_workbook(out_path, city_code, poi_type, key):
    """Fetch every page for one city / POI-type pair and save it to out_path."""
    # The context manager guarantees the workbook is closed (and therefore
    # written to disk) even if a page fails half-way.
    with xlsxwriter.Workbook(out_path) as workbook:
        sheet = workbook.add_worksheet("result")
        sheet.write_row(0, 0, _POI_FIELDS + _BIZ_EXT_FIELDS)
        row_index = 1
        for page_index in range(1, MAX_PAGES + 1):
            query = urllib.parse.urlencode([
                ("keywords", ""),
                ("types", poi_type),
                ("city", city_code),
                ("citylimit", "true"),
                ("offset", PAGE_SIZE),
                ("page", page_index),
                ("key", key),
                ("extensions", "all"),
            ])
            try:
                pois = _fetch_pois(AMAP_TEXT_SEARCH_URL + "?" + query)
            except (OSError, ValueError, http.client.HTTPException) as error:
                # Best effort: keep what was fetched so far, but say why we stop.
                print(_timestamp() + ":请求失败,已跳过剩余页:" + str(error))
                break
            for poi in pois:
                sheet.write_row(row_index, 0, _poi_row(poi))
                row_index += 1
            # A short (or empty) page is the last one.
            if len(pois) < PAGE_SIZE:
                break


def xlsx_merge(folder, header, filename):
    """Merge the first sheet of every .xlsx file in folder into one workbook.

    The per-query files are kept separately on purpose: they serve different
    uses, keep memory low, and allow an interrupted crawl to be resumed.

    Args:
        folder: directory holding the .xlsx files; the merged file is written
            there as well.
        header: sequence of column titles written as the first row.
        filename: name of the merged workbook, without the ".xlsx" extension.

    The first row of every source file is treated as its header and skipped.
    A file that cannot be read is reported and skipped. A merged file left by
    a previous run is not merged into itself.
    """
    target = os.path.join(folder, filename + ".xlsx")
    target_key = os.path.normcase(os.path.abspath(target))
    # Sorted so the row order of the result is reproducible.
    sources = sorted(
        path for path in glob.glob(os.path.join(folder, "*.xlsx"))
        if os.path.normcase(os.path.abspath(path)) != target_key
    )
    merged_count = 0
    with xlsxwriter.Workbook(target) as workbook:
        merged = workbook.add_worksheet("merged")
        merged.write_row(0, 0, header)
        row_index = 1
        for path in sources:
            try:
                sheet = xlrd.open_workbook(path).sheet_by_index(0)
            except Exception as e:
                print("%s: %s" % (path, e))
                continue
            # Rows are streamed straight into the output instead of being
            # collected in memory first.
            for source_row in range(1, sheet.nrows):
                merged.write_row(row_index, 0, sheet.row_values(source_row))
                row_index += 1
            merged_count += 1
    print("已完成%d个文件的合并" % merged_count)


def poi_by_adcode_poicode(folder, city_file="city", poi_file="poi",
                          result_file="result", merge_or_not=1, key="你的key"):
    """Download the POIs of every listed city and POI type from the Amap API.

    Requirements:
        * The input tables are XLSX files located in folder/input/.
        * Each list is on the first sheet; the first column holds the name,
          the second column the code; the first row is a header.
        * folder must not be a filesystem root, because sub-directories are
          created inside it.

    One workbook per city / POI-type pair is written to folder/output/.

    Args:
        folder: working directory containing the "input" sub-folder.
        city_file: name of the city table, without extension.
        poi_file: name of the POI-type table, without extension.
        result_file: name of the merged workbook, without extension.
        merge_or_not: when equal to 1, merge all output files at the end.
        key: Amap web-service API key.
    """
    header_full = _POI_FIELDS + _BIZ_EXT_FIELDS
    output_folder = os.path.join(folder, "output")
    os.makedirs(output_folder, exist_ok=True)

    # Read the lists, dropping the header row of each table.
    city_sheet = xlrd.open_workbook(
        os.path.join(folder, "input", city_file + ".xlsx")).sheet_by_index(0)
    poi_type_sheet = xlrd.open_workbook(
        os.path.join(folder, "input", poi_file + ".xlsx")).sheet_by_index(0)
    cities = [
        (_cell_text(name), _cell_text(code))
        for name, code in zip(city_sheet.col_values(0)[1:],
                              city_sheet.col_values(1)[1:])
    ]
    poi_types = [_cell_text(code) for code in poi_type_sheet.col_values(1)[1:]]

    # Progress indicator.
    total_work = len(cities) * len(poi_types)
    work_index = 1
    print(_timestamp() + ":抓取开始!")
    for city_name, city_code in cities:
        for poi_type in poi_types:
            out_path = os.path.join(output_folder,
                                    city_name + poi_type + ".xlsx")
            _crawl_to_workbook(out_path, city_code, poi_type, key)
            print(_timestamp() + "" + city_name + " " + poi_type
                  + " 已获取!进度:%.2f%%" % (work_index / total_work * 100))
            work_index += 1
    print(_timestamp() + ":所有地区各类别POI获取完毕")
    if merge_or_not == 1:
        xlsx_merge(output_folder, header_full, result_file)
        print(_timestamp() + ":已对文件进行合并!")
    else:
        print(_timestamp() + ":未进行合并!")
    print(_timestamp() + ":所有工作完成!")


# Usage
if __name__ == "__main__":
    poi_by_adcode_poicode("E:/test/", "city", "poi", "result", 1)

 

posted @ 2017-11-17 14:32  HsiehTengK`o  阅读(1940)  评论(0)  编辑  收藏  举报