| |
| |
| |
| import requests |
| from fake_useragent import UserAgent |
| import pandas as pd |
| |
| import numpy as np |
| from urllib.parse import quote |
| import re |
| from time import sleep |
| from random import randint |
| import random |
| |
| |
| |
| |
| |
# Input table read from the Excel workbook. Column index 1 is the one the
# rest of this file reads place names from (see Url_join / Make_dataform).
place_name = pd.read_excel('企业信息获取.xlsx')

# Row count and column count of the input table.
hangshu, leishu = place_name.shape
| |
| |
| |
| |
| |
| |
class Url_Mnger:
    """Build Baidu geocoder request URLs from place names."""

    def Url_join(self, hangshu, names=None):
        """Return one geocoder URL per place name.

        Args:
            hangshu: How many names to process, counted from the first one.
            names: Optional sequence of place-name strings. When omitted,
                the names are taken from column index 1 of the module-level
                ``place_name`` table, as the original implementation did.

        Returns:
            A list of URL strings in the same order as the names.

        Raises:
            IndexError: If ``hangshu`` is larger than the number of names.
        """
        if names is None:
            names = place_name.iloc[:, 1].tolist()
        else:
            names = list(names)

        all_url = []
        for i in range(hangshu):
            village_name = names[i]
            # Percent-encode the name so non-ASCII text and spaces are
            # valid inside the query string.
            place_encode = quote(village_name)
            url = 'http://api.map.baidu.com/geocoder?address={}'.format(place_encode)
            print(village_name, url)
            all_url.append(url)
        return all_url
| |
class Response_Cast(object):
    """Fetch the body of a single HTTP response."""

    def Get_response(self, url, timeout=10):
        """Send a GET request to ``url`` and return the response body as text.

        The request carries a ``User-Agent`` header whose value comes from
        ``fake_useragent.UserAgent().chrome``.

        Args:
            url: Fully formed request URL.
            timeout: Seconds to wait on the server before giving up.
                Without a timeout ``requests`` waits indefinitely, so a
                single stalled connection would hang the whole batch.

        Returns:
            The response body decoded to ``str`` (``response.text``).

        Raises:
            requests.exceptions.RequestException: On connection failures
                or when the timeout elapses.
        """
        headers = {
            'User-Agent': UserAgent().chrome,
        }
        response = requests.get(url=url, headers=headers, timeout=timeout)
        return response.text
| |
class Info_Manger:
    """Parse geocoder responses and store the collected coordinates."""

    def Parse_html(self, info_text):
        """Extract the first ``<lat>`` and ``<lng>`` values from a response.

        Args:
            info_text: Response body as text.

        Returns:
            A ``(latitude, longitude)`` tuple of strings. A value is
            ``None`` when its tag is not present in ``info_text``, so one
            unresolvable address does not raise and abort a whole batch.
        """
        # Non-greedy capture: stop at the first closing tag even if the
        # same tag appears more than once on one line.
        lat_match = re.search(r'<lat>(.+?)</lat>', info_text)
        lng_match = re.search(r'<lng>(.+?)</lng>', info_text)
        latitude = lat_match.group(1) if lat_match else None
        longitude = lng_match.group(1) if lng_match else None
        print(latitude, longitude)
        return latitude, longitude

    def Make_dataform(self, lat, longi):
        """Combine place names and coordinates into one DataFrame.

        Args:
            lat: Latitude values, one per row of ``place_name``.
            longi: Longitude values, one per row of ``place_name``.

        Returns:
            A DataFrame with columns ``a_point`` (column index 1 of the
            module-level ``place_name`` table), ``a_lat`` and ``a_longi``.
        """
        df = pd.DataFrame({
            'a_point': place_name.iloc[:, 1],
            'a_lat': lat,
            'a_longi': longi,
        })
        return df

    def Savedata(self, df):
        """Write ``df`` to ``geo_data_gaode_quchong.csv`` encoded as GBK."""
        df.to_csv('geo_data_gaode_quchong.csv', encoding='GBK')
| |
| |
| |
| |
class Run_Scrapy:
    """Run the whole pipeline: build URLs, fetch, parse, and save to CSV."""

    def __init__(self):
        """Geocode every row of the input table and write the result file.

        All work happens during construction. Requests are issued one at a
        time with a random 1-1.5 second pause before each.
        """
        url_manger = Url_Mnger()
        response_cast = Response_Cast()
        info_manger = Info_Manger()

        url_list = url_manger.Url_join(hangshu)
        url_list_length = len(url_list)
        lat = []
        longi = []

        for j, url in enumerate(url_list):
            print(j, '/', url_list_length)
            # Random pause between requests.
            sleep(random.uniform(1, 1.5))
            try:
                response_info = response_cast.Get_response(url)
                info_latitude, info_longitude = info_manger.Parse_html(response_info)
            except (requests.RequestException, IndexError) as err:
                # Keep going: record an empty coordinate for this row so
                # the rows already fetched are not lost and the lists stay
                # aligned with the input table.
                print('geocoding failed for', url, ':', err)
                info_latitude = None
                info_longitude = None
            lat.append(info_latitude)
            longi.append(info_longitude)

        make_dataform = info_manger.Make_dataform(lat, longi)
        info_manger.Savedata(make_dataform)
| |
if __name__ == '__main__':
    # Run_Scrapy does all of its work in __init__, so it must be
    # instantiated; a bare reference to the class would do nothing.
    Run_Scrapy()
| |
| |
| |
| |
| |
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 分享一个免费、快速、无限量使用的满血 DeepSeek R1 模型,支持深度思考和联网搜索!
· 基于 Docker 搭建 FRP 内网穿透开源项目(很简单哒)
· ollama系列01:轻松3步本地部署deepseek,普通电脑可用
· 25岁的心里话
· 按钮权限的设计及实现