Scraping latitude and longitude from Baidu (Baidu Maps coordinates are offset-encrypted)

#!/usr/bin/env python
# coding: utf-8
# Data scraping
import requests
from fake_useragent import UserAgent
import pandas as pd
#import xlrd
import numpy as np
from urllib.parse import quote
import re
from time import sleep
from random import randint
import random
# In[2]:
place_name = pd.read_excel('企业信息获取.xlsx')  # spreadsheet of company information
hangshu = place_name.shape[0]  # number of rows
leishu = place_name.shape[1]   # number of columns
place_name  # notebook-style preview of the data
# In[ ]:
# URL manager class: builds one geocoding request URL per row of the spreadsheet
class Url_Mnger:
    # hangshu = place_name.shape[0]
    # leishu = place_name.shape[1]
    def Url_join(self, hangshu):
        all_url = []
        for i in range(hangshu):  # loop over every row
            # column 1 holds the place/company name
            village_name = place_name.iloc[i, 1]
            place_encode = quote(village_name)  # URL-encode the name
            url = 'http://api.map.baidu.com/geocoder?address={}'.format(place_encode)
            print(village_name, url)
            all_url.append(url)
        return all_url
# Request sending class
class Response_Cast(object):
    def Get_response(self, url):
        headers = {
            'User-Agent': UserAgent().chrome  # random Chrome user agent for each request
        }
        response = requests.get(url=url, headers=headers)
        return response.text
# Data management class
class Info_Manger:
    def Parse_html(self, info_text):  # parse latitude/longitude out of the XML response
        latitude = re.findall(r'<lat>(.+)</lat>', info_text)
        longitude = re.findall(r'<lng>(.+)</lng>', info_text)
        latitude = latitude[0]
        longitude = longitude[0]
        print(latitude, longitude)
        return latitude, longitude

    # def Data_join(self, latitude, longitude):
    #     lat.append(latitude)
    #     longi.append(longitude)
    #     return lat, longi

    def Make_dataform(self, lat, longi):
        df = pd.DataFrame({'a_point': place_name.iloc[:, 1], 'a_lat': lat, 'a_longi': longi})
        return df

    # Save the results
    def Savedata(self, df):
        df.to_csv('geo_data_gaode_quchong.csv', encoding='GBK')
        # with open('geo.csv', 'w', encoding='utf-8') as f:
        #     f.write(df)
        #     f.close()
class Run_Scrapy:
    def __init__(self):
        url_manger = Url_Mnger()
        url_list = url_manger.Url_join(hangshu)
        url_list_length = len(url_list)
        response_cast = Response_Cast()
        info_manger = Info_Manger()
        lat = []
        longi = []
        # print(url_list)
        for url, j in zip(url_list, range(url_list_length)):
            print(j, '/', url_list_length)
            sleep(random.uniform(1, 1.5))  # throttle every request
            if (j % 100) == 0:
                # sleep(random.uniform(3, 10))  # optional longer pause every 100 requests
                response_info = response_cast.Get_response(url)
                info_latitude, info_longitude = info_manger.Parse_html(response_info)
                lat.append(info_latitude)
                longi.append(info_longitude)
                # print(lat, longi)
            else:
                response_info = response_cast.Get_response(url)
                info_latitude, info_longitude = info_manger.Parse_html(response_info)
                lat.append(info_latitude)
                longi.append(info_longitude)
                # print(lat, longi)
            # if (j % 100) == 10:
            #     make_dataform = info_manger.Make_dataform(lat, longi)
            #     info_manger.Savedata(make_dataform)
        make_dataform = info_manger.Make_dataform(lat, longi)
        info_manger.Savedata(make_dataform)
if __name__ == '__main__':
    Run_Scrapy()  # instantiating the class kicks off the whole scrape in __init__
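
As noted in the title, the coordinates returned by Baidu's geocoder are in the offset BD-09 system rather than standard WGS-84, so the scraped points will not line up with other map sources as-is. Below is a minimal post-processing sketch using the commonly circulated BD-09 to GCJ-02 approximation; the helper name bd09_to_gcj02 and the output file geo_data_gcj02.csv are my own additions (the input columns a_lat / a_longi match the CSV written above), and GCJ-02 is itself still shifted relative to WGS-84.

import math
import pandas as pd

X_PI = math.pi * 3000.0 / 180.0

def bd09_to_gcj02(bd_lng, bd_lat):
    # Commonly circulated approximate inverse of Baidu's BD-09 offset
    x = bd_lng - 0.0065
    y = bd_lat - 0.006
    z = math.sqrt(x * x + y * y) - 0.00002 * math.sin(y * X_PI)
    theta = math.atan2(y, x) - 0.000003 * math.cos(x * X_PI)
    return z * math.cos(theta), z * math.sin(theta)

# Hypothetical post-processing of the CSV produced by the scraper above
df = pd.read_csv('geo_data_gaode_quchong.csv', encoding='GBK')
df[['gcj_lng', 'gcj_lat']] = df.apply(
    lambda row: bd09_to_gcj02(float(row['a_longi']), float(row['a_lat'])),
    axis=1, result_type='expand')
df.to_csv('geo_data_gcj02.csv', encoding='GBK', index=False)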