气象数据调研总结
1.判断字符是否为中文（Unicode 范围 U+4E00–U+9FFF）
def is_chinese(char: str) -> bool:
    """Return whether ``char`` lies in the Unicode range U+4E00..U+9FFF.

    The check is a plain string comparison against the two range
    endpoints, so it is meant to be called with a single character.
    An empty string compares below the lower bound and yields False.

    Args:
        char: The string (normally one character) to test.

    Returns:
        True if ``char`` compares within U+4E00..U+9FFF inclusive,
        otherwise False.
    """
    # The chained comparison already evaluates to a bool, so no
    # if/else wrapper is needed.
    return '\u4e00' <= char <= '\u9fff'
2.逆地理编码---百度地图
AK = ''
lat = df['LATITUDE'].values[0] lon = df['LONGITUDE'].values[0] address = str(lat)+','+str(lon) url = 'https://api.map.baidu.com/reverse_geocoding/v3/?ak=%s&output=json&coordtype=wgs84ll&location=%s&extensions_town=%s'% (AK, address,'true') respose = requests.get(url) result = respose.json() station = df['NAME'].values[0] city = result['result']['addressComponent']['city']
3.逆地理编码---高德地图
# Amap (Gaode) reverse geocoding for one hard-coded coordinate pair.
lat = 36.18333
lon = 118.15
AK = '***'  # API key placeholder -- presumably must be replaced with a real key

# Note the argument order: the ``location`` parameter is filled as
# "<lon>,<lat>" (longitude first), the opposite of the Baidu snippet.
url = "https://restapi.amap.com/v3/geocode/regeo?output=json&location={0},{1}&key={2}&extensions=all".format(str(lon), str(lat),AK)

# Without a timeout requests may wait forever on an unresponsive server.
res = requests.get(url, timeout=10)
val = res.json()  # decoded JSON body of the response
4.requests爬虫
# Download the NOAA ISD-Lite 2023 directory listing and parse it with
# BeautifulSoup.
# A browser-like User-Agent header is sent along with the request.
hd = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.81 Safari/537.36 SE 2.X MetaSr 1.0'}

# Without a timeout requests may wait forever on an unresponsive server.
r = requests.get(
    "https://www.ncei.noaa.gov/pub/data/noaa/isd-lite/2023/",
    headers=hd,
    timeout=30,
)
html = r.text
soup = BeautifulSoup(html, "html.parser")

# ``prettify`` is a method: the bare attribute access ``soup.prettify`` did
# nothing. Call it and print the indented markup so it can be inspected.
print(soup.prettify())
5.爬取表格
# Extract the HTML table with id "tab_100" from the parsed page and load it
# into a DataFrame.
from io import StringIO

table1 = soup.find_all('table', id='tab_100')

# pandas deprecated passing literal HTML strings to read_html (2.1+), so the
# markup is wrapped in a file-like StringIO. read_html returns a list of
# DataFrames; only the first one is kept.
# NOTE(review): table1[0] raises IndexError when no matching table exists.
df1 = pd.read_html(StringIO(table1[0].prettify()))[0]