Python3 抓取中国天气网不同城市 7 天、15 天实时数据
思路:
1、根据city.txt文档来获取不同城市code
2、获取中国天气网7d和15d不同城市url
3、利用requests库请求url获取html内容
4、利用 BeautifulSoup 解析 html,获取 7d 和 15d 指定天气数据
5、将获取的天气数据保存到csv文件中
# -*- coding: utf-8 -*-
"""Scrape 7-day and 15-day forecasts from weather.com.cn into a CSV file.

City names typed at the prompt are mapped to site codes through ``city.txt``
(read from the current working directory), the matching forecast pages are
downloaded, and the extracted rows are appended to ``OUTPUT_CSV``.
"""
import csv

import requests
from bs4 import BeautifulSoup

# Raw string: '\w' is not a valid escape sequence, and newer Python versions
# warn about it in ordinary string literals.  The value is unchanged.
OUTPUT_CSV = r'E:\weather.csv'


def get_citycode(city_name):
    """Return the site code for ``city_name`` as listed in ``city.txt``.

    Each line of ``city.txt`` is read as ``code=name``.  A line whose name
    part equals ``city_name`` exactly wins; if there is none, the first line
    that merely contains ``city_name`` is used.

    Raises:
        ValueError: if ``city_name`` is blank or no line matches.
    """
    city_name = city_name.strip()
    if not city_name:
        # '' is a substring of every line, so a blank name would otherwise
        # silently resolve to the first city in the file.
        raise ValueError('invalid city name')

    fallback = None
    with open('city.txt', 'r', encoding='UTF-8') as fs:
        for line in fs:
            if city_name not in line:
                continue
            code, _, name = line.partition('=')
            if name.strip() == city_name:
                return code.strip()
            if fallback is None:
                fallback = code.strip()

    if fallback is None:
        raise ValueError('invalid city name')
    return fallback


def get_7d_url(city_name):
    """Return the 7-day forecast URL for ``city_name``."""
    url = 'http://www.weather.com.cn/weather/'
    return url + get_citycode(city_name) + '.shtml'


def get_15d_url(city_name):
    """Return the 15-day forecast URL for ``city_name``."""
    url = 'http://www.weather.com.cn/weather15d/'
    return url + get_citycode(city_name) + '.shtml'


def get_content(url, data=None):
    """Download ``url`` and return the response body decoded as UTF-8.

    ``data`` is kept for backward compatibility and is not used.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    rep = requests.get(url, timeout=60)
    # Fail here instead of trying to parse an error page as a forecast.
    rep.raise_for_status()
    rep.encoding = 'utf-8'
    return rep.text


def get_7d_data(htmltext, city):
    """Extract one row per day from the 7-day forecast page.

    Returns a list of ``[city, date, weather, high, low, wind_force]`` rows.
    ``high`` is ``None`` when the temperature paragraph has no ``<span>``.
    """
    bs = BeautifulSoup(htmltext, "html.parser")
    days = bs.body.find('div', {'id': '7d'}).find('ul').find_all('li')

    content = []
    for day in days:
        p = day.find_all('p')
        high_tag = p[1].find('span')
        content.append([
            city,
            day.find('h1').string,                           # date
            p[0].string,                                     # weather text
            None if high_tag is None else high_tag.string,   # high
            p[1].find('i').string,                           # low
            p[2].find('i').string,                           # wind force
        ])
    return content


def get_15d_data(htmltext, city):
    """Extract one row per day from the 15-day forecast page.

    Returns a list of
    ``[city, date, weather, high, low, wind_direction, wind_force]`` rows.
    ``high`` is ``None`` when the temperature span has no ``<em>``.
    """
    bs = BeautifulSoup(htmltext, "html.parser")
    days = bs.body.find('div', {'id': '15d'}).find('ul').find_all('li')

    content = []
    for day in days:
        span = day.find_all('span')
        tem = span[2]
        high_tag = tem.find('em')
        if high_tag is None:
            temperature_H = None
            temperature_L = tem.string
        else:
            temperature_H = high_tag.string
            # Tag.string is None once the span holds both <em> and text, so
            # collect the span's own text nodes instead.
            own_text = ''.join(
                child for child in tem.children if isinstance(child, str)
            )
            # NOTE(review): presumably the page separates high and low with
            # '/'; it is stripped so only the low value remains -- confirm.
            temperature_L = own_text.strip().lstrip('/').strip() or None
        content.append([
            city,
            span[0].string,    # date
            span[1].string,    # weather text
            temperature_H,
            temperature_L,
            span[3].string,    # wind direction
            span[4].string,    # wind force
        ])
    return content


def save_data(data, filename):
    """Append the rows in ``data`` to the CSV file ``filename``."""
    # newline='' leaves line endings to the csv module; without it blank
    # lines can appear between rows.
    # NOTE(review): no encoding is given, so the platform default applies and
    # errors='ignore' silently drops characters it cannot encode.
    with open(filename, 'a', errors='ignore', newline='') as f:
        csv.writer(f).writerows(data)


def _7d(city):
    """Scrape the 7-day forecast for ``city`` and append it to the CSV file."""
    html = get_content(get_7d_url(city))
    save_data(get_7d_data(html, city), OUTPUT_CSV)


def _15d(city):
    """Scrape the 15-day forecast for ``city`` and append it to the CSV file."""
    html = get_content(get_15d_url(city))
    save_data(get_15d_data(html, city), OUTPUT_CSV)


if __name__ == '__main__':
    # City names are entered on one line separated by whitespace.  split()
    # with no argument ignores repeated, leading and trailing blanks, so no
    # empty city names are produced.
    for city in input('city name: ').split():
        _7d(city)
        _15d(city)
附:city.txt 获取地址:https://pan.baidu.com/s/1VNW8AJi6_zo7mP_90lTkiA 提取码:red5