python爬虫(12)获取七天内的天气
通过 Python 脚本获取最近七天的天气情况,
这样就不必每次都打开网站查询天气了。
思路分析:
1. 确定要抓取哪个城市的天气,获取天气预报网站上该城市的页面内容
2. 从网页内容中匹配并提取天气情况
#!/usr/bin/env python
# coding:utf-8
import urllib2,re
from bs4 import BeautifulSoup
import sys
# Python 2 only: reloading sys makes sys.setdefaultencoding callable again, so
# the default encoding used for implicit str/unicode conversions can be set to
# UTF-8. getWeatherReport's str(...) call on the page text depends on this.
reload(sys)
sys.setdefaultencoding('utf-8')
# The modules used by this script are imported above.
# Weather page for Beijing; replace it with another city's page URL if needed.
url="http://www.weather.com.cn/weather/101010100.shtml"
def getHtml(url, timeout=10):
    """Download the page at ``url`` and return its raw body.

    Args:
        url: Address of the page to fetch. It is echoed to stdout first.
        timeout: Seconds to wait on the connection before giving up.
            Optional, so existing one-argument calls keep working.

    Returns:
        The undecoded response body as returned by ``read()``, or ``None``
        when the request fails.
    """
    print(url)
    try:
        response = urllib2.urlopen(url, timeout=timeout)
        try:
            # Decoding is deliberately left to the caller / HTML parser.
            html = response.read()
        finally:
            # Release the connection even if read() fails part-way.
            response.close()
    except Exception as err:
        # Best-effort fetch: report the failure and signal it with None.
        # Unlike a bare ``except:``, KeyboardInterrupt and SystemExit still
        # propagate, so the script can be interrupted while it waits.
        sys.stderr.write("failed to fetch %s: %s\n" % (url, err))
        return None
    return html
def getWeatherReport(html):
    """Extract the weather text from a downloaded page and print it.

    The text is taken from the first ``<ul class="t clearfix">`` element;
    runs of consecutive newlines in it are collapsed to a single newline.

    Args:
        html: Page markup as returned by ``getHtml``; may be ``None`` or empty.

    Returns:
        The cleaned-up text, or ``None`` when ``html`` is empty, the expected
        element is missing, or its text cannot be converted to ``str``.
    """
    if not html:
        print('nothing can be found')
        return None

    soup = BeautifulSoup(html, 'lxml')
    items = soup.find("ul", {"class": "t clearfix"})
    if items is None:
        # find() returns None when nothing matches; check for it explicitly
        # instead of relying on an AttributeError from ``None.text``.
        print("something was wrong")
        return None

    try:
        # str() uses the interpreter's default encoding for unicode text.
        text = str(items.text)
    except UnicodeError:
        print("something was wrong")
        return None

    result = re.sub(r'\n+', "\n", text)
    print(result)
    return result
# Script entry: download the page at `url`, then extract and print its weather
# text. getHtml returns None on failure, which getWeatherReport reports itself.
html=getHtml(url)
getWeatherReport(html)