01爬虫 爬取糗事百科段子
# -*- coding:utf-8 -*-
"""Fetch qiushibaike.com "hot" pages and print every ``<span>`` body found.

For each page number the script requests
``http://www.qiushibaike.com/hot/page/<n>``, decodes the response as UTF-8,
and prints the text captured between each ``<span>`` and ``</span>`` pair.
When a request fails with ``URLError`` the error's ``code`` and ``reason``
attributes (whichever exist) are printed and the script moves on to the
next page.
"""
import re
from contextlib import closing

# The original script targeted Python 2 (``urllib2``); resolve the same
# names from their Python 3 locations first so the script runs on both.
try:  # Python 3
    from urllib.error import URLError
    from urllib.request import Request, urlopen
except ImportError:  # Python 2
    from urllib2 import Request, URLError, urlopen

BASE_URL = 'http://www.qiushibaike.com/hot/page/'
HEADERS = {'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
# re.S lets ``.`` match newlines, so a span body may cover several lines;
# ``.*?`` is non-greedy, so each match stops at the nearest ``</span>``.
SPAN_PATTERN = re.compile('<span>(.*?)</span>', re.S)


def extract_items(content):
    """Return the list of texts enclosed by ``<span>...</span>`` in *content*."""
    return SPAN_PATTERN.findall(content)


def fetch_page(page):
    """Download hot-list page number *page* and return it decoded as UTF-8.

    Raises:
        URLError: propagated from ``urlopen`` when the request fails.
    """
    request = Request(BASE_URL + str(page), headers=HEADERS)
    # closing() releases the connection on both Python 2 and 3, including
    # when read() or decode() raises.
    with closing(urlopen(request)) as response:
        return response.read().decode('utf-8')


def main(first_page=1, last_page=4):
    """Print the span contents of pages *first_page*..*last_page* (inclusive).

    The defaults reproduce the original ``range(1, 5)`` loop.
    """
    for page in range(first_page, last_page + 1):
        try:
            content = fetch_page(page)
        except URLError as e:
            # Print whichever of ``code`` / ``reason`` the error carries,
            # then continue with the next page.
            if hasattr(e, "code"):
                print(e.code)
            if hasattr(e, "reason"):
                print(e.reason)
            continue
        for item in extract_items(content):
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(item)


if __name__ == '__main__':
    main()
【推荐】还在用 ECharts 开发大屏?试试这款永久免费的开源 BI 工具!
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步