Python 爬取页面内容
# Download a single page from stats.gov.cn and list every <a> tag in it:
# first the raw list of tags, then each link's href and its text.
import urllib.request

import requests  # not used in this script; kept so nothing relying on it breaks
from bs4 import BeautifulSoup

url = "http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2018/12/1201.html"

# Browser-style User-Agent, installed as the opener's only default header.
headers = (
    "User-Agent",
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36",
)

opener = urllib.request.build_opener()
opener.addheaders = [headers]

# A timeout keeps the script from hanging on an unresponsive server, and the
# context manager closes the HTTP response once the body has been read.
with opener.open(url, timeout=30) as response:
    data = response.read()

# Decode as GB2312 first (unchanged result whenever that succeeds). If the
# page contains bytes outside GB2312, fall back to the wider GB18030 encoding
# and replace anything still undecodable rather than crashing.
try:
    content = data.decode('GB2312')
except UnicodeDecodeError:
    content = data.decode('GB18030', errors='replace')

soup = BeautifulSoup(content, 'html.parser')

# Collect the anchors once and reuse the result for both outputs.
links = soup.find_all('a')
print(links)

for link in links:
    # Tag.get() returns None for anchors without an href (e.g. named
    # anchors) instead of raising KeyError like attrs['href'] would.
    print('url:', link.get('href'))
    # get_text()'s first positional argument is a separator string, so
    # passing 'title' would be inserted between text fragments of anchors
    # with nested markup; the plain call returns just the text.
    print('text:', link.get_text())