# Scrape headline titles and their URLs from Sina News (news.sina.com.cn).
import requests
from bs4 import BeautifulSoup

# Page to scrape: the "China" channel of Sina News.
url = 'https://news.sina.com.cn/china'

# A timeout keeps the script from blocking forever on a stalled connection,
# and raise_for_status() aborts on an HTTP error instead of scraping the
# links of an error page.
res = requests.get(url, timeout=10)
res.raise_for_status()
# Decode the response body as UTF-8 regardless of the encoding requests
# inferred for it.
res.encoding = "UTF-8"

# Parse the document with the "html.parser" backend.
soup = BeautifulSoup(res.text, 'html.parser')
print('开始爬取')

# Heuristic: only anchors whose text is longer than five characters are
# reported, which filters out short navigation links.
for news in soup.select("a"):
    title = news.text
    if len(title) <= 5:
        continue
    # Anchors without an href attribute (e.g. named anchors or JS-only
    # links) have no URL to report; indexing news['href'] on them would
    # raise KeyError and stop the crawl, so they are skipped.
    href = news.get('href')
    if href is None:
        continue
    print(title, href)