爬取新浪网的标题和网址

 

 

import requests
from bs4 import BeautifulSoup

# Scrape link titles and URLs from the Sina China-news listing page and
# print one "title href" pair per line.
url = 'https://news.sina.com.cn/china'  # news page to scrape
# Bound the wait so a stalled connection cannot hang the script forever.
res = requests.get(url, timeout=10)
# Fail loudly on an HTTP error instead of parsing an error page as news.
res.raise_for_status()
# Decode the body as UTF-8 explicitly rather than relying on requests' guess.
res.encoding = "UTF-8"
# Parse with the built-in html.parser backend.
soup = BeautifulSoup(res.text, 'html.parser')

print('开始爬取')

for news in soup.select("a"):
    title = news.text
    # Only links whose text is longer than 5 characters are reported.
    if len(title) <= 5:
        continue
    # Anchors without an href attribute would raise KeyError with
    # news['href']; look the attribute up safely and skip those anchors.
    href = news.get('href')
    if href is None:
        continue
    print(title, href)

 

posted on 2020-03-11 09:21  一往无前!  阅读(242)  评论(0)  编辑  收藏  举报