骑骡子赶猪  
安装:pip install requests-html

# Scrape the cnblogs news page in two ways: CSS selectors and XPath
from requests_html import HTMLSession

# Create the HTML session used to issue the request below.
session = HTMLSession()

# Fetch the cnblogs news front page once; both lookups below parse this
# same response.
r = session.get('https://news.cnblogs.com/')

# Approach 1: locate the news anchors with a CSS selector.
news = r.html.find('h2.news_entry a')

for link in news:
    print(link.text)  # news headline
    print(link.absolute_links)  # link(s) of the anchor, in absolute form


# Approach 2: locate the news anchors with an XPath expression.
news = r.html.xpath('//div[@id="news_list"]//div[@class="content"]/h2/a')

for link in news:
    print(link.text)
    print(link.absolute_links)
posted on 2019-03-10 21:22  骑骡子赶猪  阅读(286)  评论(1)  编辑  收藏  举报