网络爬虫基础练习
# Blog post metadata: published 2018-03-29 12:53 by 129赖锐扬; 162 views, 0 comments
# (page controls on the original post: edit / bookmark / report).
import requests
from bs4 import BeautifulSoup

# Practice page fetched from a server on localhost.
url = 'http://localhost:63342/bd/gouxueyuan.html?_ijt=kn4osq2f4cqos8pf8vjvmkrah7'

# A timeout keeps the script from blocking forever when the server does not answer.
res = requests.get(url, timeout=10)
# Force UTF-8 so res.text is decoded with that charset instead of a guessed one.
res.encoding = 'utf-8'

soup = BeautifulSoup(res.text, 'html.parser')

# Text of the first <h1> element.
print(soup.select('h1')[0].text)

# href attribute of every <a> element (None when the attribute is absent).
for link in soup.select('a'):
    print(link.get('href'))

# Child nodes of every <li> element.
for i in soup.select('li'):
    print(i.contents)

# Text of the first element with class "news-list-title".
print(soup.select('.news-list-title')[0].text)

# href of the <a> inside the second <li>.
print(soup.select('li')[1].a.attrs['href'])

# Text of the first two children of the first "news-list-info" element;
# the selector is evaluated once and reused for both lookups.
news_info = soup.select('.news-list-info')[0]
print(news_info.contents[0].text)
print(news_info.contents[1].text)