网络爬虫基础练习

import requests
# Page to scrape; it is served from localhost.
url = 'http://localhost:63342/new/036.html?_ijt=55599hg223a9s359i3e3f9kdku'
# requests waits forever by default, so bound the wait explicitly.
res = requests.get(url, timeout=10)
# Fail fast on a 4xx/5xx reply instead of parsing an error page as if it
# were the expected document.
res.raise_for_status()
# Decode the body as UTF-8 whenever it is read as text.
res.encoding = 'utf-8'


from bs4 import BeautifulSoup
# Parse the downloaded markup with Python's built-in HTML parser.
soup = BeautifulSoup(res.text, 'html.parser')

# Text of the first <h1> matched by the CSS selector.
first_heading = soup.select('h1')[0]
soups = first_heading.get_text()
print(soups)

# Attribute dict of the first <a> in the document; print its link target.
soupa = soup.find('a').attrs
print(soupa['href'])

# Text of the first <li> in the document.
soupli = soup.find('li').get_text()
print(soupli)

# Gather four values from the page into one list, then print one per line.
soupzong = [
    # Text of the first element carrying the "news-list-title" class.
    soup.select('.news-list-title')[0].text,
    # Link target of the second <a> in the document.
    soup.select('a')[1].attrs['href'],
    # Text of the <span> reached through the first <li> -> <a> -> <div>.
    soup.li.a.div.span.text,
    # Text of the second child node of the first "news-list-info" element.
    soup.select('.news-list-info')[0].contents[1].text,
]
print(*soupzong, sep='\n')

  

posted @ 2018-03-29 16:42  Polvem  阅读(152)  评论(0)  编辑  收藏  举报