网络爬虫基础练习

import requests
from bs4 import BeautifulSoup
# Download the campus-news listing page and parse it into a BeautifulSoup tree.
newsurl = 'http://news.gzcc.cn/html/xiaoyuanxinwen/'
# Without a timeout, requests waits indefinitely on an unresponsive server.
res = requests.get(newsurl, timeout=10)
# Fail loudly on 4xx/5xx instead of parsing an error page as if it were news.
res.raise_for_status()
# Force UTF-8 decoding before reading res.text.
res.encoding = 'utf-8'
soup = BeautifulSoup(res.text, 'html.parser')

# Print the text of the first <h1> on the page, or a notice when none exists.
h1 = soup.h1
# Compare against None by identity rather than with `!=`.
if h1 is not None:
    print(h1.text)
else:
    print("不存在h1标签")

# Print the href of the first <a> on the page, or a notice when none exists.
a = soup.a
# Compare against None by identity rather than with `!=`.
if a is not None:
    # .get() yields None (printed as "None") when the tag has no href attribute.
    print(a.attrs.get('href'))
else:
    print("不存在a标签")

# Print the text of every <li> on the page, or a notice when there are none.
li_list = soup.select("li")
if li_list:
    # Reuse the list selected above instead of querying the document again.
    for item in li_list:
        print(item.text)
else:
    print("不存在li标签")

# Show details of the first news entry: its title, its link, and the first
# two children of its info line.
news_list = soup.select_one(".news-list")
# select_one returns None when nothing matches, so guard before chaining the
# second lookup instead of failing with AttributeError.
li = news_list.select_one("li") if news_list is not None else None
if li is None:
    print("不存在新闻列表项")
else:
    print(li.select_one(".news-list-title").text)
    print(li.a.attrs.get('href'))
    # Look the info element up once and reuse it for both children.
    # NOTE(review): assumes the entry has a .news-list-info element whose
    # first two children both expose .text -- confirm against the page markup.
    info = li.select_one(".news-list-info")
    print(info.contents[0].text)
    print(info.contents[1].text)

 

posted @ 2018-03-29 13:49  130-张煌  阅读(157)  评论(0)  编辑  收藏  举报