# 网络爬虫基础练习 (Web crawler basics exercise)
import requests
from bs4 import BeautifulSoup

# Download the campus-news index page and print a handful of elements from
# it: the first <h1>, the href of the first <a>, the text of every <li>, and
# the title / link / info fields of the first entry under ".news-list".
newsurl = 'http://news.gzcc.cn/html/xiaoyuanxinwen/'

# Without a timeout requests waits indefinitely on an unresponsive server.
res = requests.get(newsurl, timeout=10)
# Fail loudly on 4xx/5xx instead of parsing an error page as if it were news.
res.raise_for_status()
# Force UTF-8 decoding rather than relying on requests' charset guess.
res.encoding = 'utf-8'
soup = BeautifulSoup(res.text, 'html.parser')

# First <h1> in the document, if any.
h1 = soup.h1
if h1 is not None:
    print(h1.text)
else:
    print("不存在h1标签")

# href attribute of the first <a> in the document, if any.
a = soup.a
if a is not None:
    print(a.attrs.get('href'))
else:
    print("不存在a标签")

# Text of every <li>; the selector is evaluated once and the result reused.
li_list = soup.select("li")
for item in li_list:
    print(item.text)
if not li_list:
    print("不存在li标签")

# First entry of the news list. select_one() returns None when nothing
# matches, so guard both lookups instead of crashing with AttributeError.
news_list = soup.select_one(".news-list")
li = news_list.select_one("li") if news_list is not None else None
if li is None:
    print("不存在新闻列表项")
else:
    # NOTE(review): the lookups below assume each entry contains a
    # ".news-list-title" element, an <a>, and a ".news-list-info" element
    # with at least two children -- confirm against the live page markup.
    print(li.select_one(".news-list-title").text)
    print(li.a.attrs.get('href'))
    info = li.select_one(".news-list-info")
    print(info.contents[0].text)
    print(info.contents[1].text)