# Web crawler basics practice (网络爬虫基础练习)
import requests
from bs4 import BeautifulSoup

# ---------------------------------------------------------------------------
# Practice: download one news page and parse it with BeautifulSoup.
# ---------------------------------------------------------------------------
url = 'http://news.gzcc.cn/html/2018/xiaoyuanxinwen_0329/9129.html'

# A timeout keeps the script from hanging forever on an unresponsive server,
# and raise_for_status() prevents an HTTP error page from being parsed as if
# it were the article. The response is named `response` (not `re`) so it is
# not mistaken for the standard-library regex module.
response = requests.get(url, timeout=10)
response.raise_for_status()
response.encoding = 'utf-8'  # force response.text to be decoded as UTF-8

soup = BeautifulSoup(response.text, 'html.parser')

# Exploration snippets kept for reference; uncomment to try them.
# print(response)
# print(response.text)
# print(soup.p)
# print(soup.head)
# print(soup.head.name)
# print(soup.a.attrs)
# print(soup.li.contents)
# print(soup.li.text)
# Print the string inside the selected tag.
# print(soup.li.a.string)
# print(soup.select('div'))
# print(soup.select('div[class="news-list-description"]'))

# ---------------------------------------------------------------------------
# Homework: find a page that has an <h1> and pull a few elements out of it.
# ---------------------------------------------------------------------------

# All <h1> elements (an empty list when the page has none).
h = soup.select('h1')
print(h)

# Attributes of the first <a> tag. `soup.a` is None when the document has no
# <a> at all, so fall back to an empty dict instead of raising AttributeError.
first_link = soup.a
a_href = first_link.attrs if first_link is not None else {}
print(a_href)

# Every <li> element; only used by the optional loop below.
list_items = soup.select('li')
# print(list_items)
# for item in list_items:
#     print(item.text)

# Text of the first element with class "show-title". select() returns an
# empty list when nothing matches, so check before indexing.
title = soup.select('.show-title')
if title:
    print(title[0].text)
else:
    print('no element matches .show-title')

# Every <a> element on the page (collected but not printed).
href = soup.select('a')

# Elements with class "show-info".
t = soup.select('.show-info')
print(t)