"""Fetch a local HTML page and print BeautifulSoup lookups against it.

The script downloads ``newsurl``, parses the body with the built-in
``html.parser`` backend and prints the results of tag-attribute access,
CSS selectors and attribute/text extraction, separated by numbered
marker lines.
"""
import requests
from bs4 import BeautifulSoup

newsurl = 'http://localhost:63342/untitled/3/29.html?_ijt=ltocl4v68kb1po4608e3291lkm'

# A timeout keeps the script from hanging forever when the local server
# does not answer; requests applies no timeout by default.
res = requests.get(newsurl, timeout=10)
# Fail here on a 4xx/5xx response instead of parsing an error page and
# failing later with an AttributeError on a missing tag.
res.raise_for_status()
res.encoding = "utf-8"
soup = BeautifulSoup(res.text, 'html.parser')

print(res.text)
print("1-----------------")
print(soup.div)
print("2-----------------")
print(soup.p)  # access by tag name returns the first matching tag
print("3-----------------")
print(soup.head)
print("4-----------------")
print(soup.p.name)  # a string
print("5-----------------")
print(soup.p.attrs)  # a dict holding all attributes of the tag
print("6-----------------")
print(soup.p.contents)  # a list of all children of the tag
print("7-----------------")
print(soup.p.text)  # a string
print("8-----------------")
print(soup.p.string)
print("9-----------------")
print(soup.select('li'))
print("10-----------------")
print(soup.select('#p1Node'))
print("11-----------------")
print(soup.select('.news-list-title'))
print("12-----------------")

# Extract the text of the h1 tag.
print(soup.h1.text)
print("13-----------------")

# Extract the link (href) of the a tag.
print(soup.a.attrs['href'])
print("14-----------------")

# Extract the contents of every li tag.
for i in soup.select('li'):
    print(i.contents)

print("15-----------------")
# Text of the first element matching .news-list-title.
print(soup.select('.news-list-title')[0].text)
# href of the first a tag inside the first li tag.
print(soup.li.a.attrs['href'])
# Text of the first two children of the first .news-list-info element.
print(soup.select('.news-list-info')[0].contents[0].text)
print(soup.select('.news-list-info')[0].contents[1].text)