beautifulsoup4模块
BeautifulSoup4模块
简称BS4
from bs4 import BeautifulSoup

# 1. Build a BeautifulSoup object from the HTML held in `content`
#    (`content` is expected to be defined by the surrounding code).
soup = BeautifulSoup(content, 'html.parser')

# 2. Delete every <script> tag together with its contents.
#    Asking find_all() for 'script' directly avoids walking every tag
#    in the document and filtering by name by hand.
for script_tag in soup.find_all('script'):
    script_tag.decompose()

# 3. Article summary: the first 150 characters of the remaining text.
desc = soup.text[:150]
万般皆下品,唯有读书高!