利用BeautifulSoup去除HTML指定标签和去除注释
去除指定标签
from bs4 import BeautifulSoup
#去除属性ul
[s.extract() for s in soup("ul")]
# 去除属性svg
[s.extract() for s in soup("svg")]
# 去除属性script
[s.extract() for s in soup("script")]
去除注释
from bs4 import BeautifulSoup, Comment
#去除注释
comments = soup.findAll(text=lambda text: isinstance(text, Comment))
[comment.extract() for comment in comments]