利用BeautifulSoup去除HTML指定标签和去除注释

去除指定标签

from bs4 import BeautifulSoup
#去除属性ul
[s.extract() for s in soup("ul")]
# 去除属性svg
[s.extract() for s in soup("svg")]
# 去除属性script
[s.extract() for s in soup("script")]

去除注释

from bs4 import BeautifulSoup, Comment

 #去除注释
comments = soup.findAll(text=lambda text: isinstance(text, Comment))
[comment.extract() for comment in comments]

posted @ 2018-11-05 01:53  qq874455953  阅读(2564)  评论(0编辑  收藏  举报