# Beautiful Soup is a third-party package (install with `pip install beautifulsoup4`).
from bs4 import BeautifulSoup

# Sample document shared by every example below: a title paragraph, a "story"
# paragraph holding three <a class="sister"> links, and a trailing "..." paragraph.
html_doc = """
<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title"><b>The Dormouse's story</b></p>
<p class="story">Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>,
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>
<p class="story">...</p>
"""

# Parse with the standard library's built-in parser so no extra parser
# package (lxml, html5lib) is required.
soup = BeautifulSoup(html_doc, 'html.parser')
'\n'
"The Dormouse's story"
'\n'
'\n'
"The Dormouse's story"
'\n'
'Once upon a time there were three little sisters; and their names were\n'
'Elsie'
',\n'
'Lacie'
' and\n'
'Tillie'
';\nand they lived at the bottom of a well.'
'\n'
'...'
'\n'
使用 .stripped_strings 可以去除多余空白内容:
# Use .stripped_strings to drop the surplus whitespace: strings that are only
# whitespace are skipped and the remaining ones are trimmed at both ends.
for string in soup.stripped_strings:
    # repr() makes any remaining newline characters visible in the output.
    print(repr(string))
输出为:
"The Dormouse's story"
"The Dormouse's story"
'Once upon a time there were three little sisters; and their names were'
'Elsie'
','
'Lacie'
'and'
'Tillie'
';\nand they lived at the bottom of a well.'
'...'
# Walk every node that follows the first <a> tag under the same parent.
# Text nodes between the tags (',\n', ' and\n', ...) are siblings too.
for sibling in soup.a.next_siblings:
    print(repr(sibling))

# Walk every node that precedes the id="link3" tag under the same parent;
# iteration goes backwards, so the nearest sibling is printed first.
for sibling in soup.find(id="link3").previous_siblings:
    print(repr(sibling))
输出为:
',\n'
<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>
' and\n'
<a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>
';\nand they lived at the bottom of a well.'

' and\n'
<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>
',\n'
<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>
'Once upon a time there were three little sisters; and their names were\n'
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 阿里最新开源QwQ-32B,效果媲美deepseek-r1满血版,部署成本又又又降低了!
· 开源Multi-agent AI智能体框架aevatar.ai,欢迎大家贡献代码
· Manus重磅发布:全球首款通用AI代理技术深度解析与实战指南
· 被坑几百块钱后,我竟然真的恢复了删除的微信聊天记录!
· 没有Manus邀请码?试试免邀请码的MGX或者开源的OpenManus吧