下载
pip install beautifulsoup4
pip install beautifulsoup4 -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com
导入
from bs4 import BeautifulSoup
对象实例化
# Parse the HTML text held in html_doc with the 'lxml' parser.
# lxml is a separate package from beautifulsoup4 and must be installed as well.
# NOTE(review): html_doc is not defined in this snippet; it must already hold
# the page's HTML before this line runs.
soup = BeautifulSoup(html_doc,'lxml')
print(type(soup))
# <class 'bs4.BeautifulSoup'>
prettify 方法
# prettify() re-serializes the parsed document as an indented string.
res = soup.prettify()
# Show the type of the value prettify() returned.
print(type(res))
find 方法
# Attribute access by tag name (soup.title, soup.div, ...) returns the first
# tag with that name in the document.
print(soup.title)
print(soup.div)
print(soup.p)
print(soup.a)
print(soup.img)
# Presumably the page has no <asd> tag; in that case this prints None
# rather than raising.
print(soup.asd)
# find('name') is the explicit spelling of the same lookup:
# soup.find('title') and soup.title return the same tag.
print(soup.find('title'))
print(soup.title)
print(soup.find('div'))
find 方法 条件查找
# class_ carries a trailing underscore because `class` is a reserved word
# in Python and cannot be used as a keyword-argument name.
print(soup.find('a',class_="cover"))
print(soup.find('p',class_="detail"))
# Other keyword arguments filter on the HTML attribute of the same name.
print(soup.find('span',id="icp"))
# Several filters may be combined; the tag has to satisfy all of them.
print(soup.find('div',id="db-global-nav",class_="global-nav"))
# The same query with the filters passed as an attrs dict; inside the dict
# the plain attribute name 'class' is used.
print(soup.find('div',attrs={'id': "db-global-nav",'class': "global-nav"}))
find 方法 组合技
# The result of find() is itself searchable, so lookups can be chained.
# NOTE(review): find() returns None when nothing matches, and the chained
# access below would then raise; this assumes the page contains <a class="cover">.
print(type(soup.find('a',class_="cover")))
# Locate the <img> nested inside the matched <a>.
print(soup.find('a',class_="cover").find('img'))
# .img is the shorthand for .find('img').
print(soup.find('a',class_="cover").img)
# .attrs is the dict of the tag's HTML attributes.
print(soup.find('a',class_="cover").img.attrs)
print(soup.find('a',class_="cover").img.attrs['src'])
# Indexing the tag directly is the shorthand for indexing tag.attrs.
print(soup.find('a',class_="cover").img['src'])
attrs 属性
# .attrs exposes the attributes of the first <div> as a dict.
print(soup.div.attrs)
print(soup.div.attrs['id'])
# 'class' may hold several values, so it comes back as a list
# (see the sample output that follows this snippet).
print(soup.div.attrs['class'])
# Indexing the tag directly gives the same values as indexing tag.attrs.
print(soup.div['class'])
print(soup.div['id'])
结果:
{'id': 'db-global-nav', 'class': ['global-nav']}
db-global-nav
['global-nav']
['global-nav']
db-global-nav
文件写入
# Show the <img> tag whose src attribute is about to be saved.
print(soup.find('a',class_="cover").img)
# Write the value of that img tag's src attribute to a text file named 'a_img'.
# The two statements below must be indented under `with`: without the indent
# the snippet is a syntax error, and the indent also guarantees the file is
# open when written to and closed once the block ends.
with open('a_img','w',encoding='utf-8') as f:
    res = soup.find('a',class_="cover").img['src']
    f.write(str(res))
获取文本 string、strings、text、get_text()、stripped_strings
# .string gives the tag's single text child (here, the title text).
print(soup.title.string)
# .strings is a generator, so printing it directly shows only the generator
# object; wrap it in list() to see the values it yields.
print(soup.title.strings)
print(list(soup.title.strings))
# .text and get_text() both return the text inside the tag as one string.
print(soup.title.text)
print(soup.title.get_text())
# .stripped_strings is also a generator; it yields the strings with
# surrounding whitespace removed.
print(soup.title.stripped_strings)
print(list(soup.title.stripped_strings))
结果:
新书速递
<generator object Tag._all_strings at 0x000002AD19BFBC80>
['新书速递']
新书速递
新书速递
<generator object PageElement.stripped_strings at 0x000002AD19BFBC80>
['新书速递']
获取文本 方法的区别
# The same text accessors, applied to <div class="detail-frame"> so their
# differences show up on a tag larger than <title>.
# NOTE(review): presumably this div has several children; Beautiful Soup
# documents .string as None when a tag has more than one child. Confirm
# against the actual page.
print(soup.find('div',class_="detail-frame"))
print(soup.find('div',class_="detail-frame").string)
# Generator object; the list() call on the next line shows its contents.
print(soup.find('div',class_="detail-frame").strings)
print(list(soup.find('div',class_="detail-frame").strings))
# All nested text joined into a single string (both spellings).
print(soup.find('div',class_="detail-frame").text)
print(soup.find('div',class_="detail-frame").get_text())
# Generator of the whitespace-stripped strings, then its contents as a list.
print(soup.find('div',class_="detail-frame").stripped_strings)
print(list(soup.find('div',class_="detail-frame").stripped_strings))
find_all 方法
# find_all() returns every matching tag instead of only the first one.
print(soup.find_all('img'))
# Attribute filters work the same way as with find().
print(soup.find_all('div',id= "db-global-nav",class_= "global-nav"))
# limit=2 stops the search after two matches.
print(soup.find_all('div',limit=2))
# A list of names matches tags having any one of those names.
print(soup.find_all(['h2','img']))
select 方法
# select() takes a CSS selector and returns all matching tags.
# Tag-name selector.
print(soup.select('img'))
# Class selector.
print(soup.select('.cover'))
# Id selector.
print(soup.select('#db-global-nav'))
# Two class selectors with no space between them: one element carrying both classes.
print(soup.select('.cover-col-4.clearfix'))
# Attribute selector: the class attribute must equal this exact string.
print(soup.select('span[class="font-small color-lightgray"]'))
# '>' selects direct <li> children; the space then selects any <img> descendant of those <li>.
print(soup.select('.cover-col-4.clearfix > li img'))
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 被坑几百块钱后,我竟然真的恢复了删除的微信聊天记录!
· 【自荐】一款简洁、开源的在线白板工具 Drawnix
· 没有Manus邀请码?试试免邀请码的MGX或者开源的OpenManus吧
· 园子的第一款AI主题卫衣上架——"HELLO! HOW CAN I ASSIST YOU TODAY
· 无需6万激活码!GitHub神秘组织3小时极速复刻Manus,手把手教你使用OpenManus搭建本
2020-10-27 zabbix使用模版监控nginx服务
2020-10-27 zabbix4.0监控-图形-模版(三)