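# CSS selectors
# 1. CSS selectors
# Key points:
#   Tag object: tag.select("<css selector>")
#   #id     -> match by id
#   .name   -> match by class name
#   div>p matches direct children only; div p matches descendants at any depth
#   div a:last-child matches an <a> under a div that is the last child of its parent
# Once you can use CSS and XPath selectors you have a universal pass: you can get
# by without learning each library's own API, as long as you can paste a selector.
# bs4: its own selectors, CSS selectors
# lxml: CSS selectors, XPath selectors
# selenium: its own selectors, CSS selectors, XPath selectors
# scrapy framework: its own selectors, CSS selectors, XPath selectors
# Example: select('.article')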
from bs4 import BeautifulSoup

# Sample document queried by every select() call below.
# NOTE(review): the string literal was never terminated and several elements
# were left unclosed. The closing tags, the <body> wrapper and the <span>
# inside #link1 (needed for the '... span' selectors to match anything) were
# reconstructed -- confirm against the intended fixture.
html_doc = """
<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title">
<b>The Dormouse's story</b>
Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="link1">
<span>Elsie</span>
</a>
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
<div class='panel-1'>
<ul class='list' id='list-1'>
<li class='element'>Foo</li>
<li class='element'>Bar</li>
<li class='element'>Jay</li>
</ul>
<ul class='list list-small' id='list-2'>
<li class='element'><h1 class='yyyy'>Foo</h1></li>
<li class='element xxx'>Bar</li>
<li class='element'>Jay</li>
</ul>
</div>
and they lived at the bottom of a well.
</p>
<p class="story">...</p>
</body></html>
"""

# Build the parse tree. 'html.parser' is backed by the standard library, so
# the demo does not require an additional parser package to be installed.
soup = BeautifulSoup(html_doc, 'html.parser')

# 1. Select elements
# <span> descendants of any element with class "sister".
print(soup.select('.sister span'))
# <span> descendants of the element whose id is "link1".
print(soup.select('#link1 span'))
# Elements carrying both classes "element" and "xxx" inside #list-2.
print(soup.select('#list-2 .element.xxx'))
# select() can be chained on a result, but that is unnecessary: the single
# selector '#list-2 .element' expresses the same query.
print(soup.select('#list-2')[0].select('.element'))

# 2. Get attributes
print(soup.select('#list-2 h1')[0].attrs)

# 3. Get text content
print(soup.select('#list-2 h1')[0].get_text())

# Only you can control your future.
# You're not alone. You still have family, people who care for you and want
# to save you.