bs4数据解析基础
import requests
from bs4 import BeautifulSoup
if __name__ == "__main__":
    # Script entry point: parse a local HTML file with BeautifulSoup and keep
    # the resulting object in `soup` for the lookup examples below.

    # Optional request headers for User-Agent spoofing (left disabled here,
    # since the HTML is read from a local file rather than fetched):
    # headers = {
    #     'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
    # }

    # Load the local HTML document into a BeautifulSoup object. The context
    # manager guarantees the file handle is closed once the document has been
    # handed to the parser, instead of leaking it for the life of the process.
    with open('./text.html', 'r', encoding='utf-8') as fp:
        soup = BeautifulSoup(fp, 'lxml')

    # Lookup examples (uncomment one to try it):
    # print(soup)
    # print(soup.input['href'])  # soup.tagName returns the first tag with that name in the document
    # print(soup.find('div'))  # equivalent to soup.div
    # print(soup.find('div', class_='hzbtabs'))  # locate a tag by attribute
    # print(soup.find_all("div"))
    # print(soup.select(".hzbtabs"))
    # print(soup.select('.hzbbannertxt >a')[0]['href'])
作者:华王
博客:https://www.cnblogs.com/huahuawang/