python3 spider [ urllib.request ]

# # 导入urllib库的urlopen函数
# from urllib.request import urlopen
# # 发出请求，获取html
# html = urlopen("https://www.baidu.com/")
# # 获取的html内容是字节，将其转化为字符串
# html_text = bytes.decode(html.read())
# # 打印html内容
# print(html_text)


from urllib.parse import urljoin
from urllib.request import urlopen, urlretrieve

from bs4 import BeautifulSoup as bf

# Fetch the Baidu home page and parse it. The `with` block closes the HTTP
# response once the body has been read, even if parsing raises.
with urlopen("https://www.baidu.com/") as html:
    obj = bf(html.read(), 'html.parser')

# <title> element of the page, or None when the document has no <head>
# (kept for inspection; it is not used by the download below).
title = obj.head.title if obj.head is not None else None

# Every <img> tag carrying the logo's CSS class.
logo_pic_info = obj.find_all('img', class_="index-logo-src")
if not logo_pic_info:
    # Still an IndexError, as before, but with a message that says what is
    # actually missing instead of "list index out of range".
    raise IndexError(
        "no <img class=\"index-logo-src\"> found in the fetched page"
    )

# Resolve the image address against the page URL: a protocol-relative src
# ("//host/path") still becomes "https://host/path", and an absolute or
# path-relative src is handled correctly as well.
logo_url = urljoin("https://www.baidu.com/", logo_pic_info[0]['src'])

# download the image
urlretrieve(logo_url, 'logo.png')

posted on 2021-06-10 16:00 Karlkiller 阅读(47) 评论(0) 编辑收藏举报

刷新页面返回顶部

python3 spider [ urllib.request ]

导航

公告