Python 3 spider [urllib.request]
# Minimal spider: fetch the Baidu home page with urllib.request, locate the
# logo <img> with BeautifulSoup, and download the image to logo.png.

from urllib.parse import urljoin
from urllib.request import urlopen, urlretrieve

from bs4 import BeautifulSoup

PAGE_URL = "https://www.baidu.com/"
LOGO_CLASS = "index-logo-src"
OUTPUT_PATH = "logo.png"


def fetch_page(url):
    """Return the raw response body of *url* as bytes.

    The response is closed as soon as the body has been read. The body is
    bytes; call ``.decode()`` on the result to obtain the HTML as text.
    """
    with urlopen(url) as response:
        return response.read()


def find_logo_url(page, page_url):
    """Return the absolute URL of the first ``<img>`` with the logo class.

    Args:
        page: HTML document (bytes or str) to search.
        page_url: URL the document was fetched from; used as the base when
            the image ``src`` is not already an absolute URL.

    Raises:
        LookupError: if no ``<img class="index-logo-src">`` with a ``src``
            attribute is present in *page*.
    """
    soup = BeautifulSoup(page, "html.parser")
    logo = soup.find("img", class_=LOGO_CLASS)
    if logo is None or not logo.get("src"):
        raise LookupError(
            f"no <img class={LOGO_CLASS!r}> with a src attribute in {page_url}"
        )
    # urljoin resolves scheme-relative ("//host/x.png"), path-relative and
    # already-absolute src values alike, so no scheme is hard-coded here.
    return urljoin(page_url, logo["src"])


def main():
    """Fetch the page at PAGE_URL and save its logo image to OUTPUT_PATH."""
    page = fetch_page(PAGE_URL)
    logo_url = find_logo_url(page, PAGE_URL)
    # download the image
    urlretrieve(logo_url, OUTPUT_PATH)


if __name__ == "__main__":
    main()
posted on 2021-06-10 16:00 Karlkiller 阅读(47) 评论(0) 编辑 收藏 举报