这爬虫写的导出报错

import os
import re
import requests
# Download every thumbnail image from the qiushibaike "imgrank" page into
# ./qiutuLibs (a folder inside the current working directory).

# makedirs(exist_ok=True) avoids the check-then-create race of
# `if not os.path.exists(...): os.mkdir(...)`.
os.makedirs('./qiutuLibs', exist_ok=True)

url = 'https://www.qiushibaike.com/imgrank/'

headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36'}

# Fetch the listing page. A timeout keeps the script from hanging forever,
# and raise_for_status() stops us from regex-parsing an HTTP error page.
page = requests.get(url=url, headers=headers, timeout=10)
page.raise_for_status()
response = page.text
print(response)

# Extract the image URLs.
# The capture group (.*?) grabs the value of src; the <img ...> tag is
# deliberately not closed in the pattern so that any trailing attributes
# are swallowed by the lazy `.*?` up to </div>.
img_re = r'<div class="thumb">.*?<img src="(.*?)" alt=.*?</div>'
# re.S lets `.` match newlines, because each thumb <div> spans several lines.
# If this prints an empty list, the page markup no longer matches img_re.
img_list = re.findall(img_re, response, re.S)
print(img_list)
print('这下面都不运行了')

for i in img_list:
    # The page uses protocol-relative links ("//host/path"), which need an
    # explicit scheme; leave already-absolute URLs untouched.
    url_img = i if i.startswith('http') else 'https:' + i
    try:
        img_response = requests.get(url=url_img, headers=headers, timeout=10)
        img_response.raise_for_status()
    except requests.RequestException as exc:
        # Skip this image rather than saving an error page as a picture
        # or aborting the remaining downloads.
        print('下载失败', url_img, exc)
        continue
    img_content = img_response.content
    img_name = i.split('/')[-1]
    # './' is the current directory (not the parent directory).
    img_path = os.path.join('./qiutuLibs', img_name)
    # Image data is bytes, so the file must be opened in binary mode;
    # passing encoding= is only valid for text mode.
    with open(img_path, 'wb') as fp:
        fp.write(img_content)
    print('下载成功')

posted @ 2021-06-30 21:52  索匣  阅读(29)  评论(0编辑  收藏  举报