# Download the images referenced on the doutula.com article list pages and
# save each one into the local ./doutu directory.
import os
import re

import requests

# Uniform resource locator (URL) of the paginated list; the page number is
# appended to it for every request.
baseUrl = "https://www.doutula.com/article/list/?page="
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36',
    'Cookie': 'Hm_lvt_2fc12699c699441729d4b335ce117f40=1607422895; _agep=1607422895; _agfp=71f4f425ea56fba0af9404dcbd130fd1; _agtk=4a5f8ee4d308915b44de47a4c1e0916a; Hm_lpvt_2fc12699c699441729d4b335ce117f40=1607423412',
}

# open() does not create missing directories, so make sure the target exists
# before the first image is written.
os.makedirs('./doutu', exist_ok=True)

# Fetch 300 pages of images, numbered 1 through 300.
for i in range(1, 301):
    url = baseUrl + str(i)
    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        page = requests.get(url, headers=headers, timeout=10)
        page.raise_for_status()
    except requests.RequestException as exc:
        # One bad page should not abort the remaining pages.
        print(f'Skipping page {url}: {exc}')
        continue
    # .text yields the HTML as a decoded string.
    html_str = page.text
    image_urls = re.findall(r'data-original="(.*?)"', html_str)
    print(image_urls)
    for image_url in image_urls:
        print(image_url)
        # The file name is the last path segment of the image URL.
        image_name = image_url.split('/')[-1]
        if not image_name:
            # A URL ending in '/' has no file name to save under.
            continue
        try:
            response = requests.get(image_url, headers=headers, timeout=10)
            response.raise_for_status()
        except requests.RequestException as exc:
            # Also covers malformed URLs (e.g. a missing scheme).
            print(f'Skipping image {image_url}: {exc}')
            continue
        # .content yields the raw bytes, which is what a binary file needs.
        # Store the image in the doutu folder under the current directory.
        with open(f'./doutu/{image_name}', 'wb') as file:
            file.write(response.content)