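# Download every image referenced on the page at `url`. Fill in `url` and the
# script is ready to run; adjust the filtering logic below to suit your needs.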
import os
import random
import re
import time

# Matches the value of every double-quoted src attribute. re.S lets "." match
# newlines, so an attribute value that is wrapped across lines is still found.
_SRC_PATTERN = re.compile(r'src="(.*?)"', re.S)

# Seconds to wait for the server before giving up on a single request.
_TIMEOUT = 30


def Find(string):
    """Return the values of all ``src="..."`` attributes found in *string*.

    Args:
        string: Text (typically an HTML document) to scan.

    Returns:
        A list of the attribute values, in order of appearance. Only
        double-quoted attributes are matched; the list is empty when
        nothing matches.
    """
    return _SRC_PATTERN.findall(string)


# --- Configuration -----------------------------------------------------------
# Page to scrape; must be filled in before running the script.
url = ''
user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36"
# Alternative header set; the script itself sends `headers2`.
headers = {"User-Agent": user_agent}
headers2 = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36 ",
}
# Used both as the output sub-folder name and as the file-name prefix.
name = 'xxx'
# Parent folder under which the `name` sub-folder is created.
SAVE_ROOT = 'D:\\temp'


def _is_wanted(src):
    """Return True if *src* should be downloaded (long enough, absolute https URL)."""
    return len(src) > 20 and src.startswith('https://')


def main():
    """Fetch ``url``, extract every ``src`` value and save the wanted ones.

    Files are written to ``SAVE_ROOT/name`` as ``<name><index>.jpg``, where
    *index* is the 1-based position of the link among all ``src`` values on
    the page (skipped links still consume an index). Every file gets a
    ``.jpg`` suffix regardless of the actual image type.

    Raises:
        SystemExit: If ``url`` has not been filled in.
        requests.RequestException: If the page itself cannot be fetched.
    """
    # Imported here so that Find() stays importable without the third-party
    # dependency installed.
    import requests

    if not url:
        raise SystemExit("Set `url` to the page you want to scrape before running.")

    out_dir = os.path.join(SAVE_ROOT, name)
    # exist_ok: re-running the script must not fail because the folder exists.
    os.makedirs(out_dir, exist_ok=True)

    # A session reuses the underlying connection across the image requests.
    with requests.Session() as session:
        html_r = session.get(url, headers=headers2, timeout=_TIMEOUT)
        html_r.raise_for_status()

        lis = Find(html_r.text)
        print(len(lis), lis)

        for i, ele in enumerate(lis, start=1):
            if not _is_wanted(ele):
                continue

            try:
                img = session.get(ele, headers=headers2, timeout=_TIMEOUT)
                # Without this check an HTML error page would be saved as .jpg.
                img.raise_for_status()
            except requests.RequestException as exc:
                # One broken link should not abort the remaining downloads.
                print('skipped', ele, exc)
                continue

            savename = os.path.join(out_dir, name + str(i) + '.jpg')
            with open(savename, 'wb') as w:
                w.write(img.content)

            # Random 5-7 second pause between downloads, presumably to avoid
            # hammering the server.
            time.sleep(random.randrange(5, 8))


if __name__ == "__main__":
    main()