# Simple LoRA model crawler (demo)
# @Time : 2023/7/21
# @Author : bgm
# @File : demo.py
import os
import re
import time
from random import random

import requests
class EsheepModelCollector:
    """Page through the esheep model-list API and save one image per item.

    Each call to :meth:`collect` requests ``BASE_URL`` repeatedly, following
    the ``cursor.Next`` value returned by the server until ``cursor.IsEnd``
    is truthy, and writes the first image URL found in every item to disk.
    """

    BASE_URL = 'https://api.esheep.com/gateway/model/list'
    # Characters replaced with '_' before a model name is used in a file name.
    INVALID_CHARS = ':*?"<>|\\/'
    HEADERS = {
        'origin': 'https://www.esheep.com',
        'referer': 'https://www.esheep.com/',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
    }
    COOKIES = {
        'buvid': '415DC444-806E-82F5-4753-BA65DE586D5325503infoc',
    }
    # Seconds to wait on any single HTTP request before giving up, so a
    # stalled connection cannot hang the crawl forever.
    REQUEST_TIMEOUT = 30
    # Matches the first ``'url': '...'`` entry that is immediately followed by
    # a ``'height'`` key in the repr() of an item; compiled once for all items.
    _IMG_URL_PATTERN = re.compile(r"'url': '(.*?)', 'height'")

    def __init__(self):
        """Set up the initial query parameters for the list endpoint."""
        # 'next' is added by collect() once the first page's cursor is known.
        self.params = {
            'limit': '20',
            'mode': '3',
        }

    def __remove_invalid_chars(self, name):
        """Return *name* with every character in ``INVALID_CHARS`` replaced by '_'."""
        table = str.maketrans({char: '_' for char in self.INVALID_CHARS})
        return name.translate(table)

    def __extract_image_url(self, item):
        """Return the first image URL found in *item*, or ``None`` if absent.

        The URL is located by pattern-matching the item's repr(), so it works
        regardless of how deeply the ``url``/``height`` pair is nested.
        """
        match = self._IMG_URL_PATTERN.search(repr(item))
        return match.group(1) if match else None

    def __download_image(self, img_url, name):
        """Download *img_url* and save it under a file name derived from *name*.

        Raises:
            requests.RequestException: on connection errors, timeouts, or a
                non-success HTTP status (so error pages are never saved as
                images).
        """
        name = self.__remove_invalid_chars(name)
        extension = 'png' if 'png' in img_url else 'jpg'
        filename = f"img_url/_test{name}.{extension}"
        # The target directory is not guaranteed to exist on a fresh checkout.
        os.makedirs(os.path.dirname(filename), exist_ok=True)
        with requests.get(img_url, headers=self.HEADERS, timeout=self.REQUEST_TIMEOUT) as res:
            res.raise_for_status()
            with open(filename, 'wb') as f:
                f.write(res.content)
        print(f'{name}已保存!')

    def collect(self):
        """Fetch every page of the model list and download each item's image.

        Items without a recognisable image URL, and images whose download
        fails, are reported and skipped so one bad entry does not abort the
        whole crawl. A failure of the list request itself is raised.
        """
        with requests.Session() as session:
            while True:
                response = session.get(
                    url=self.BASE_URL,
                    params=self.params,
                    cookies=self.COOKIES,
                    headers=self.HEADERS,
                    timeout=self.REQUEST_TIMEOUT,
                )
                response.raise_for_status()
                infos = response.json()['data']
                cursor = infos['cursor']
                next_cursor, is_end = cursor['Next'], cursor['IsEnd']
                print(f"打印=={next_cursor}-----{is_end}")

                for item in infos['items']:
                    name = item['model']['name']
                    img_url = self.__extract_image_url(item)
                    if img_url is None:
                        print(f'{name}: no image url found, skipped')
                        continue
                    print(img_url)
                    try:
                        self.__download_image(img_url, name)
                    except requests.RequestException as exc:
                        print(f'{name}: download failed ({exc}), skipped')

                if is_end:
                    break
                self.params['next'] = next_cursor
                # Random sub-second pause between pages.
                time.sleep(random())
if __name__ == '__main__':
    # Run a full crawl when the file is executed as a script.
    EsheepModelCollector().collect()
# Source: cnblogs (博客园), author: __username. When reposting, please cite the original link: https://www.cnblogs.com/code3/p/17572776.html