简单Lora爬虫

demo

# @Time    : 2023/7/21
# @Author  : bgm
# @File    : demo.py
import os
import re
import time
from random import random

import requests

class EsheepModelCollector:
    """Page through the esheep model-list API and save one image per model.

    Each page is fetched from ``BASE_URL``; for every entry in the page's
    ``items`` list the first image URL found is downloaded into
    ``OUTPUT_DIR`` under a name derived from ``item['model']['name']``.
    Paging follows the ``cursor`` object (``Next`` / ``IsEnd``) returned by
    the API.
    """

    BASE_URL = 'https://api.esheep.com/gateway/model/list'
    # Characters replaced by '_' before a model name is used as a file name.
    INVALID_CHARS = ':*?"<>|\\/'
    # Directory (relative to the working directory) that receives the images.
    OUTPUT_DIR = 'img_url'
    # Seconds to wait on any single HTTP request before giving up.
    TIMEOUT = 10

    HEADERS = {
        'origin': 'https://www.esheep.com',
        'referer': 'https://www.esheep.com/',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
    }

    COOKIES = {
        'buvid': '415DC444-806E-82F5-4753-BA65DE586D5325503infoc',
    }

    def __init__(self):
        """Set up the query parameters sent with every list request."""
        # NOTE(review): the meaning of 'mode' is not visible here; the author
        # left '1' as a commented-out alternative -- confirm against the API.
        self.params = {
            'limit': '20',
            'mode': '3',
            # 'mode': '1',
        }
        # 'next' is added by collect() once a page has returned a cursor.

    def __remove_invalid_chars(self, name):
        """Return *name* with every character of ``INVALID_CHARS`` replaced by '_'."""
        table = str.maketrans(self.INVALID_CHARS, '_' * len(self.INVALID_CHARS))
        return name.translate(table)

    @staticmethod
    def _find_image_url(node):
        """Return the first image URL inside *node*, or ``None`` if there is none.

        *node* is a decoded JSON value. It is walked depth-first in key
        order, and the first string stored under a ``'url'`` key of a dict
        that also has a ``'height'`` key is returned. Working on the data
        itself (rather than on its ``repr``) keeps the lookup independent of
        how Python happens to quote the strings.
        """
        if isinstance(node, dict):
            for key, value in node.items():
                if key == 'url' and isinstance(value, str) and 'height' in node:
                    return value
                found = EsheepModelCollector._find_image_url(value)
                if found is not None:
                    return found
        elif isinstance(node, (list, tuple)):
            for child in node:
                found = EsheepModelCollector._find_image_url(child)
                if found is not None:
                    return found
        return None

    def __download_image(self, img_url, name, session=None):
        """Download *img_url* and store it as ``OUTPUT_DIR/_test<name>.<ext>``.

        Args:
            img_url: Address of the image to fetch.
            name: Model name; sanitised before being used in the file name.
            session: Optional ``requests.Session`` to reuse; when omitted a
                one-off ``requests.get`` is issued.

        A failed request is reported on stdout and skipped, so a single bad
        image does not abort the whole crawl and no error page is written to
        disk as if it were an image.
        """
        name = self.__remove_invalid_chars(name)
        extension = 'png' if 'png' in img_url else 'jpg'
        filename = f"{self.OUTPUT_DIR}/_test{name}.{extension}"

        client = session if session is not None else requests
        try:
            with client.get(img_url, headers=self.HEADERS, timeout=self.TIMEOUT) as res:
                res.raise_for_status()
                content = res.content
        except requests.RequestException as exc:
            print(f'{name} download failed: {exc}')
            return

        # open() does not create missing directories.
        os.makedirs(self.OUTPUT_DIR, exist_ok=True)
        with open(filename, 'wb') as f:
            f.write(content)
        print(f'{name}已保存!')

    def collect(self):
        """Fetch every page of the model list and download each model's image.

        Stops when the API reports ``IsEnd``; otherwise stores the returned
        ``Next`` cursor in ``self.params['next']`` and sleeps for a random
        fraction of a second before requesting the following page.

        Raises:
            requests.RequestException: If a list request fails or returns an
                HTTP error status.
        """
        with requests.Session() as session:
            while True:
                response = session.get(
                    url=self.BASE_URL,
                    params=self.params,
                    cookies=self.COOKIES,
                    headers=self.HEADERS,
                    timeout=self.TIMEOUT,
                )
                response.raise_for_status()
                infos = response.json()['data']

                cursor = infos['cursor']
                next_cursor, is_end = cursor['Next'], cursor['IsEnd']
                print(f"打印=={next_cursor}-----{is_end}")

                for item in infos['items']:
                    img_url = self._find_image_url(item)
                    if not img_url:
                        # Nothing to download for this entry; move on.
                        continue
                    print(img_url)
                    self.__download_image(img_url, item['model']['name'], session=session)

                if is_end:
                    break

                self.params['next'] = next_cursor

                # Short random pause between pages.
                time.sleep(random())


if __name__ == '__main__':
    # Script entry point: build a collector and run the crawl.
    EsheepModelCollector().collect()


posted @ 2023-07-22 03:20  __username  阅读(24)  评论(0)  编辑  收藏  举报

本文作者:DIVMonster

本文链接:https://www.cnblogs.com/guangzan/p/12886111.html

版权声明:本作品采用知识共享署名-非商业性使用-禁止演绎 2.5 中国大陆许可协议进行许可。