Python爬图片(面向对象版)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import os
from threading import Thread

import requests
from lxml import etree
 
 
class Spider(object):
    """Download the images listed on the gallery index pages.

    For every index page the spider fetches the HTML, walks the ``<li>``
    entries of the listing, reads each entry's image URL and caption, and
    stores the image bytes as ``<save_dir>/<caption>.jpg``.
    """

    # Seconds to wait on a request before giving up, so that a single stalled
    # connection cannot block the whole crawl forever.
    REQUEST_TIMEOUT = 10

    # Translation table replacing characters that are unsafe in file names
    # (path separators, Windows-reserved punctuation, line breaks) with "_".
    _UNSAFE_CHARS = str.maketrans({ch: "_" for ch in '\\/:*?"<>|\r\n\t'})

    def __init__(self, save_dir="imgs"):
        """Create a spider.

        Args:
            save_dir: Directory the downloaded images are written to. It is
                created on demand. Defaults to ``"imgs"``.
        """
        self.header = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36"}
        self.save_dir = save_dir

    @staticmethod
    def _build_filename(title_parts, addr):
        """Return a file-system-safe base name (no extension) for one image.

        Args:
            title_parts: List of text nodes returned by the caption XPath
                query; may be empty.
            addr: Image URL, used as a fallback source for the name when the
                caption is missing or blank.

        Returns:
            A non-empty string with unsafe characters replaced by ``"_"``.
        """
        # The XPath text() query yields a list of strings, not a string.
        name = "".join(title_parts).strip()
        if not name:
            # No caption: fall back to the last URL segment without extension.
            name = os.path.splitext(addr.rsplit("/", 1)[-1])[0].strip()
        name = name.translate(Spider._UNSAFE_CHARS)
        return name or "untitled"

    def catch(self, page_num):
        """Fetch one index page and save every image it lists.

        Entries without an image URL are skipped, and an image that fails to
        download is reported and skipped so the rest of the page is still
        processed.

        Args:
            page_num: Number of the index page, substituted into the list URL.
        """
        self.url = f"https://www.169tp.com/xingganmeinv/list_1_{page_num}.html"
        res = requests.get(self.url, headers=self.header, timeout=self.REQUEST_TIMEOUT)
        # Decode explicitly as GBK instead of relying on auto-detection.
        res.encoding = "gbk"

        tree = etree.HTML(res.text)
        if tree is None:
            # Defensive: nothing parseable came back, so there is nothing to do.
            return

        # Make sure the target directory exists before the first write.
        os.makedirs(self.save_dir, exist_ok=True)

        for item in tree.xpath("/html/body/div[4]/ul/li"):
            sources = item.xpath("./a/img/@src")
            if not sources:
                # Entry has no image; indexing [0] would raise IndexError.
                continue
            addr = sources[0]
            name = self._build_filename(item.xpath("./a/p/text()"), addr)

            try:
                img = requests.get(addr, headers=self.header, timeout=self.REQUEST_TIMEOUT)
                # Do not store an HTTP error page as if it were a picture.
                img.raise_for_status()
            except requests.RequestException as exc:
                print(f" ------------------- {name}.jpg 失败: {exc} -----------------------")
                continue

            with open(os.path.join(self.save_dir, f"{name}.jpg"), mode="wb") as f:
                f.write(img.content)
            print(f" ------------------- {name}.jpg 完成 -----------------------")

    def start(self, first_page=1, last_page=499):
        """Crawl the index pages ``first_page`` through ``last_page`` inclusive.

        Args:
            first_page: First page number to fetch. Defaults to 1.
            last_page: Last page number to fetch. Defaults to 499.
        """
        for num in range(first_page, last_page + 1):
            self.catch(num)
            print(f"-------------------------- 第 {num} 页完成-----------------------------------")
 
 
 
if __name__ == "__main__":
    # Start the crawl only when this file is executed as a script,
    # not when it is imported as a module.
    crawler = Spider()
    crawler.start()

  

posted @ 映辉  阅读(25)  评论(0)  编辑  收藏  举报
相关博文:
阅读排行:
· 分享一个免费、快速、无限量使用的满血 DeepSeek R1 模型,支持深度思考和联网搜索!
· 基于 Docker 搭建 FRP 内网穿透开源项目(很简单哒)
· ollama系列01:轻松3步本地部署deepseek,普通电脑可用
· 25岁的心里话
· 按钮权限的设计及实现
点击右上角即可分享
微信分享提示