随笔- 310  文章- 1  评论- 0  阅读- 86066 

#pip install requests-html

 

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
'''
    Wallpaper downloader — target site: https://pic.netbian.com
    Scrapes listing pages for a chosen category, resolves each detail page
    to its full-size image URL, and downloads the images in parallel.
'''
from requests_html import HTMLSession
import re,os
import requests
from tqdm import tqdm
from functools import partial
from multiprocessing import Pool
# Shared HTMLSession reused for every listing/detail-page request below.
session = HTMLSession()
  
# Collect full-size image download links from one listing page.
def get_urllist(addr):
    '''
    Scrape one listing page and resolve every detail page on it to its
    full-resolution image URL.

    :param addr: site-relative listing path, e.g. '/4kfengjing/' or
                 '/4kfengjing/index_2.html'
    :return: dict mapping full-size image URL -> image title
    '''
    addr = f'https://pic.netbian.com{addr}'
    r = session.get(addr)
    # All absolute links on the listing page; detail pages look like
    # https://pic.netbian.com/tupian/<id>.html
    url_list = r.html.absolute_links
    # Raw string with escaped dots: a bare '.' would match any character
    # and could over-match similar hostnames/paths.
    rule = re.compile(r'(https://pic\.netbian\.com/tupian/[0-9]{1,9}\.html)')
    href_list = re.findall(rule, str(url_list))

    # Visit each detail page to obtain the full-size image URL and title.
    complete_url_dict = {}
    for detail_url in href_list:
        response = session.get(detail_url)
        # The full-size image sits inside <a id="img"><img src=... title=...>.
        imperfect_url = response.html.xpath("//a[@id='img']/img/@src", first=True)
        title = response.html.xpath("//a[@id='img']/img/@title", first=True)
        # The src attribute is site-relative; prepend the host.
        url = f'https://pic.netbian.com{imperfect_url}'
        complete_url_dict[url] = title

    return complete_url_dict
  
  
# Download a single image (worker function for the process pool).
def download(url_dict, save_path):
    '''
    Download one image and save it as '<title>.jpg' under save_path.

    :param url_dict: one (url, title) pair — a single item from the
                     url->title dict produced by get_urllist (the name is
                     kept for backward compatibility with existing callers)
    :param save_path: directory the image is written into (must exist)
    :return: None
    '''
    url, title = url_dict
    # Skip files that were already downloaded on a previous run.
    if f'{title}.jpg' in os.listdir(save_path):
        print(title, '已存在...跳过!')
        return
    with open(f'{save_path}/{title}.jpg', mode='wb') as f:
        f.write(requests.get(url).content)
  
if __name__ == '__main__':
    # Category display name -> site-relative listing path.
    dic = {
        '4k风景': '/4kfengjing/',
        '4k美女': '/4kmeinv/',
        '4k游戏': '/4kyouxi/',
        '4k动漫': '/4kdongman/',
        '4k影视': '/4kyingshi/',
        '4k汽车': '/4kqiche/',
        '4k动物': '/4kdongwu/',
        '4k人物': '/4krenwu/',
        '4k美食': '/4kmeishi/',
        '4k宗教': '/4kzongjiao/',
        '4k背景': '/4kbeijing/',
        '4k手机壁纸': '/shoujibizhi/',
    }
    # Show the numbered category menu.
    print('图片下载器'.center(50, '='))
    for id, i in enumerate(dic.keys()):
        print(f'{id+1}.{i}')
    print(''.center(50, '='))
    # Only int() conversion belongs inside the try: the original bare
    # `except:` also swallowed the SystemExit raised by exit() in the
    # page-count check, printing the misleading '请输入数字!' message.
    try:
        idd = int(input('请选择图片序号:')) - 1
        num = int(input('请选择下载页数:'))
    except ValueError:
        print('请输入数字!')
        exit()
    if num > 10:
        print('为确保安全,最多下载10页!!')
        num = 10
    if num <= 0:
        print('1<下载页数<10')
        exit()
    print('正在获取下载链接-/-/')
    name = list(dic.keys())[idd]
    # First listing page, then pages 2..num (index_<page>.html).
    url_dict = get_urllist(dic[name])
    for x in range(1, num):
        url_dict.update(get_urllist(dic[name] + f'index_{x+1}.html'))
    print('下载图片张数:', len(url_dict))
    save_path = f'./图片/{name}'
    # makedirs also creates the intermediate './图片' directory; os.mkdir
    # would raise FileNotFoundError when the parent does not exist yet.
    os.makedirs(save_path, exist_ok=True)
    print('正在下载-/-/')
    # Bind save_path so the pool can map download over (url, title) items.
    func = partial(download, save_path=save_path)
    pool = Pool(10)
    # list() drains the lazy imap iterator; tqdm renders the progress bar.
    list(tqdm(pool.imap(func, url_dict.items()), total=len(url_dict), ncols=80))
    pool.close()
    pool.join()

  

 posted on   boye169  阅读(28)  评论(0编辑  收藏  举报
相关博文:
阅读排行:
· TypeScript + Deepseek 打造卜卦网站:技术与玄学的结合
· Manus的开源复刻OpenManus初探
· 三行代码完成国际化适配,妙~啊~
· .NET Core 中如何实现缓存的预热?
· 如何调用 DeepSeek 的自然语言处理 API 接口并集成到在线客服系统
点击右上角即可分享
微信分享提示