# pip install requests-html requests tqdm
'''Interactive bulk wallpaper downloader.

Target site: https://pic.netbian.com
'''
import os
import re
import sys
from functools import partial
from multiprocessing import Pool

import requests
from requests_html import HTMLSession
from tqdm import tqdm

BASE_URL = 'https://pic.netbian.com'
# Detail pages look like https://pic.netbian.com/tupian/<digits>.html
DETAIL_PAGE_RE = re.compile(
    r'https://pic\.netbian\.com/tupian/[0-9]{1,9}\.html'
)
# Characters that are not allowed in file names on common file systems.
UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|]')
# Upper bound on listing pages fetched per run.
MAX_PAGES = 10
# Seconds to wait for any single HTTP request before giving up.
REQUEST_TIMEOUT = 30

session = HTMLSession()


# Collect download links
def get_urllist(addr):
    '''Collect the full-size image URLs reachable from one listing page.

    :param addr: site-relative path of a listing page, e.g. '/4kfengjing/'
    :return: dict mapping full image URL -> image title
    '''
    listing = session.get(f'{BASE_URL}{addr}', timeout=REQUEST_TIMEOUT)

    # Keep only links that point at an image detail page; the set removes
    # duplicates so each detail page is requested once.
    detail_pages = set()
    for link in listing.html.absolute_links:
        matched = DETAIL_PAGE_RE.match(link)
        if matched is not None:
            detail_pages.add(matched.group(0))

    # Visit every detail page to read the large image's src and title.
    complete_url_dict = {}
    for page in detail_pages:
        detail = session.get(page, timeout=REQUEST_TIMEOUT)
        imperfect_url = detail.html.xpath(
            "//a[@id='img']/img/@src", first=True
        )
        if not imperfect_url:
            # No image element on this page: skip it instead of building
            # a bogus '...comNone' URL.
            continue
        title = detail.html.xpath("//a[@id='img']/img/@title", first=True)
        if not title:
            # Fall back to the image's own base name so the file is not
            # saved as 'None.jpg'.
            title = os.path.splitext(os.path.basename(imperfect_url))[0]
        # The src attribute is site-relative; prepend the host.
        complete_url_dict[f'{BASE_URL}{imperfect_url}'] = title
    return complete_url_dict


# Download one image
def download(url_dict, save_path):
    '''Download a single image into ``save_path`` unless it already exists.

    Network failures are reported and skipped so that one bad image does
    not abort the whole worker pool.

    :param url_dict: one ``(url, title)`` pair, i.e. a single item of the
        dict returned by :func:`get_urllist`
    :param save_path: directory the image is written to
    :return: None
    '''
    url, title = url_dict[0], url_dict[1]
    # Titles come from the web page and may contain path separators or
    # other characters that are illegal in file names.
    file_name = UNSAFE_FILENAME_CHARS.sub('_', f'{title}.jpg')
    target = os.path.join(save_path, file_name)

    if os.path.exists(target):
        print(title, '已存在...跳过!')
        return

    # Fetch first and open the file only on success, so a failed request
    # never leaves an empty .jpg behind that later runs would skip.
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as err:
        print(title, '下载失败:', err)
        return

    with open(target, mode='wb') as f:
        f.write(response.content)


def main():
    '''Ask for a category and page count, then download the images.'''
    # Image categories offered by the site: display name -> listing path
    dic = {
        '4k风景': '/4kfengjing/',
        '4k美女': '/4kmeinv/',
        '4k游戏': '/4kyouxi/',
        '4k动漫': '/4kdongman/',
        '4k影视': '/4kyingshi/',
        '4k汽车': '/4kqiche/',
        '4k动物': '/4kdongwu/',
        '4k人物': '/4krenwu/',
        '4k美食': '/4kmeishi/',
        '4k宗教': '/4kzongjiao/',
        '4k背景': '/4kbeijing/',
        '4k手机壁纸': '/shoujibizhi/',
    }
    names = list(dic)

    print('图片下载器'.center(50, '='))
    for number, label in enumerate(names, start=1):
        print(f'{number}.{label}')
    print('=' * 50)

    # Only the int() conversions can raise ValueError; all other
    # validation happens outside the try so sys.exit() is never swallowed.
    try:
        idd = int(input('请选择图片序号:')) - 1
        num = int(input('请选择下载页数:'))
    except ValueError:
        print('请输入数字!')
        sys.exit()

    if not 0 <= idd < len(names):
        # Without this check 0 or a negative number would silently pick a
        # category from the end of the list.
        print(f'图片序号应在1~{len(names)}之间!')
        sys.exit()
    if num > MAX_PAGES:
        print('为确保安全,最多下载10页!!')
        num = MAX_PAGES
    if num <= 0:
        print('1<下载页数<10')
        sys.exit()

    print('正在获取下载链接-/-/')
    name = names[idd]
    # First page lives at the category root ...
    url_dict = get_urllist(dic[name])
    # ... following pages at index_2.html, index_3.html, ...
    for page in range(2, num + 1):
        url_dict.update(get_urllist(dic[name] + f'index_{page}.html'))
    print('下载图片张数:', len(url_dict))

    save_path = f'./图片/{name}'
    # makedirs also creates the parent './图片' directory on the first run.
    os.makedirs(save_path, exist_ok=True)

    # Download the images in parallel worker processes
    print('正在下载-/-/')
    func = partial(download, save_path=save_path)
    pool = Pool(10)
    try:
        # list() drains the lazy imap iterator so tqdm can show progress.
        list(tqdm(pool.imap(func, url_dict.items()),
                  total=len(url_dict), ncols=80))
    finally:
        pool.close()
        pool.join()


if __name__ == '__main__':
    main()
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· TypeScript + Deepseek 打造卜卦网站:技术与玄学的结合
· Manus的开源复刻OpenManus初探
· 三行代码完成国际化适配,妙~啊~
· .NET Core 中如何实现缓存的预热?
· 如何调用 DeepSeek 的自然语言处理 API 接口并集成到在线客服系统