彼岸图网,批量下载图片

 1 import requests
 2 import time
 3 from lxml import etree
 4 import os
 5 
# HTTP headers sent with every request made by this script: a desktop Chrome
# User-Agent string plus a hard-coded Cookie header.
# NOTE(review): the cookie looks like a captured browser session and has
# presumably expired -- confirm whether the site still needs it at all.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36',
    'Cookie': '_xsrf=2|564c23e9|a46b8cac21ef08bf52221f1b840f5462|1565579147; Hm_lvt_2670efbdd59c7e3ed3749b458cafaa37=1565579182; BAIDU_SSP_lcr=https://cn.bing.com/; _qqq_uuid_="2|1:0|10:1565579148|10:_qqq_uuid_|56:NGVlZTIxMjdiYjYxMTQ5ZTU2NDk2YjhkMmM1ODM2Njg2N2I0Y2JjYw==|520099f3c590773f531a9c5f1eb1d82adba8893e730d3b508c3045904e1cfd9c"; _ga=GA1.2.349374621.1565579182; _gid=GA1.2.361209414.1565579182; Hm_lpvt_2670efbdd59c7e3ed3749b458cafaa37=1565580312'
            }
def handle_request(url,page):
    """Fetch one listing page of a category and return its HTML text.

    The first page of a category is served at the category URL itself;
    every other page is requested as ``index_<page>.html`` appended to it.

    Args:
        url: Category base URL. The page file name is appended directly,
            so the URL is expected to end with ``/``.
        page: Page number; ``1`` selects the category URL unchanged.

    Returns:
        The response body as text (``response.text``).

    Raises:
        requests.exceptions.RequestException: On a network failure or when
            the server does not answer within the timeout.
    """
    if page != 1:
        url = url + 'index_' + str(page) + '.html'
    # Without a timeout requests waits indefinitely on a stalled server,
    # which would hang the whole batch download.
    response = requests.get(url=url, headers=headers, timeout=30)
    return response.text
20 
# Parse a listing page and follow every detail-page link found on it (nothing is returned).
def parse_content(content,file_down):
    """Walk every thumbnail link on a listing page and process its detail page.

    Args:
        content: HTML text of a listing page.
        file_down: Download directory, passed through unchanged to
            parse_secondcontent.
    """
    site_root = 'http://pic.netbian.com'
    document = etree.HTML(content)
    # Each <li> in the listing holds a link to one picture's detail page.
    detail_hrefs = document.xpath('//*[@id="main"]/div[3]/ul/li/a/@href')
    for href in detail_hrefs:
        # The hrefs are appended to the site root to form the detail-page URL.
        parse_secondcontent(site_root + href, file_down)
32 
def parse_secondcontent(image_src,file_down):
    """Open one picture's detail page and download the image(s) shown on it.

    Args:
        image_src: URL of the detail page.
        file_down: Download directory, passed through to download_image.

    Raises:
        requests.exceptions.RequestException: On a network failure or when
            the server does not answer within the timeout.
    """
    # A timeout keeps one stalled detail page from hanging the whole run.
    response = requests.get(url=image_src, headers=headers, timeout=30)
    tree = etree.HTML(response.text)
    # Every matching @src is appended to the site root to build the image URL.
    for src in tree.xpath('//*[@id="img"]/img/@src'):
        download_image('http://pic.netbian.com' + src, file_down)
40 
def download_image(image_url,file_down):
    """Download a single image and store it inside ``file_down``.

    The file is named after the last path component of ``image_url``.

    Args:
        image_url: URL of the image file.
        file_down: Target directory; backslashes are converted to forward
            slashes. An empty string means the current directory.

    Raises:
        requests.exceptions.RequestException: On a network failure or when
            the server does not answer within the timeout.
        OSError: If the directory cannot be created or the file cannot be
            written.
    """
    print('开始下载图片---请稍后')
    # An empty path would make directory creation fail, so fall back to '.'.
    dirpath = file_down.replace('\\','/') or '.'
    # makedirs also creates missing parent directories and, with
    # exist_ok=True, does not fail when the directory already exists.
    os.makedirs(dirpath, exist_ok=True)
    filename = os.path.basename(image_url)
    filepath = os.path.join(dirpath, filename)
    response = requests.get(url=image_url, headers=headers, timeout=30)
    if not response.ok:
        # Do not save an HTTP error page to disk under an image file name.
        print('图片下载失败,状态码:%s' % response.status_code)
        return
    with open(filepath, 'wb') as fp:
        fp.write(response.content)
    print('图片下载结束')
56 
def main():
    """Interactively ask for a category, a target directory and a page range,
    then download every listing page in that range.

    Any category code other than 1-4 selects the site's home page. The loop
    pauses three seconds after each page.
    """
    category_urls = {
        1: 'http://pic.netbian.com/4kfengjing/',
        2: 'http://pic.netbian.com/4kmeinv/',
        3: 'http://pic.netbian.com/4kyouxi/',
        4: 'http://pic.netbian.com/4kdongman/',
    }
    choice = int(input('请输入分类的代码:1.4k风景 2.4k美女 3.4k游戏 4.4k动漫 5.'))
    url = category_urls.get(choice, 'http://pic.netbian.com/')
    file_down = input('请输入本地图片下载地址')
    first_page = int(input('请输入你的起始页面'))
    last_page = int(input('请输入你的终止界面'))
    for page in range(first_page, last_page + 1):
        print('开始下载%s页的内容' % page)
        # Fetch the listing HTML for this page, then download all its images.
        listing_html = handle_request(url, page)
        parse_content(listing_html, file_down)
        print('第%s页的图片下载完成' % page)
        time.sleep(3)
    print('谢谢你的使用')
80 
# Start the interactive downloader only when this file is run as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()

 

posted @ 2024-07-09 13:44  尘尘尘尘  阅读(11)  评论(0编辑  收藏  举报