# 彼岸图网,批量下载图片 (pic.netbian.com: batch image downloader)
import os
import time
from urllib.parse import urljoin

import requests
from lxml import etree

# Site root; links scraped from the pages are resolved against it.
BASE_URL = 'http://pic.netbian.com'

# Seconds to wait for any single HTTP response before giving up, so a stalled
# connection cannot hang the whole batch.
REQUEST_TIMEOUT = 30

# Headers sent with every request (browser User-Agent plus a captured cookie).
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36',
    'Cookie': '_xsrf=2|564c23e9|a46b8cac21ef08bf52221f1b840f5462|1565579147; Hm_lvt_2670efbdd59c7e3ed3749b458cafaa37=1565579182; BAIDU_SSP_lcr=https://cn.bing.com/; _qqq_uuid_="2|1:0|10:1565579148|10:_qqq_uuid_|56:NGVlZTIxMjdiYjYxMTQ5ZTU2NDk2YjhkMmM1ODM2Njg2N2I0Y2JjYw==|520099f3c590773f531a9c5f1eb1d82adba8893e730d3b508c3045904e1cfd9c"; _ga=GA1.2.349374621.1565579182; _gid=GA1.2.361209414.1565579182; Hm_lpvt_2670efbdd59c7e3ed3749b458cafaa37=1565580312'
}

# Menu choice -> category listing URL. Any other number falls back to the
# site's front page (see main()).
CATEGORY_URLS = {
    1: 'http://pic.netbian.com/4kfengjing/',
    2: 'http://pic.netbian.com/4kmeinv/',
    3: 'http://pic.netbian.com/4kyouxi/',
    4: 'http://pic.netbian.com/4kdongman/',
}


def handle_request(url: str, page: int) -> str:
    """Fetch one listing page and return its HTML as text.

    Args:
        url: Listing root URL. The page file name is appended directly, so it
            is expected to end with '/'.
        page: Page number; page 1 is the root URL itself.

    Returns:
        The response body as decoded by ``requests``.

    Raises:
        requests.RequestException: On connection failure or timeout.
    """
    # The first page is served at the listing root; later pages are served as
    # index_<page>.html underneath it.
    if page != 1:
        url = url + 'index_' + str(page) + '.html'
    response = requests.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT)
    return response.text


def parse_content(content: str, file_down: str) -> None:
    """Extract the detail-page links from a listing page and process each one.

    Args:
        content: HTML text of a listing page.
        file_down: Local directory the images are saved into.
    """
    tree = etree.HTML(content)
    detail_links = tree.xpath('//*[@id="main"]/div[3]/ul/li/a/@href')
    # The original author noted "lazy loading" here -- presumably the reason
    # each detail page is visited for the image URL (TODO confirm).
    for href in detail_links:
        parse_secondcontent(urljoin(BASE_URL, href), file_down)


def parse_secondcontent(image_src: str, file_down: str) -> None:
    """Open a detail page, find its image URL(s), and download them.

    Args:
        image_src: Absolute URL of the detail page.
        file_down: Local directory the images are saved into.

    Raises:
        requests.RequestException: On connection failure or timeout.
    """
    response = requests.get(url=image_src, headers=headers, timeout=REQUEST_TIMEOUT)
    tree = etree.HTML(response.text)
    for src in tree.xpath('//*[@id="img"]/img/@src'):
        download_image(urljoin(BASE_URL, src), file_down)


def download_image(image_url: str, file_down: str) -> None:
    """Download one image into the target directory.

    The file is named after the last path component of ``image_url``; an
    existing file with the same name is overwritten.

    Args:
        image_url: Absolute URL of the image.
        file_down: Local directory to save into; created (including missing
            parent directories) if it does not exist.

    Raises:
        requests.RequestException: On connection failure or timeout.
        OSError: If the directory or file cannot be created.
    """
    print('开始下载图片---请稍后')
    dirpath = file_down.replace('\\', '/')
    # makedirs copes with nested paths and with the directory already existing,
    # which a separate exists()/mkdir() pair does not.
    os.makedirs(dirpath, exist_ok=True)
    # The last component of the URL path becomes the local file name.
    filename = os.path.basename(image_url)
    filepath = os.path.join(dirpath, filename)
    response = requests.get(url=image_url, headers=headers, timeout=REQUEST_TIMEOUT)
    if not response.ok:
        # Do not write an HTTP error page to disk under an image file name.
        print('图片下载失败(HTTP %s): %s' % (response.status_code, image_url))
        return
    with open(filepath, 'wb') as fp:
        fp.write(response.content)
    print('图片下载结束')


def main() -> None:
    """Prompt for category, target directory and page range, then download.

    Raises:
        ValueError: If a numeric prompt receives non-integer input.
    """
    number = int(input('请输入分类的代码:1.4k风景 2.4k美女 3.4k游戏 4.4k动漫 5.'))
    # Unknown category numbers fall back to the site's front page.
    url = CATEGORY_URLS.get(number, 'http://pic.netbian.com/')
    file_down = input('请输入本地图片下载地址')
    start_page = int(input('请输入你的起始页面'))
    end_page = int(input('请输入你的终止界面'))
    for page in range(start_page, end_page + 1):
        print('开始下载%s页的内容' % page)
        page_html = handle_request(url, page)
        parse_content(page_html, file_down)
        print('第%s页的图片下载完成' % page)
        # Pause between listing pages.
        time.sleep(3)
    print('谢谢你的使用')


if __name__ == '__main__':
    main()