# pic_scrapy_python
# -*- coding: UTF-8 -*-
"""Download photos from the Unsplash home feed.

The script collects photo ids from the paginated home-feed JSON endpoint and
then saves every photo as ``<n>.jpg`` in the current working directory.
"""
import json
import sys
import time
from contextlib import closing

import requests

# Seconds to wait for the server before giving up; without a timeout a
# stalled connection would block the script forever.
REQUEST_TIMEOUT = 30


class get_photos(object):
    """Collect photo ids from the Unsplash home feed and download the images."""

    def __init__(self):
        # Ids gathered by get_ids(), in the order the feed returned them.
        self.photos_id = []
        # Download URL template; 'xxx' is replaced with a photo id.
        self.download_server = 'https://unsplash.com/photos/xxx/download?force=true'
        # First page of the home feed (JSON).
        self.target = 'https://unsplash.com/napi/feeds/home'
        # Sent with every request made by this class.
        self.headers = {'authorization': 'Client-ID c94869b36aa272dd62dfaeefed769d4115fb3189a9d1ec88ed457207747be626'}

    # Fetch photo ids
    def get_ids(self, extra_pages=4):
        """Append the photo ids of the first feed pages to ``self.photos_id``.

        The first page is ``self.target``; each following page is the
        ``next_page`` URL reported by the previous response.

        :param extra_pages: how many ``next_page`` links to follow after the
            first page (the default of 4 fetches five pages in total).
        :raises requests.RequestException: on a network failure, a timeout or
            an HTTP error status.
        """
        url = self.target
        for page in range(extra_pages + 1):
            # NOTE(review): verify=False disables TLS certificate verification.
            # It is kept to preserve the existing behaviour; drop it unless an
            # intercepting proxy makes it necessary.
            req = requests.get(url=url, headers=self.headers, verify=False,
                               timeout=REQUEST_TIMEOUT)
            # Fail loudly instead of trying to parse an error page as JSON.
            req.raise_for_status()
            html = json.loads(req.text)
            url = html.get('next_page')
            if page == 0:
                print(url)
            self.photos_id.extend(each['id'] for each in html['photos'])
            # Pause between requests so the server is not hammered.
            time.sleep(1)
            if not url:
                # The feed has no further page to follow.
                break

    # Download a photo
    def download(self, photo_id, filename):
        """Stream one photo to ``<filename>.jpg`` in the working directory.

        :param photo_id: photo id substituted into ``self.download_server``.
        :param filename: integer used as the file's base name.
        :raises requests.RequestException: on a network failure, a timeout or
            an HTTP error status.
        """
        target = self.download_server.replace('xxx', photo_id)
        # NOTE(review): verify=False disables TLS certificate verification;
        # kept to preserve the existing behaviour.
        with closing(requests.get(url=target, stream=True, verify=False,
                                  headers=self.headers,
                                  timeout=REQUEST_TIMEOUT)) as r:
            # Check the status before opening the file so an error response
            # is never saved to disk as if it were an image.
            r.raise_for_status()
            # 'wb' rather than 'ab+': appending to a file left over from an
            # earlier run would concatenate two images into a corrupt one.
            with open('%d.jpg' % filename, 'wb') as f:
                for chunk in r.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)


def main():
    """Collect the photo ids, then download every photo in order."""
    gp = get_photos()
    print('获取图片链接中:')
    gp.get_ids()
    print('图片下载中:')
    for index, photo_id in enumerate(gp.photos_id, 1):
        print('正在下载第%d张图片' % index)
        gp.download(photo_id, index)


if __name__ == '__main__':
    main()