Python爬虫之一键保存全部必应高清1080P壁纸
源码:
"""Download every Bing wallpaper listed on bing.ioliu.cn at 1920x1080.

The script walks the paginated gallery, collects the thumbnail ``<img>``
URLs from each page, rewrites each one to its 1920x1080 variant and saves
the image into the current working directory.
"""
import requests
from lxml import etree

# Gallery pages are requested as ?p=FIRST_PAGE .. ?p=LAST_PAGE (inclusive).
FIRST_PAGE = 1
LAST_PAGE = 151

# Seconds to wait on any single HTTP request; without a timeout one stalled
# connection would hang the whole crawl.
REQUEST_TIMEOUT = 30

# Browser-like headers sent with the listing-page requests. They are built
# once because they do not change between pages.
# Deliberately NOT sent:
#   * If-None-Match - makes the request conditional, so the server may reply
#     304 Not Modified with an empty body and no images would be found.
#   * "br" in Accept-Encoding - requests only decodes brotli when an optional
#     brotli package is installed; otherwise the page text is unreadable.
HEADERS = {
    'Host': 'bing.ioliu.cn',
    'Connection': 'keep-alive',
    'Cache-Control': 'max-age=0',
    'DNT': '1',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36',
    'Sec-Fetch-Dest': 'document',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'Sec-Fetch-Site': 'none',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-User': '?1',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Cookie': '_ga=GA1.2.1389080226.1586346124; _gid=GA1.2.1179718529.1586346124; Hm_lvt_667639aad0d4654c92786a241a486361=1586346124; likes=; Hm_lpvt_667639aad0d4654c92786a241a486361=1586347115',
}


def _wallpaper_target(thumb_src):
    """Return ``(full_hd_url, file_name)`` for a thumbnail URL, or ``None``.

    A thumbnail URL looks like
    ``http://h1.ioliu.cn/bing/SantoriniAerial_ZH-CN9367767863_640x480.jpg?imageslim``.
    Any ``src`` that lacks the ``bing/`` path or the ``_640`` size marker is
    not a wallpaper thumbnail (logo, icon, ...) and yields ``None`` so the
    caller can skip it instead of downloading a bogus URL.
    """
    if 'bing/' not in thumb_src or '_640' not in thumb_src:
        return None
    pic_url = thumb_src.split('_640')[0] + '_1920x1080.jpg'
    pic_name = thumb_src.split('bing/')[-1].split('_')[0] + '.jpg'
    return pic_url, pic_name


for i in range(FIRST_PAGE, LAST_PAGE + 1):
    print('page:\t', i)
    url = 'https://bing.ioliu.cn/?p={}'.format(i)

    try:
        # NOTE(review): verify=False disables TLS certificate verification.
        # It is kept from the original script in case the site's certificate
        # requires it - confirm, and remove it if the certificate is valid.
        res = requests.get(url, headers=HEADERS, verify=False,
                           timeout=REQUEST_TIMEOUT)
        res.raise_for_status()
        page = etree.HTML(res.text)
    except (requests.RequestException, etree.LxmlError) as e:
        # One broken listing page must not abort the remaining pages.
        print(i, url, e)
        continue

    if page is None:
        # The parser produced no document (e.g. an empty response body).
        continue

    for pic in page.xpath('//img/@src'):
        target = _wallpaper_target(pic)
        if target is None:
            continue
        pic_url, pic_name = target

        try:
            pic_res = requests.get(pic_url, timeout=REQUEST_TIMEOUT)
            # Do not save an HTML error page under a .jpg name.
            pic_res.raise_for_status()
            with open(pic_name, 'wb') as f:
                f.write(pic_res.content)
        except (requests.RequestException, OSError) as e:
            print(i, pic, e)
爬取结果: