【爬虫】必应图片按关键词进行图片下载
"""
Download Bing image-search results for a keyword.

The key is to find the correct URL and then work out the pattern of the
parameters that make it up; comparing a few URLs is enough to see the pattern.

The script creates a directory named after the keyword, changes into it, and
saves the images as 1.jpg, 2.jpg, ... in the order they are fetched.
"""

import json
import os
import time

import requests
from fake_useragent import UserAgent
from lxml import etree

# One random User-Agent is chosen at start-up and reused for every request.
headers = {'User-Agent': UserAgent().random}

keyword = "周星驰"
# The three placeholders are filled, in order, with: keyword, first, relp.
url = 'https://cn.bing.com/images/async?q={}&first={}&count=35&relp={}&tsc=ImageHoverTitle&mmasync=1'

# Seconds to wait for any single HTTP request; without a timeout a stalled
# connection would block the script forever.
REQUEST_TIMEOUT = 10

# Create the output directory if needed and work inside it.
os.makedirs(keyword, exist_ok=True)
os.chdir(keyword)

first = 35
relp = 35
count = 1  # sequence number used as the file name of the next saved image

# Five result pages are requested: first = 35, 70, 105, 140, 175.
while first <= 35 * 5:
    # Fetch and parse one result page.  A failure here is reported and the
    # page is skipped, so a persistent error can never turn into an endless
    # retry of the same page.
    try:
        page = requests.get(
            url.format(keyword, first, relp),
            headers=headers,
            timeout=REQUEST_TIMEOUT,
        )
        page.raise_for_status()
        html = etree.HTML(page.content.decode())
    except (requests.RequestException, ValueError, etree.LxmlError) as e:
        # ValueError also covers UnicodeDecodeError from .decode().
        print(e)
        html = None

    # etree.HTML may yield None for an empty document.
    pics_url = html.xpath("//a[@class='iusc']/@m") if html is not None else []

    for pic_meta in pics_url:
        # Each "m" attribute holds a JSON object; the image address is read
        # from its 'turl' key.
        # NOTE(review): 'turl' looks like the thumbnail URL - confirm whether
        # the full-size image was intended.
        try:
            pic_url = json.loads(pic_meta)['turl']
            res = requests.get(pic_url, headers=headers, timeout=REQUEST_TIMEOUT)
            res.raise_for_status()
        except (requests.RequestException, ValueError, KeyError, TypeError) as e:
            # Skip just this image; the rest of the page is still processed
            # and no already-saved image is downloaded again.
            print(e)
            continue

        with open('{}.jpg'.format(count), "wb") as f:
            f.write(res.content)

        print('第{}张已下载完成'.format(count))

        count += 1
        time.sleep(0.5)  # short pause between image downloads

    # Always advance to the next page, whether or not this one succeeded.
    first += 35
    relp += 35