# 爬取百度图片 (scrape thumbnails from Baidu image search)

import User_Agent #自己的编写的请求头
import requests
import os
from urllib import parse
from time import time, sleep
import json

# Script overview: ask for a keyword and a page count, collect thumbnail URLs
# from Baidu's image-search JSON endpoint, then download every thumbnail into
# a per-keyword directory.

img_url_list = []

# Placeholders, in order: URL-quoted keyword, result offset, and a timestamp
# appended as a bare query parameter (presumably for cache busting).
url = 'https://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj&word={}&pn={}&{}'
headers = {'User-Agent': User_Agent.ua()}

name = input('请输入您要下载的图片:')
pn = int(input('要下载几页?:'))

# Raw string so the backslashes are literal; the original literals ended in
# "\'" which escaped the closing quote and made the file unparsable.
save_dir = os.path.join(r'D:\IDEAXiangMu\python\images', name)
os.makedirs(save_dir, exist_ok=True)
p_name = parse.quote(name)

# Phase 1: gather thumbnail URLs, one request per result page.
for page_no in range(1, pn + 1):
    # Use a separate name for the offset so the requested page count in `pn`
    # is not overwritten while iterating.
    # NOTE(review): offsets start at 30, so results 0-29 are never requested;
    # kept as in the original -- confirm whether that is intended.
    offset = page_no * 30
    now = time()
    response = requests.get(url=url.format(p_name, offset, now), headers=headers)

    d = json.loads(response.text)

    # The original skipped the last entry of "data" (presumably a trailing
    # empty object). Filtering on the key covers that case and also any other
    # entry that lacks a thumbnail, instead of raising KeyError.
    for item in d['data']:
        thumb_url = item.get('thumbURL')
        if thumb_url:
            img_url_list.append(thumb_url)

# Phase 2: download every collected thumbnail.
cnt = 0
page = 0
for cnt, img_url in enumerate(img_url_list, start=1):
    # Take the extension from the URL path only, so query strings cannot leak
    # characters such as '/', '?' or '&' into the file name; default to jpg
    # when the URL carries no suffix.
    houzhui = os.path.splitext(parse.urlparse(img_url).path)[1].lstrip('.') or 'jpg'

    if cnt % 30 == 0:
        # Every 30 images counts as one page; pause briefly between pages.
        page += 1
        print('%s第%d页下载成功' % (name, page))
        sleep(1)

    response = requests.get(url=img_url, headers=headers)

    # Write into the directory created above; the original wrote to a
    # placeholder path that did not match it.
    with open(os.path.join(save_dir, name + str(cnt) + '.' + houzhui), 'wb') as f:
        f.write(response.content)
    print('正在获取' + str(cnt) + '数据')
# posted on 2020-12-24 12:08  鬼纔  阅读(122)  评论(0)  编辑  收藏  举报