Python 爬取百度图片

import requests
from bs4 import BeautifulSoup
import re
import os
import json
from urllib import parse
# Raw HTTP header block, one "Name: value" pair per line, framed by a leading
# and a trailing newline. It is handed to DownBaiDuImg(...) by the script at
# the bottom of this file, which parses it into a dict.
# NOTE(review): the fields below (Age, ETag, Server, Content-Length,
# Content-Type: image/jpeg, ...) look like *response* headers captured from an
# image download rather than request headers. No request in this file sends
# them; confirm their purpose before attaching them to an outgoing request.
headers='''
Accept-Ranges: bytes
Access-Control-Allow-Origin: *
Age: 570820
Cache-Control: max-age=2628000
Connection: keep-alive
Content-Length: 45163
Content-Type: image/jpeg
Date: Sat, 11 May 2019 06:17:00 GMT
ETag: 3448023fd5dc275ff4088c50d1da7d5f
Expires: Tue, 04 Jun 2019 01:43:20 GMT
Last-Modified: Thu, 01 Jan 1970 00:00:00 GMT
Ohc-Response-Time: 1 0 0 0 0 0
Server: JSP3/2.0.14
'''

class DownBaiDuImg(object):
    """Download Baidu image-search results for one keyword.

    An instance keeps the URL-quoted keyword (``kw``), a running counter used
    to name the saved files (``num``) and the parsed form of the header text
    given to the constructor (``heades``; the spelling is kept because it is
    part of the instance's public attributes).
    """

    # Request headers sent with the search-listing request, in raw
    # "Name: value" form; parsed by gen_headers() on every doing() call.
    # NOTE(review): the Cookie line embeds what looks like a logged-in Baidu
    # session (BDUSS). It should not live in source control - load it from
    # the environment or a local config file, and rotate the session.
    listheader='''
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
Accept-Encoding: gzip, deflate
Accept-Language: zh-CN,zh;q=0.9,en;q=0.8
Cache-Control: max-age=0
Connection: keep-alive
Cookie: BDIMGISLOGIN=0; winWH=%5E6_1366x631; BDqhfp=%E6%AF%94%E5%9F%BA%E5%B0%BC%26%26-10-1undefined%26%260%26%261; BAIDUID=ED5602028E2013468035151C8C3C3A53:FG=1; BIDUPSID=ED5602028E2013468035151C8C3C3A53; PSTM=1552569672; BDSFRCVID=ZoFOJeC62GC4q3c9ZolNh5mNHGcamB3TH6aoUWSSBZNRGvSy07o7EG0PqU8g0Kub55HBogKK0mOTHv8F_2uxOjjg8UtVJeC6EG0P3J; H_BDCLCKID_SF=tJAq_D0hfIP3fP36q45Mq4tHen6y0fRZ5mAqoq3nJPD5HITLhPvFM5LDX47x5-oL0J7naIQqaM5RVUOtWxTCQnK92H0f25b43bRTQxKy5KJvfJ_Gjf7IhP-UyN3LWh37bJblMKoaMp78jR093JO4y4Ldj4oxJp8eWJQ2QJ8BJI02MDJP; BDUSS=k5MTWt1V2RvRHRBMVBrUVFMeURRY243ZWRMNDEtMkg1Mm94VnNYcVp5cUh5cmxjQVFBQUFBJCQAAAAAAAAAAAEAAAA64oOWs8y36rPYMQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAIc9klyHPZJcW; uploadTime=1557547291054; cleanHistoryStatus=0; BDRCVFR[Tp5-T0kH1pb]=mk3SLVN4HKm; delPer=0; PSINO=1; BDRCVFR[CCf63Vmik7b]=mk3SLVN4HKm; BDRCVFR[dG2JNJb_ajR]=mk3SLVN4HKm; H_PS_PSSID=1441_28939_28981_21126_28519_28775_28723_28963_28836_28585_26350_22157; indexPageSugList=%5B%22%E6%AF%94%E5%9F%BA%E5%B0%BC%22%2C%22%E7%BE%8E%E5%A5%B3%22%5D
Host: image.baidu.com
Upgrade-Insecure-Requests: 1
User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36
'''

    # Directory the downloaded files are written to (created on demand).
    save_dir = '../images/'

    # Number of results requested per listing call.
    page_size = 30

    def __init__(self, header, kw):
        """Store the parsed headers and the URL-quoted search keyword.

        Args:
            header: Raw header text, one ``Name: value`` pair per line.
            kw: Search keyword; stored percent-encoded in ``self.kw``.
        """
        super().__init__()
        self.heades = self.gen_headers(header)
        self.num = 0
        self.kw = parse.quote(kw)

    def gen_headers(self, s):
        """Parse raw ``Name: value`` header text into a dict.

        Blank lines and lines without a colon are skipped. Each line is split
        on its first colon only, so values that themselves contain ``': '``
        are preserved whole. Names and values are stripped of surrounding
        whitespace.

        Args:
            s: Header text with one header per line.

        Returns:
            dict mapping header name to header value.
        """
        parsed = {}
        for line in s.splitlines():
            name, sep, value = line.partition(':')
            name = name.strip()
            if not sep or not name:
                continue
            parsed[name] = value.strip()
        return parsed

    @staticmethod
    def _extension_for(url):
        """Return the file extension of *url*'s path, or ``'.jpg'`` if none."""
        # Only the path is inspected so a query string cannot leak into the
        # file name.
        return os.path.splitext(parse.urlparse(url).path)[-1] or '.jpg'

    def downimg(self, url, name):
        """Download *url* and save the body as ``save_dir/name``.

        Args:
            url: Address to fetch.
            name: File name to create inside ``save_dir``.

        Returns:
            True when the file was written; False on a network error, a
            non-success HTTP status or a filesystem error.
        """
        try:
            response = requests.get(url, timeout=2)
            # Without this check an HTML error page would be saved as an image.
            response.raise_for_status()
            os.makedirs(self.save_dir, exist_ok=True)
            with open(os.path.join(self.save_dir, name), 'wb') as f:
                f.write(response.content)
        except (requests.RequestException, OSError):
            return False
        return True

    def doing(self, page):
        """Fetch one listing page and download every image it describes.

        Args:
            page: Offset of the first result to request (0, 30, 60, ...).

        Raises:
            requests.RequestException: if the listing request itself fails
                or returns an error status.
        """
        listheader = self.gen_headers(self.listheader)
        # "pn" is the result offset and "rn" the number of results per call.
        # The previous URL sent a fixed pn=60 and put the offset into rn, so
        # successive calls never advanced through the result list.
        list_url = (
            'http://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj'
            '&ct=201326592&is=&fp=result&queryWord=' + self.kw +
            '&cl=2&lm=-1&ie=utf-8&oe=utf-8&adpicid=&st=-1&z=&ic=&hd=&latest='
            '&copyright=&word=' + self.kw +
            '&s=&se=&tab=&width=&height=&face=&istype=&qc=&nc=&fr=&expermode='
            '&force=&cg=girl'
            '&pn=' + str(page) + '&rn=' + str(self.page_size)
        )
        response = requests.get(list_url, headers=listheader, timeout=10)
        response.raise_for_status()

        try:
            payload = json.loads(response.text)
        except ValueError:
            # Malformed listing JSON: skip this page instead of aborting.
            print('列表解析失败')
            return
        items = payload.get('data') if isinstance(payload, dict) else None

        for item in items or []:
            if not isinstance(item, dict) or 'thumbURL' not in item:
                continue
            thumb = item['thumbURL']
            # Fall back to the thumbnail when no hover URL is supplied.
            hover = item.get('hoverURL') or thumb
            imgurl = (
                'http://image.baidu.com/search/down?tn=download&ipn=dwnl'
                '&word=download&ie=utf8&fr=result&url=' + parse.quote(hover) +
                '&thumburl=' + parse.quote(thumb)
            )
            self.num += 1
            filename = str(self.num) + self._extension_for(thumb)
            if self.downimg(imgurl, filename):
                print('下载成功')
            else:
                print('下载失败')
# Script entry point. The guard keeps the downloads from starting when this
# file is merely imported.
if __name__ == '__main__':
    obj = DownBaiDuImg(headers, '绿色护眼壁纸大全')
    # doing() is called with 0, 30, 60, 90 and 120.
    for x in range(5):
        obj.doing(x * 30)

posted @ 2019-05-11 20:16  酷酷的城池  阅读(850)  评论(0)  编辑  收藏  举报