# Python 多线程采集 (multi-threaded scraping with Python)
import json
import threading

import requests

Default_Header = {
    # Fill in the specific request headers yourself.
}

# Shared session used for the pin-page request; carries Default_Header.
_session = requests.session()
_session.headers.update(Default_Header)


# Multi-threaded download
class myThread(threading.Thread):
    """Thread that downloads a single image by calling ``download``.

    Args:
        imgUrl: File key that ``download`` appends to the image host URL.
        fname: Path of the local file the image bytes are written to.
    """

    def __init__(self, imgUrl, fname):
        super().__init__()
        self.imgUrl = imgUrl
        self.fname = fname

    def run(self):
        """Print a progress line, then download the image to ``fname``."""
        print("downloading", self.imgUrl)
        download(self.imgUrl, self.fname)


def download(fileid, type, timeout=10):
    """Fetch one image from the image host and write it to a local file.

    Failures (network error, non-2xx HTTP status, file-system error) are
    printed rather than raised, so one bad image does not produce an
    unhandled exception in its worker thread.

    Args:
        fileid: File key appended to ``http://img.hb.aicdn.com/``.
        type: Destination file path. The name shadows the builtin but is
            kept so existing keyword callers continue to work.
        timeout: Seconds to wait for the server before giving up.
    """
    img_url = "http://img.hb.aicdn.com/" + fileid
    try:
        # Standalone request (not the shared _session), as in the original.
        imgresp = requests.get(img_url, timeout=timeout)
        # Do not save an HTTP error page under an image file name.
        imgresp.raise_for_status()
        # The context manager closes the file even if the write fails.
        with open(type, "wb") as out:
            out.write(imgresp.content)
    except (requests.RequestException, OSError) as e:
        print(e)


if __name__ == "__main__":
    resp = _session.get("http://huaban.com/pins/873774526/?xxxxxx", timeout=10)
    data = json.loads(resp.text)
    pins = data["pin"]["board"]["pins"]

    # One worker thread per pin; each image is saved as "<key>.jpg".
    workers = []
    for pin in pins:
        key = pin["file"]["key"]
        print(key)
        worker = myThread(key, key + ".jpg")
        worker.start()
        workers.append(worker)

    # Wait for every download to finish before the script exits.
    for worker in workers:
        worker.join()