Python 多线程采集

import requests
import json
import threading

# Headers applied to every request made through the shared session below.
Default_Header = {
    # Fill in the specific request headers yourself.
}

# One module-level session so the headers above are sent on each call
# made through it.
_session = requests.Session()
_session.headers.update(Default_Header)

# Multi-threaded download
class myThread(threading.Thread):
    """Thread that downloads a single image when started.

    Attributes:
        imgUrl: Value passed to ``download`` as its first argument.
        fname: Value passed to ``download`` as its second argument.
    """

    def __init__(self, imgUrl, fname):
        """Store the download arguments; no work happens until ``start()``."""
        super().__init__()
        self.fname = fname
        self.imgUrl = imgUrl

    def run(self):
        """Print a progress line, then hand off to ``download``."""
        source = self.imgUrl
        print("downloading", source)
        download(source, self.fname)

def download(fileid, type, timeout=30):
    """Fetch one image from http://img.hb.aicdn.com/ and write it to a file.

    This is a best-effort helper: any network, HTTP-status or file error is
    printed and the function returns normally, so one failed image does not
    take down the calling thread with a traceback.

    Args:
        fileid: Key appended to the image host URL to form the download URL.
        type: Path of the output file (despite the name, this is the
            destination filename; the name is kept for existing callers).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        None.
    """
    img_url = "http://img.hb.aicdn.com/" + fileid
    # Alias the parameter so the body does not keep using a name that
    # shadows the ``type`` builtin.
    out_path = type

    try:
        imgresp = requests.get(img_url, timeout=timeout)
        # Reject 4xx/5xx responses so an error page is never saved as an image.
        imgresp.raise_for_status()
    except requests.RequestException as e:
        print(e)
        return

    try:
        # ``with`` guarantees the handle is flushed and closed even if the
        # write fails part-way.
        with open(out_path, 'wb') as out:
            out.write(imgresp.content)
    except Exception as e:
        # Broad on purpose: this runs at the top of a worker thread and the
        # original contract is to report the error and carry on.
        print(e)
if __name__ == "__main__":
    # Request the pin page through the shared session so the configured
    # headers are sent; the response body is parsed as JSON below.
    resp = _session.get('http://huaban.com/pins/873774526/?xxxxxx', timeout=30)
    # Fail with a clear HTTP error instead of a confusing JSON decode error
    # when the server answers with an error page.
    resp.raise_for_status()
    data = json.loads(resp.text)

    # Start one download thread per pin on the board.
    workers = []
    for pin in data['pin']['board']['pins']:
        key = pin['file']['key']
        print(key)
        worker = myThread(key, key + '.jpg')
        worker.start()
        workers.append(worker)

    # Wait explicitly for every download to finish before the script ends.
    for worker in workers:
        worker.join()

  

posted @ 2019-04-24 16:46  wangway  阅读(743)  评论(0)  编辑  收藏  举报