django 多线程下载图片

example1:

from multiprocessing.dummy import Pool as ThreadPool #多线程  
import time  
import urllib2  
  
# Pages fetched by both timing runs below.
urls = [
    'http://www.python.org',
    'http://www.python.org/about/',
    'http://www.onlamp.com/pub/a/python/2003/04/17/metaclasses.html',
    'http://www.python.org/doc/',
]

# Single-threaded baseline: open the URLs one after another.
start = time.time()
results = map(urllib2.urlopen, urls)
normal_elapsed = time.time() - start
print('Normal: %s' % normal_elapsed)

# Multi-threaded run: the same calls are spread over a thread pool.
start2 = time.time()
# Four workers; with no argument the pool size defaults to the CPU count.
pool = ThreadPool(4)
# Each worker calls urllib2.urlopen(url); results come back in input order.
results2 = pool.map(urllib2.urlopen, urls)
pool.close()
pool.join()
# The measured time includes shutting the pool down, as in the lines above.
pool_elapsed = time.time() - start2
print('Thread Pool: %s' % pool_elapsed)

example2:

#!/usr/bin/env python
# coding=utf-8

import os
import random
import requests
from cStringIO import StringIO
from PIL import Image
from multiprocessing.dummy import Pool as ThreadPool

class Labelcode(object):
    """Label captcha images on disk and bulk-download new ones.

    Images are read from a directory (``images`` by default) located next to
    this file.  An image whose base name contains ``_`` at index 2 or later
    is counted as already labelled; ``get_label_img`` produces such names by
    renaming a file to ``<result>_<original stem>.jpg``.

    Attributes set by ``__init__``:
        base_path:  absolute directory containing this file.
        unfinished: list of full paths of images still to be labelled.
        label_msg:  dict with ``totalCnt``, ``finishedCnt`` and ``fname``
                    (the image currently offered for labelling).
    """

    @staticmethod
    def _is_labelled(fname):
        """Return True if the base name of *fname* carries a label prefix."""
        # Only the file name is inspected: an underscore in a parent
        # directory must not mark every image as labelled.
        return os.path.basename(fname).find('_') > 1

    def __init__(self, path='images'):
        """Scan *path* (relative to this file) and build the labelling state.

        Raises OSError if the directory cannot be listed.
        """
        # abspath matters: dirname(__file__) is '' when the script is run
        # from its own directory, and '%s/images' % '' would point at the
        # filesystem root.
        self.base_path = os.path.dirname(os.path.abspath(__file__))
        print('self.base_path %s' % self.base_path)

        img_dir = os.path.join(self.base_path, path)
        totalImg = [os.path.join(img_dir, name) for name in os.listdir(img_dir)]
        finished = [name for name in totalImg if self._is_labelled(name)]
        self.unfinished = [name for name in totalImg
                           if not self._is_labelled(name)]

        self.label_msg = {}  # current labelling status
        self.label_msg['totalCnt'] = len(totalImg)
        self.label_msg['finishedCnt'] = len(finished)

        if self.unfinished:
            self.label_msg['fname'] = random.choice(self.unfinished)
        else:
            self.label_msg['fname'] = 'no img exists'

    def get_label_img(self, fname=None, result=''):
        """Apply a label to *fname* and return the updated status dict.

        When *fname* is one of the unfinished images and *result* has more
        than one character, the file is renamed in place to
        ``<result>_<stem>.jpg`` and the counters are updated.  In every case
        a new random unfinished image is selected, or ``'no img exists'``
        when none are left.

        Returns:
            ``self.label_msg`` (the same dict object on every call).
        """
        print(self.base_path)
        print('---' * 30)
        if fname in self.unfinished and result and len(result) > 1:
            stem = os.path.basename(fname).split('.')[0]
            # Rename inside the image's own directory so a non-default
            # ``path`` passed to __init__ is respected.
            newName = os.path.join(os.path.dirname(fname),
                                   '%s_%s.jpg' % (result, stem))
            if os.path.exists(fname):
                os.rename(fname, newName)   # store the label in the name
                self.unfinished.remove(fname)
                self.label_msg['finishedCnt'] += 1

        # Pick the next image only when one exists: random.choice raises
        # IndexError on an empty list, which happens after the last rename.
        if self.unfinished:
            self.label_msg['fname'] = random.choice(self.unfinished)
        else:
            self.label_msg['fname'] = 'no img exists'

        return self.label_msg

    def download_img(self, img_url=None, web_name=None, cnt=1000):
        """Download *img_url* *cnt* times with a pool of 10 threads.

        Each response is decoded as an image, converted to RGB and saved as
        ``<base_path>/images/<web_name>/<index>.jpg``.  Indices start at 500
        (offset kept from the original code; its reason is not visible
        here).  Failed downloads are printed and skipped.

        Args:
            img_url:  URL to fetch; a falsy value returns immediately.
            web_name: sub-directory name; stripped and lower-cased, and
                      None is treated as an empty name.
            cnt:      number of downloads; falsy values mean 0.

        Returns:
            dict with ``totalCnt`` (the *cnt* argument as given) and
            ``finishedCnt`` (number of images saved successfully).
        """
        res = {'totalCnt': cnt, 'finishedCnt': 0}
        if not img_url:
            return res

        web_name = (web_name or '').strip().lower()
        cnt = int(cnt) if cnt else 0

        target_dir = os.path.join(self.base_path, 'images', web_name)
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)

        def fetch_one(idx):
            """Download one image to <idx>.jpg; return True on success."""
            try:
                # The timeout keeps a stalled server from blocking a worker
                # (and therefore pool.map) forever.
                raw = StringIO(requests.get(img_url, timeout=30).content)
                img = Image.open(raw).convert('RGB')
                img.save(os.path.join(target_dir, '%d.jpg' % idx))
            except Exception as e:
                # Best effort: report the failure and carry on.
                print(e)
                return False
            return True

        pool = ThreadPool(10)  # 10 concurrent worker threads
        try:
            outcomes = pool.map(fetch_one, range(500, 500 + cnt))
        finally:
            pool.close()
            pool.join()

        # Count from the return values instead of incrementing a shared
        # counter in the workers, which is not atomic across threads.
        res['finishedCnt'] = sum(1 for ok in outcomes if ok)
        return res

if __name__ == '__main__':
    # Demo run: fetch ten captcha images from the URL below into the
    # '360' sub-directory and print the resulting counters.
    url = 'https://passport.360.cn/captcha.php?m=create&app=i360&scene=login&userip=n7ASHVefL%2FAiu7j%2BPntTvQ%3D%3D&level=default&sign=c5d208'
    test = Labelcode()
    summary = test.download_img(url, '360', 10)
    print(summary)

posted on 2017-06-17 15:45 星空守望者--jkmiao 阅读(542) 评论(0) 收藏举报