# 108 - Python: an example of crawling data with a process pool

from urllib.request import urlopen
from multiprocessing import Pool

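# HTTP status codes worth knowing when crawling:
#   200       the page was returned normally
#   404       the page was not found
#   502, 504  gateway errors (bad gateway / gateway timeout)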

def get_urllib(url):
    """Download *url* and return it together with the decoded page text.

    The URL is returned alongside the content because the pool callback
    ``call_back`` unpacks its argument as ``(url, content)``; returning the
    bare text would make that unpacking fail.

    Args:
        url: Address to fetch with ``urlopen``.

    Returns:
        A ``(url, content)`` tuple, where ``content`` is the response body
        decoded as UTF-8.

    Raises:
        urllib.error.URLError: If ``urlopen`` cannot complete the request.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # The context manager closes the response even if read/decode raises.
    with urlopen(url) as response:
        return url, response.read().decode('utf-8')

def call_back(args):
    """Report one finished download.

    Args:
        args: A two-item iterable ``(url, content)``.

    Prints the URL with the length of the content, then the raw ``args``
    value itself.
    """
    page_url, body = args
    body_length = len(body)
    print(page_url, body_length)
    print(args)

if __name__ == '__main__':
    # Only one page is crawled here; the pool is created with 5 workers.
    url = 'http://maoyan.com/board'

    def report_failure(exc):
        """Print a failed fetch.

        Without an error_callback, apply_async discards the worker's
        exception unless the result is fetched explicitly.
        """
        print(url, 'failed:', repr(exc))

    # Leaving the with-block terminates the pool, so close() and join()
    # run inside it to let the submitted task and its callback finish.
    with Pool(5) as pool:
        pool.apply_async(
            get_urllib,
            args=(url,),
            callback=call_back,
            error_callback=report_failure,
        )
        pool.close()
        pool.join()

  

# Posted @ 2018-11-20 12:34 by _Q (blog footer: 158 views, 0 comments)