Python 多线程、多进程、协程性能对比(以爬虫为例)

  基本配置:阿里云服务器低配,单核2G内存

  首先是看协程的效果:

  
# gevent has to patch the standard library before any other module imports
# socket/ssl. requests pulls those in, so the patch must run first;
# patching afterwards can leave already-imported modules with blocking I/O.
from gevent import monkey
monkey.patch_all()

import sys
import time

import gevent
import lxml.html as HTML
import requests

# Build the list of song-list page URLs. The half-open page range
# [sys.argv[1], sys.argv[2]) is taken from the command line.
urls = [
    'http://grri94kmi4.app.tianmaying.com/songs?page=' + str(page)
    for page in range(int(sys.argv[1]), int(sys.argv[2]))
]

def get_data(url):
    """Download one song-list page and return its table rows.

    Prints the elapsed request time when the server answers 200, or a
    failure line with the status code otherwise.

    Args:
        url: Address of the page to fetch.

    Returns:
        A list with one dict per ``//tbody/tr`` row, each holding the keys
        ``name``, ``url``, ``id`` and ``comment``. The list is empty when the
        response status is not 200.

    Raises:
        IndexError: If a row lacks the link or the trailing text cell.
    """
    t1 = time.time()
    res = requests.get(url)
    if res.status_code != 200:
        # An error response carries no song table; do not feed it to the parser.
        print(url+' : '+'url open failed, status: '+str(res.status_code))
        return []
    print(url+' : '+'url open success'+'  time use: '+ str(time.time()-t1))

    html = HTML.fromstring(res.content)
    data = []
    for tr in html.xpath('//tbody/tr'):
        # lxml returns "smart" strings that keep a reference to their parent
        # element; str() stores plain strings so the tree can be released.
        link = str(tr.xpath('./td/a/@href')[0])
        data.append({
            'name': str(tr.xpath('./td/a/text()')[0]),
            'url': link,
            # NOTE(review): assumes the href has a fixed 30-character prefix
            # in front of the id -- confirm against the real page markup.
            'id': link[30:],
            'comment': str(tr.xpath('./td[last()]/text()')[0]),
        })
    # The original built this list and then dropped it; hand it to the caller.
    return data

if __name__ == '__main__':
    # Time the whole crawl: spawn one greenlet per URL, then wait for all.
    started = time.time()
    jobs = [gevent.spawn(get_data, page_url) for page_url in urls]
    gevent.joinall(jobs)
    print('total time use :', time.time()-started)
View Code

  在爬取20个链接的情况下,用时约为4.9s:

  total time use : 4.873192071914673

 

  线程和进程差不多,用时6s左右:

  

import requests
import lxml.html as HTML
import sys
import time
from multiprocessing import Pool as ThreadPool
# Collect the song-list page URLs for pages sys.argv[1] up to, but not
# including, sys.argv[2].
page_numbers = range(int(sys.argv[1]), int(sys.argv[2]))
urls = []
urls.extend(
    'http://grri94kmi4.app.tianmaying.com/songs?page=' + str(number)
    for number in page_numbers
)

def get_data(url):
    """Download one song-list page and return its table rows.

    Prints the elapsed request time when the server answers 200, or a
    failure line with the status code otherwise.

    Args:
        url: Address of the page to fetch.

    Returns:
        A list with one dict per ``//tbody/tr`` row, each holding the keys
        ``name``, ``url``, ``id`` and ``comment``. The list is empty when the
        response status is not 200.

    Raises:
        IndexError: If a row lacks the link or the trailing text cell.
    """
    t1 = time.time()
    res = requests.get(url)
    if res.status_code != 200:
        # An error response carries no song table; do not feed it to the parser.
        print(url+' : '+'url open failed, status: '+str(res.status_code))
        return []
    print(url+' : '+'url open success'+'  time use: '+ str(time.time()-t1))

    html = HTML.fromstring(res.content)
    data = []
    for tr in html.xpath('//tbody/tr'):
        # This function is run through pool.map, so its return value is sent
        # back from a worker. lxml's "smart" strings reference their parent
        # element; str() reduces them to plain strings that are safe to return.
        link = str(tr.xpath('./td/a/@href')[0])
        data.append({
            'name': str(tr.xpath('./td/a/text()')[0]),
            'url': link,
            # NOTE(review): assumes the href has a fixed 30-character prefix
            # in front of the id -- confirm against the real page markup.
            'id': link[30:],
            'comment': str(tr.xpath('./td[last()]/text()')[0]),
        })
    # The original built this list and then dropped it, so pool.map only
    # ever collected None values; hand the rows to the caller.
    return data

if __name__ == '__main__':
    # NOTE: despite its alias, ThreadPool is multiprocessing.Pool (see the
    # import above), so the pages are fetched by worker processes.
    started = time.time()
    workers = ThreadPool()
    pages = workers.map(get_data, urls)
    # Stop accepting work, then wait for the workers to exit before timing.
    workers.close()
    workers.join()
    print('total time use :', time.time()-started)

 

  

posted @ 2017-10-09 16:34  安阳小栈-客官歇会吧  阅读(382)  评论(0)  编辑  收藏  举报