# 爬虫性能测试代码

# 同步执行,效率慢
# import requests
# def parse_page(res):
#     print("PARSE %s" %(len(res)))
#
# def get_page(url):
#     print("GET %s" %url)
#     response = requests.get(url)
#     if response.status_code == 200:
#         return response.text
#
# if __name__ == '__main__':
#     urls = [
#         "https://www.baidu.com",
#         "https://www.taobao.com",
#         "https://www.python.org",
#     ]
#     for url in urls:
#         res = get_page(url)
#         parse_page(res)

# 开进程线程
# import requests
# from threading import Thread,current_thread
# from multiprocessing import Process
#
# def parse_page(res):
#     print("%s PARSE %s"%(current_thread().getName(),len(res)))
#
# def get_page(url,callback=parse_page):
#     print("%s GET %s"%(current_thread().getName(),url))
#     response = requests.get(url)
#     if response.status_code == 200:
#         callback(response.text)
#
# if __name__ == '__main__':
#     import time
#     urls = [
#         "https://www.baidu.com",
#         "https://www.taobao.com",
#         "https://www.python.org",
#     ]
#     start_time = time.time()
#     for url in urls:
#         # t = Thread(target=get_page,args=(url,))
#         p = Process(target=get_page,args=(url,))
#         p.start()
#         # t.start()
#     print("===========耗时",time.time()-start_time)


import requests
from threading import current_thread
from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor

# 进程池线程池
# def parse_page(res):
#     res = res.result()
#     print("%s PARSE %s"%(current_thread().getName(),len(res)))

# def get_page(url):
#     print("%s GET %s"%(current_thread().getName(),url))
#     response = requests.get(url)
#     if response.status_code == 200:
#         return response.text
#
# if __name__ == '__main__':
#     import time
#     start_time = time.time()
#     urls = [
#         "https://www.baidu.com",
#         "https://www.taobao.com",
#         "https://www.python.org",
#     ]
#     # pool = ThreadPoolExecutor(50)      # 耗时约 3.5992057323455 秒
#     pool = ProcessPoolExecutor(50)       #12.549717903137207
#     for url in urls:
#         pool.submit(get_page,url).add_done_callback(parse_page)
#     pool.shutdown()
#     print("========>",time.time()-start_time)


# # 协程 实现的是仿线程
# from gevent import joinall,spawn,monkey;monkey.patch_all()
# import requests,time
# from threading import current_thread
#
# def parse_page(res):
#     print("%s PARSE %s"%(current_thread().getName(),len(res)))
#
# def get_page(url,callback=parse_page):
#     print("%s GET %s"%(current_thread().getName(),url))
#     response = requests.get(url)
#     if response.status_code == 200:
#         callback(response.text)
#
#
# if __name__ == '__main__':
#     start_time = time.time()
#     urls = [
#         "https://www.baidu.com",
#         "https://www.taobao.com",
#         "https://www.python.org",
#     ]
#     tasks = []
#     for url in urls:
#         tasks.append(spawn(get_page,url))
#     joinall(tasks)
#     print("++++++=====>",time.time()-start_time)       #2.9541687965393066
#


# import asyncio
# @asyncio.coroutine
# def task(task_id,second):
#     print("%s run "%task_id)
#     yield from asyncio.sleep(second)
#     print("%s run done"%task_id)
#
# if __name__ == '__main__':
#     tasks = [
#         task("任务一",3),
#         task("任务二",2),
#         task("任务三",1),
#     ]
#     loop = asyncio.get_event_loop()
#     loop.run_until_complete(asyncio.wait(tasks))
#     loop.close()

 

# posted @ 2018-01-23 08:21  一石数字欠我15w!!!  阅读(564)  评论(0)  编辑  收藏  举报