Crawler Performance Test Code

# Synchronous execution: slow, each request blocks until the previous one finishes
# import requests
# def parse_page(res):
#     print("PARSE %s" %(len(res)))
#
# def get_page(url):
#     print("GET %s" %url)
#     response = requests.get(url)
#     if response.status_code == 200:
#         return response.text
#
# if __name__ == '__main__':
#     urls = [
#         "https://www.baidu.com",
#         "https://www.taobao.com",
#         "https://www.python.org",
#     ]
#     for url in urls:
#         res = get_page(url)
#         parse_page(res)
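
To compare this baseline against the timed variants below, here is a minimal sketch of the same loop with a timer added (the timing wrapper and time.perf_counter are assumptions added for comparison, not part of the original post):

import time
import requests

def get_page(url):
    print("GET %s" % url)
    response = requests.get(url)
    if response.status_code == 200:
        return response.text

if __name__ == '__main__':
    urls = ["https://www.baidu.com", "https://www.taobao.com", "https://www.python.org"]
    start = time.perf_counter()          # perf_counter is better suited to interval timing than time.time()
    for url in urls:
        res = get_page(url)
        print("PARSE %s" % len(res))
    print("sync elapsed:", time.perf_counter() - start)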

# Spawning a process (or thread) per URL
# import requests
# from threading import Thread,current_thread
# from multiprocessing import Process
#
# def parse_page(res):
#     print("%s PARSE %s"%(current_thread().name,len(res)))
#
# def get_page(url,callback=parse_page):
#     print("%s GET %s"%(current_thread().name,url))
#     response = requests.get(url)
#     if response.status_code == 200:
#         callback(response.text)
#
# if __name__ == '__main__':
#     import time
#     urls = [
#         "https://www.baidu.com",
#         "https://www.taobao.com",
#         "https://www.python.org",
#     ]
#     start_time = time.time()
#     workers = []
#     for url in urls:
#         # t = Thread(target=get_page,args=(url,))
#         p = Process(target=get_page,args=(url,))
#         p.start()
#         workers.append(p)
#         # t.start()
#     for p in workers:
#         p.join()                       # join before timing, otherwise we only measure spawn time
#     print("=========== elapsed",time.time()-start_time)


import requests
from threading import current_thread
from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor

# Process pool / thread pool via concurrent.futures
# def parse_page(res):
#     res = res.result()               # the callback receives a Future, not the text itself
#     print("%s PARSE %s"%(current_thread().name,len(res)))

# def get_page(url):
#     print("%s GET %s"%(current_thread().name,url))
#     response = requests.get(url)
#     if response.status_code == 200:
#         return response.text
#
# if __name__ == '__main__':
#     import time
#     start_time = time.time()
#     urls = [
#         "https://www.baidu.com",
#         "https://www.taobao.com",
#         "https://www.python.org",
#     ]
#     # pool = ThreadPoolExecutor(50)      # 3.599205732345581
#     pool = ProcessPoolExecutor(50)       # 12.549717903137207
#     for url in urls:
#         pool.submit(get_page,url).add_done_callback(parse_page)
#     pool.shutdown()                      # blocks until all submitted tasks complete
#     print("========>",time.time()-start_time)


# Coroutines: gevent makes greenlets behave like threads
# from gevent import joinall,spawn,monkey;monkey.patch_all()   # patch before importing requests
# import requests,time
# from threading import current_thread
#
# def parse_page(res):
#     print("%s PARSE %s"%(current_thread().name,len(res)))
#
# def get_page(url,callback=parse_page):
#     print("%s GET %s"%(current_thread().name,url))
#     response = requests.get(url)
#     if response.status_code == 200:
#         callback(response.text)
#
#
# if __name__ == '__main__':
#     start_time = time.time()
#     urls = [
#         "https://www.baidu.com",
#         "https://www.taobao.com",
#         "https://www.python.org",
#     ]
#     tasks = []
#     for url in urls:
#         tasks.append(spawn(get_page,url))
#     joinall(tasks)
#     print("++++++=====>",time.time()-start_time)       #2.9541687965393066
#
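
A greenlet also keeps its return value after joinall, so the callback parameter can be dropped entirely; a minimal sketch of that variant (reading .value after joinall is standard gevent, but this rewrite is an assumption, not the original code):

from gevent import monkey; monkey.patch_all()   # must run before importing requests
import gevent
import requests

def get_page(url):
    response = requests.get(url)
    return response.text

if __name__ == '__main__':
    urls = ["https://www.baidu.com", "https://www.taobao.com", "https://www.python.org"]
    tasks = [gevent.spawn(get_page, url) for url in urls]
    gevent.joinall(tasks)
    for url, task in zip(urls, tasks):
        print("PARSE %s %s" % (url, len(task.value)))   # .value holds the greenlet's return value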


# asyncio: the sleep calls overlap, so total runtime is ~3s rather than 6s
# import asyncio
#
# async def task(task_id,second):
#     print("%s run "%task_id)
#     await asyncio.sleep(second)      # @asyncio.coroutine / yield from were removed in Python 3.11
#     print("%s run done"%task_id)
#
# async def main():
#     await asyncio.gather(
#         task("task 1",3),
#         task("task 2",2),
#         task("task 3",1),
#     )
#
# if __name__ == '__main__':
#     asyncio.run(main())              # replaces the manual get_event_loop/run_until_complete/close dance
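
asyncio.sleep only simulates I/O; the blocking requests library does not cooperate with the event loop, so real downloads need an async HTTP client. A minimal sketch using the third-party aiohttp package (an assumption here, it is not part of the original post):

import asyncio
import time
import aiohttp   # third-party: pip install aiohttp

async def get_page(session, url):
    async with session.get(url) as response:
        text = await response.text()
        print("PARSE %s %s" % (url, len(text)))

async def main(urls):
    # one session reuses connections across all requests
    async with aiohttp.ClientSession() as session:
        await asyncio.gather(*(get_page(session, url) for url in urls))

if __name__ == '__main__':
    urls = ["https://www.baidu.com", "https://www.taobao.com", "https://www.python.org"]
    start = time.time()
    asyncio.run(main(urls))
    print("elapsed:", time.time() - start)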

 
