爬虫性能测试代码
# Crawler performance test.
#
# Compares five ways of fetching the same URL list: serial requests, one
# thread/process per URL, thread/process executor pools, gevent coroutines,
# and asyncio coroutines.  Only the imports below are live code; each
# experiment is preserved as a commented-out variant together with the
# timings the author measured.

import requests
from threading import current_thread
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor

# --- 1. Serial execution (slow baseline) --------------------------------
# NOTE(review): the original printed "PAESE" (typo for "PARSE"); fixed here.
#
# import requests
#
# def parse_page(res):
#     print("PARSE %s" % (len(res)))
#
# def get_page(url):
#     print("GET %s" % url)
#     response = requests.get(url)
#     if response.status_code == 200:
#         return response.text
#
# if __name__ == '__main__':
#     urls = [
#         "https://www.baidu.com",
#         "https://www.taobao.com",
#         "https://www.python.org",
#     ]
#     for url in urls:
#         res = get_page(url)
#         parse_page(res)

# --- 2. One thread / one process per URL --------------------------------
# NOTE(review): the original URL list had a typo ("python.ort"); fixed here.
# Thread.getName() is the legacy spelling of the .name property.
#
# import requests
# from threading import Thread, current_thread
# from multiprocessing import Process
#
# def parse_page(res):
#     print("%s PARSE %s" % (current_thread().name, len(res)))
#
# def get_page(url, callback=parse_page):
#     print("%s GET %s" % (current_thread().name, url))
#     response = requests.get(url)
#     if response.status_code == 200:
#         callback(response.text)
#
# if __name__ == '__main__':
#     import time
#     urls = [
#         "https://www.baidu.com",
#         "https://www.taobao.com",
#         "https://www.python.org",
#     ]
#     start_time = time.time()
#     for url in urls:
#         # t = Thread(target=get_page, args=(url,))
#         p = Process(target=get_page, args=(url,))
#         p.start()
#         # t.start()
#     print("=========== elapsed", time.time() - start_time)

# --- 3. Thread pool / process pool --------------------------------------
# add_done_callback hands the callback a Future, hence res.result().
#
# def parse_page(res):
#     res = res.result()
#     print("%s PARSE %s" % (current_thread().name, len(res)))
#
# def get_page(url):
#     print("%s GET %s" % (current_thread().name, url))
#     response = requests.get(url)
#     if response.status_code == 200:
#         return response.text
#
# if __name__ == '__main__':
#     import time
#     start_time = time.time()
#     urls = [
#         "https://www.baidu.com",
#         "https://www.taobao.com",
#         "https://www.python.org",
#     ]
#     # pool = ThreadPoolExecutor(50)   # measured ~3.599 s
#     pool = ProcessPoolExecutor(50)    # measured ~12.550 s
#     for url in urls:
#         pool.submit(get_page, url).add_done_callback(parse_page)
#     pool.shutdown()
#     print("========>", time.time() - start_time)

# --- 4. gevent coroutines (thread-like API) -----------------------------
# monkey.patch_all() must run before requests is imported so that the
# socket layer becomes cooperative.
#
# from gevent import joinall, spawn, monkey; monkey.patch_all()
# import requests, time
# from threading import current_thread
#
# def parse_page(res):
#     print("%s PARSE %s" % (current_thread().name, len(res)))
#
# def get_page(url, callback=parse_page):
#     print("%s GET %s" % (current_thread().name, url))
#     response = requests.get(url)
#     if response.status_code == 200:
#         callback(response.text)
#
# if __name__ == '__main__':
#     start_time = time.time()
#     urls = [
#         "https://www.baidu.com",
#         "https://www.taobao.com",
#         "https://www.python.org",
#     ]
#     tasks = []
#     for url in urls:
#         tasks.append(spawn(get_page, url))
#     joinall(tasks)
#     print("++++++=====>", time.time() - start_time)   # measured ~2.954 s

# --- 5. asyncio coroutines ----------------------------------------------
# NOTE(review): the original used the @asyncio.coroutine / yield-from
# generator style, which was removed in Python 3.11; the equivalent
# async/await form is shown instead.
#
# import asyncio
#
# async def task(task_id, second):
#     print("%s run " % task_id)
#     await asyncio.sleep(second)
#     print("%s run done" % task_id)
#
# if __name__ == '__main__':
#     tasks = [
#         task("task-1", 3),
#         task("task-2", 2),
#         task("task-3", 1),
#     ]
#     loop = asyncio.get_event_loop()
#     loop.run_until_complete(asyncio.wait(tasks))
#     loop.close()
本文来自博客园,作者:一石数字欠我15w!!!,转载请注明原文链接:https://www.cnblogs.com/52-qq/p/8333833.html
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】凌霞软件回馈社区,博客园 & 1Panel & Halo 联合会员上线
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】博客园社区专享云产品让利特惠,阿里云新客6.5折上折
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 一个奇形怪状的面试题:Bean中的CHM要不要加volatile?
· [.NET]调用本地 Deepseek 模型
· 一个费力不讨好的项目,让我损失了近一半的绩效!
· .NET Core 托管堆内存泄露/CPU异常的常见思路
· PostgreSQL 和 SQL Server 在统计信息维护中的关键差异
· DeepSeek “源神”启动!「GitHub 热点速览」
· 我与微信审核的“相爱相杀”看个人小程序副业
· 微软正式发布.NET 10 Preview 1:开启下一代开发框架新篇章
· 如何使用 Uni-app 实现视频聊天(源码,支持安卓、iOS)
· C# 集成 DeepSeek 模型实现 AI 私有化(本地部署与 API 调用教程)