# 爬虫性能测试代码 (crawler performance benchmark)
"""Benchmark several ways of fetching a few web pages.

Each strategy downloads every URL in ``URLS`` and hands the body to
``parse_page``; ``main`` prints how long each strategy took.

Usage::

    python crawler_benchmark.py                # every strategy except gevent
    python crawler_benchmark.py thread asyncio # only the named strategies
    python crawler_benchmark.py gevent         # gevent, on its own

Timings noted by the original author for three URLs: thread pool ~3.6 s,
process pool ~12.5 s, gevent ~2.95 s.
"""

import sys

# gevent has to patch the standard library *before* requests (and with it
# socket/ssl) is imported, so the decision is made from the command line here.
_USE_GEVENT = "gevent" in sys.argv[1:]
if _USE_GEVENT:
    from gevent import monkey
    monkey.patch_all()

import asyncio
import time
from concurrent.futures import Future, ProcessPoolExecutor, ThreadPoolExecutor
from multiprocessing import Process
from threading import Thread, current_thread

import requests

URLS = [
    "https://www.baidu.com",
    "https://www.taobao.com",
    "https://www.python.org",
]

# Upper bound on pool size; the pool never gets more workers than URLs.
MAX_WORKERS = 50

# Seconds to wait for a server before giving up, so one dead host cannot
# hang the whole benchmark.
REQUEST_TIMEOUT = 10


def parse_page(res):
    """Report the size of a downloaded page.

    Args:
        res: The page text, ``None`` when the download failed, or a
            ``concurrent.futures.Future`` resolving to either (this is what
            ``add_done_callback`` passes in).
    """
    if isinstance(res, Future):
        res = res.result()
    worker = current_thread().name
    if res is None:
        print("%s PARSE skipped (no content)" % worker)
        return
    print("%s PARSE %s" % (worker, len(res)))


def get_page(url, callback=None):
    """Download ``url`` and return its text.

    Args:
        url: Address to fetch.
        callback: Optional callable invoked with the page text after a
            successful (HTTP 200) download.

    Returns:
        The response text, or ``None`` when the request failed or the status
        code was not 200.
    """
    worker = current_thread().name
    print("%s GET %s" % (worker, url))
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        print("%s FAILED %s: %s" % (worker, url, exc))
        return None
    if response.status_code != 200:
        return None
    if callback is not None:
        callback(response.text)
    return response.text


def run_sync(urls):
    """Fetch and parse the URLs one after another (slowest baseline)."""
    for url in urls:
        parse_page(get_page(url))


def _run_workers(worker_cls, urls):
    """Start one ``worker_cls`` per URL and wait for all of them."""
    workers = [
        worker_cls(target=get_page, args=(url, parse_page)) for url in urls
    ]
    for worker in workers:
        worker.start()
    # Without the join the caller would only time how long start-up takes.
    for worker in workers:
        worker.join()


def run_threads(urls):
    """Fetch each URL in its own thread."""
    _run_workers(Thread, urls)


def run_processes(urls):
    """Fetch each URL in its own process."""
    _run_workers(Process, urls)


def _run_pool(executor_cls, urls):
    """Fetch the URLs through an executor, parsing via done-callbacks."""
    size = max(1, min(MAX_WORKERS, len(urls)))
    # Leaving the ``with`` block waits for every submitted task to finish.
    with executor_cls(max_workers=size) as pool:
        for url in urls:
            pool.submit(get_page, url).add_done_callback(parse_page)


def run_thread_pool(urls):
    """Fetch the URLs with a ``ThreadPoolExecutor``."""
    _run_pool(ThreadPoolExecutor, urls)


def run_process_pool(urls):
    """Fetch the URLs with a ``ProcessPoolExecutor``."""
    _run_pool(ProcessPoolExecutor, urls)


def run_gevent(urls):
    """Fetch the URLs in gevent greenlets.

    The downloads only overlap when ``monkey.patch_all()`` ran at import
    time, i.e. when the script was started with ``gevent`` on the command
    line; otherwise the greenlets block each other and run sequentially.
    """
    from gevent import joinall, spawn

    joinall([spawn(get_page, url, parse_page) for url in urls])


async def task(task_id, second):
    """Pretend to work for ``second`` seconds without blocking the loop."""
    print("%s run " % task_id)
    await asyncio.sleep(second)
    print("%s run done" % task_id)


async def _run_tasks():
    """Run the three demo tasks concurrently."""
    await asyncio.gather(
        task("任务一", 3),
        task("任务二", 2),
        task("任务三", 1),
    )


def run_asyncio_demo(urls=None):
    """Run the asyncio sleep demo; ``urls`` is accepted but unused."""
    asyncio.run(_run_tasks())


STRATEGIES = {
    "sync": run_sync,
    "thread": run_threads,
    "process": run_processes,
    "thread_pool": run_thread_pool,
    "process_pool": run_process_pool,
    "gevent": run_gevent,
    "asyncio": run_asyncio_demo,
}


def main():
    """Run the strategies named on the command line and time each one."""
    selected = sys.argv[1:]
    if not selected:
        selected = [name for name in STRATEGIES if name != "gevent"]

    unknown = [name for name in selected if name not in STRATEGIES]
    if unknown:
        raise SystemExit(
            "unknown strategy %s; choose from %s"
            % (", ".join(unknown), ", ".join(STRATEGIES))
        )
    if _USE_GEVENT and len(selected) > 1:
        # The monkey patch replaces threads and sockets process-wide, which
        # would distort every other measurement.
        raise SystemExit("gevent patches the whole process; run it on its own")

    for name in selected:
        started = time.perf_counter()
        STRATEGIES[name](URLS)
        print("%s ========> %s" % (name, time.perf_counter() - started))


if __name__ == "__main__":
    main()
# 本文来自博客园,作者:一石数字欠我15w!!!,转载请注明原文链接:https://www.cnblogs.com/52-qq/p/8333833.html