使用tornado实现简单的异步服务端与客户端——异步爬虫
目录结构
文件
async_server.py
# -*- coding:utf-8 -*-
"""Minimal Tornado async server.

Exposes GET /sleep, which waits 3 seconds without blocking the IOLoop
and then replies with the current timestamp. Used as the slow backend
for the async-vs-sync crawler comparison in async_client.py.
"""
import datetime as dt

import tornado.gen
import tornado.httpserver
import tornado.ioloop
import tornado.web


class SleepHandler(tornado.web.RequestHandler):
    """Simulates a slow endpoint with a non-blocking 3-second delay."""

    # gen.coroutine makes the 3s wait cooperative: other requests keep
    # being served on the same IOLoop while this one sleeps.
    @tornado.gen.coroutine
    def get(self, *args, **kwargs):
        yield tornado.gen.sleep(3)
        self.write(str(dt.datetime.now()))


if __name__ == '__main__':
    app = tornado.web.Application(
        [
            (r"/sleep", SleepHandler),
        ],
    )
    http_server = tornado.httpserver.HTTPServer(app)
    http_server.listen(9999)
    # IOLoop.current() replaces the deprecated IOLoop.instance().
    tornado.ioloop.IOLoop.current().start()
async_client.py —— 异步爬虫与同步爬虫对比
# -*- coding:utf-8 -*-
"""Async crawler with Tornado vs. a blocking crawler with requests.

Fires N requests at the /sleep endpoint (server sleeps 3s per request).
The concurrent version finishes in ~3s total; the sequential version
takes ~N * 3s.
"""
import time

import requests
import tornado.httpclient
import tornado.ioloop
from tornado import gen

N = 5
URL = "http://localhost:9999/sleep"


@gen.coroutine
def run():
    """Fetch URL N times concurrently.

    Yielding a list of futures starts every fetch at once and resumes
    only after all of them complete, so total latency is roughly that
    of a single request.
    """
    http_client = tornado.httpclient.AsyncHTTPClient()
    yield [http_client.fetch(URL) for _ in range(N)]


if __name__ == '__main__':
    # Async crawler: all N requests overlap -> ~3s total.
    start = time.time()
    tornado.ioloop.IOLoop.current().run_sync(run)
    print('async:', time.time() - start)

    # Sync crawler: requests run back-to-back -> ~N * 3s total.
    start2 = time.time()
    for _ in range(N):
        requests.get(URL)
    print('req:', time.time() - start2)

    # Sample output:
    # async: 3.016835927963257
    # req: 15.045804977416992
~~~