Python Tornado 异步客户端应用
简单应用
from tornado import ioloop, httpclient, gen
from tornado.gen import Task
import time, logging, copy

URLS = ['http://qq.com', 'http://sina.com', 'http://www.baidu.com', ]

# URLs whose download has been scheduled but has not finished yet (success or
# failure). The IOLoop is stopped once this set becomes empty.
_in_flight = set()


@gen.coroutine  # This decorator is the key to writing an asynchronous client.
def download(url):
    """Fetch ``url`` asynchronously and stop the IOLoop after the last fetch.

    On an HTTP 200 response the URL is removed from ``URLS``, so after the run
    ``URLS`` holds only the URLs that were not downloaded successfully.
    Failed fetches are logged and still count as finished; otherwise a single
    unreachable host would keep the IOLoop running forever.

    The numbered comments below belong to the step-by-step source analysis
    that starts at step 0 in the driver code.
    """
    http_client = httpclient.AsyncHTTPClient()
    try:
        # 6. Run http_client.fetch(url), then leave download(); execution
        #    resumes on the next gen.next / gen.send call made in step 5.
        # 51. Receive the response returned by www.baidu.com and bind it to
        #     ``response``.
        response = yield http_client.fetch(url)
    except Exception as exc:
        # fetch() raises for connection errors and non-2xx status codes.
        logging.warning("download of %s failed: %s", url, exc)
    else:
        # %-formatting prints the same text under Python 2 and Python 3.
        print('response.length = %d' % len(response.body))
        print(dir(response))
        if str(response.code) == "200":
            URLS.remove(url)
    finally:
        _in_flight.discard(url)
        if not _in_flight:
            ioloop.IOLoop.instance().stop()


# Iterate over a copy because download() removes entries from URLS.
url_list = copy.deepcopy(URLS)
_in_flight.update(url_list)
for url in url_list:
    future = download(url)  # 0. Start of the source analysis.
    print(future)

logging.info("****start ioloop*************")
if _in_flight:
    # With nothing scheduled there is nobody to call stop(), so only start
    # the loop when at least one download is pending.
    ioloop.IOLoop.instance().start()  # 18. Start the IOLoop.
封装应用
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Probe node IPs over HTTP and drop the ones that answer with code 599."""
import sys
from os.path import dirname, abspath, sep

PWD = dirname(abspath(__file__))
sys.path.append(dirname(PWD) + sep + 'lib')

import re
import json
import time
import functools
from datetime import datetime

import tornado
import tornado.ioloop
from tornado.httpclient import HTTPRequest
from tornado.httpclient import AsyncHTTPClient

from log_manager import LogManager

normal = "filter_normal"
error = "filter_599"

# Splits a probed URL into scheme, host[:port] and the path after the first "/".
_URL_RE = re.compile(r'(http://|https://)([0-9|a-z|A-Z|\.|:]*)/(.*)')


class FilterHcode(object):
    """Send one HTTP request to every IP in ``probe.result`` and log the outcome.

    ``probe.result`` is iterated as ``{isp: {area: [ip, ...]}}``. IPs whose
    response code is 599 are removed from that structure in place and the
    ``filter_599`` flag is raised.
    """

    def __init__(self, filter_conf, probe):
        """Store the configuration and the probe object being filtered.

        ``filter_conf`` must provide the keys ``probe_interval``,
        ``max_clients``, ``domain``, ``method``, ``ctimeout``, ``rtimeout``
        and ``success_hcode``. ``probe`` must expose a ``result`` attribute.
        """
        self._initlogger()
        self.filter_conf = filter_conf
        self.probe_obj = probe
        self.result = probe.result
        self._probe_interval = int(self.filter_conf['probe_interval'])
        self._max_clients = int(self.filter_conf['max_clients'])

    def _initlogger(self):
        """Create the loggers for normal responses and for 599 failures."""
        self._logger_normal = LogManager(normal)
        self._logger_error = LogManager(error)

    @staticmethod
    def _parse_success_hcodes(raw):
        """Parse the ``success_hcode`` setting.

        Returns ``(codes, set_flag)``. ``set_flag`` is True when ``codes`` are
        the codes that mean success, and False when the setting contains "!",
        in which case every code *not* listed means success.
        """
        set_flag = "!" not in raw
        # Strip an optional "!" per entry instead of blindly dropping the
        # first character, so "!404,500" is not parsed as [404, 0].
        codes = [int(code.strip().lstrip('!')) for code in raw.split(',')]
        return codes, set_flag

    def _async_fetch(self):
        """Queue one request per non-empty IP; return how many were queued."""
        now = int(time.time())
        self._time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(now))
        self._time_utc = datetime.utcfromtimestamp(now).strftime(
            "%Y-%m-%dT%H:%M:%SZ")

        method = self.filter_conf['method']
        ctimeout = int(self.filter_conf['ctimeout'])
        rtimeout = int(self.filter_conf['rtimeout'])
        suc_hcodes, set_flag = self._parse_success_hcodes(
            self.filter_conf['success_hcode'])
        headers = {"Host": self.filter_conf['domain']}

        # Snapshot the targets first: the response handler removes IPs from
        # self.result, and empty entries must not be counted because no
        # request is ever sent for them.
        targets = [(isp, area, ip)
                   for isp, area_iplists in self.result.items()
                   for area, ip_list in area_iplists.items()
                   for ip in ip_list
                   if ip]

        self.all_num = len(targets)
        self.call_iplist = []
        self.filter_599 = False
        self.probe_obj.filter_599 = False

        for isp, area, ip in targets:
            req = HTTPRequest("http://" + ip + "/",
                              method=method,
                              headers=headers,
                              connect_timeout=ctimeout,
                              request_timeout=rtimeout,
                              follow_redirects=False)
            callback = functools.partial(self._handle_response, area, isp,
                                         method, suc_hcodes, set_flag, ip)
            self._http_client.fetch(req, callback)
        return self.all_num

    def _handle_response(self, area, isp, method, suc_hcodes, set_flag, ip,
                         response):
        """Log one response and stop the IOLoop after the last one."""
        try:
            self._record_response(area, isp, method, suc_hcodes, set_flag,
                                  ip, response)
        finally:
            # Count every response, even when recording failed; otherwise the
            # stop condition below could never be reached.
            # NOTE(review): the original's indentation was lost; it is assumed
            # that 599 responses were also appended here - confirm.
            self.call_iplist.append(ip)
            if len(self.call_iplist) == self.all_num:
                tornado.ioloop.IOLoop.instance().stop()

    def _record_response(self, area, isp, method, suc_hcodes, set_flag, ip,
                         response):
        """Build the log record for ``response`` and write it to a logger."""
        request = response.request
        match = _URL_RE.search(request.url)
        if not match:
            return
        dip = match.group(2)
        uri = match.group(3)

        body_bytes_recv = len(response.body) if response.body else 0
        header_bytes_recv = sum(len(key) + len(val)
                                for key, val in response.headers.get_all())

        # Missing timings default to 0.
        time_info = response.time_info
        conn_time = time_info.get('connect', 0)
        fst_pkg_time = time_info.get('starttransfer', 0)

        listed = response.code in suc_hcodes
        success = 1 if listed == set_flag else 0

        data = {
            'node_ip': dip,
            'uri': uri,
            'domain': request.headers['host'],
            'http_code': response.code,
            'conn_time': conn_time,
            'resp_time': fst_pkg_time - conn_time,
            'probe_time': self._time,
            '@timestamp': self._time_utc,
            'area': area,
            'isp': isp,
            'success': success,
            'method': method,
            'header_bytes_recv': header_bytes_recv,
            'body_bytes_recv': body_bytes_recv,
        }

        if str(response.code) == "599":
            err = response.error
            data['errMsg'] = getattr(err, 'message', str(err))
            self._logger_error.log(json.dumps(data), 'error')
            self.result[isp][area].remove(ip)
            # The flag is reset on the probe object before each run, so raise
            # it there as well as on this instance.
            self.filter_599 = True
            self.probe_obj.filter_599 = True
        else:
            self._logger_normal.log(json.dumps(data), "info")

    def probe(self,):
        """Probe every IP once and block until all responses have arrived."""
        AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient",
                                  max_clients=self._max_clients)
        self._http_client = AsyncHTTPClient()
        self.ioloop = tornado.ioloop.IOLoop.instance()

        queued = self._async_fetch()
        # tornado.ioloop.PeriodicCallback(async_fetch, self._probe_interval).start()
        if queued == 0:
            # No request means no callback will ever stop the loop.
            return
        self.ioloop.start()


if __name__ == '__main__':
    # NOTE(review): FilterHcode requires (filter_conf, probe); this call raises
    # TypeError as written. Pass the real configuration and probe object.
    prober = FilterHcode()
    # signal.signal(signal.SIGUSR1, prober.reset_config)
    prober.probe()