tcollector is part of OpenTSDB; it collects data on client machines and sends it to the server-side database. Here I use it only for log collection and don't touch OpenTSDB itself.
1. Source code
tcollector's core is only a little over a thousand lines of code, made up of three main parts: a reader thread, a sender thread, and the main loop.
Along the way the code demonstrates daemonization, the logging module, the optparse module, the Queue module, multithreading, dynamic module loading, and subprocess-based plugins, all of which make good learning material.
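To make that structure concrete, here is a minimal runnable sketch of the reader-thread/sender-thread/Queue pattern; the function names and the simulated input are illustrative, not tcollector's actual code:

import threading
import time
import Queue  # renamed "queue" in Python 3

lines = Queue.Queue(maxsize=100000)  # bounded buffer between the two threads


def reader():
    # Stand-in for the reader thread, which in tcollector drains the
    # stdout of every collector subprocess into the shared queue.
    n = 0
    while True:
        lines.put('collected line %d' % n)
        n += 1
        time.sleep(0.1)


def sender():
    # Stand-in for the sender thread, which pops lines off the queue
    # and ships them to the server.
    while True:
        print 'sending: %s' % lines.get()


t1 = threading.Thread(target=reader)
t2 = threading.Thread(target=sender)
t1.daemon = t2.daemon = True
t1.start()
t2.start()
time.sleep(1)  # let the demo run briefly, then exit with the main thread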
2. Log collection client
You can modify tcollector's line-handling and sending code and enlarge its Queue to turn it into a log collection client framework. Then write the actual collector programs and drop them into the collectors/0/ directory: the framework launches each collector, the collector writes log lines to its stdout, and the framework forwards them to the server. The demos that ship with tcollector are good references for writing collectors, and a collector can be very small, as the sketch below shows.
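A minimal collector sketch that follows a log file and emits each new line on stdout; the log path is a made-up example:

#!/usr/bin/env python
import sys
import time

LOG_PATH = '/var/log/app/access.log'  # hypothetical file to collect


def main():
    f = open(LOG_PATH)
    f.seek(0, 2)  # start at the end of the file, like tail -f
    while True:
        line = f.readline()
        if not line:
            time.sleep(1)  # nothing new yet, poll again shortly
            continue
        # One record per stdout line; flush so the framework sees it
        # right away instead of after the stdio buffer fills.
        sys.stdout.write(line)
        sys.stdout.flush()


if __name__ == '__main__':
    main()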
You also need a log collection server to receive and process the logs. It can be a plain TCP server, or you can change the sender to speak HTTP and build the server on Tornado; either is easy to implement.
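For the HTTP variant, the framework's send routine only needs to POST batches of lines. A sketch using urllib2; the server URL, the timeout, and the helper name are assumptions, and the one-record-per-line body matches what the Tornado demo below parses:

import urllib2

SERVER_URL = 'http://log-server:8016/'  # assumed address of the server


def send_batch(lines):
    # Each line is a "prod date data" record, the format the Tornado
    # demo's PAT regex expects.
    body = '\n'.join(lines)
    urllib2.urlopen(urllib2.Request(SERVER_URL, body), timeout=5).read()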
3. Log collection server
TCP Server demo
import socket
import traceback

NET_OPERATION_TIMEOUT = 5
HOST = socket.gethostname()
PORT = 1738


def process_worker():
    # Serve one client at a time; log any error and keep accepting.
    while True:
        try:
            _recv_send()
        except Exception:
            traceback.print_exc()


def _recv_send():
    client, address = s.accept()
    client.settimeout(NET_OPERATION_TIMEOUT)
    while True:
        data = client.recv(4096)
        if not data:  # an empty read means the client hung up
            break
        print data
        client.send('0')  # ack each message with a single byte
    client.close()


def _main():
    global s
    s = socket.socket()
    print HOST, PORT
    s.bind((HOST, PORT))
    s.listen(5)
    process_worker()


if __name__ == "__main__":
    _main()
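A quick way to exercise it: connect, send a message, and read the one-byte ack:

import socket

c = socket.create_connection((socket.gethostname(), 1738), timeout=5)
c.sendall('hello from a test client')
print c.recv(1)  # the server answers every message with '0'
c.close()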
Tornado HTTP Server demo
import tornado.ioloop
import tornado.web
import tornado.httpserver
import tornado.gen
import logging
from logging.handlers import RotatingFileHandler
import os
import re
import traceback
import fcntl
import errno

PWD = os.path.dirname(os.path.realpath(__file__))
log_file = '%s/tcollector_server.log' % PWD
LOG = logging.getLogger('tcollector_server')
LOG.setLevel(logging.DEBUG)
ch = RotatingFileHandler(log_file, 'a', 400 * 1024 * 1024, 10)
ch.setFormatter(logging.Formatter('[%(asctime)s][%(levelname)s]'
                                  '[%(process)d][%(filename)s]%(message)s'))
LOG.addHandler(ch)

# Incoming log line format: [prod] [date] [data]
PAT = re.compile(r'^(?P<prod>[^ ]+) (?P<date>[^ ]+) (?P<data>.+)')
# Date format: 2015-06-30
DATE_PAT = re.compile(r'^\d{4}-\d{2}-\d{2}$')

LOG_DIR = PWD + '/data'
try:
    os.makedirs(LOG_DIR)
except OSError as exc:
    if exc.errno == errno.EEXIST and os.path.isdir(LOG_DIR):
        pass
    else:
        raise

file_fd = {}  # cache of open data-file handles, keyed by path


class AutoLock(object):
    """Inter-process lock built on fcntl.flock over a lock file."""

    def __init__(self, file_name):
        self.__file_name = file_name
        self.__handle = open(file_name, 'w')

    def __enter__(self):
        fcntl.flock(self.__handle, fcntl.LOCK_EX)

    def __exit__(self, exception_type, exception_val, trace):
        fcntl.flock(self.__handle, fcntl.LOCK_UN)
        self.__handle.close()


class HandlerBase(tornado.web.RequestHandler):
    """Shared request handling: parse the body, save matching lines."""

    def initialize(self):
        pass

    def get(self):
        self.post()

    @tornado.web.asynchronous
    @tornado.gen.coroutine
    def post(self):
        LOG.info(str(self.request.remote_ip))
        body = self.parse_request(self.request)
        self.process_request(body)
        self.finish()

    @staticmethod
    def parse_request(request):
        return request.body

    @staticmethod
    def save_data(info):
        file_path = '{log_dir}/{prod}_{date}.dat'.format(
            log_dir=LOG_DIR, prod=info['prod'], date=info['date'])
        if file_path not in file_fd:
            file_fd[file_path] = open(file_path, 'a+')
        with AutoLock(file_path + '.lock'):
            # Append a newline so records stay line-delimited on disk.
            file_fd[file_path].write(info['data'] + '\n')
            file_fd[file_path].flush()
        if len(file_fd) > 100:  # cap the handle cache, then start over
            for value in file_fd.itervalues():
                value.close()
            file_fd.clear()

    def process_request(self, body):
        for line in body.split('\n'):
            try:
                line_match = PAT.match(line)
                if not line_match:
                    LOG.error('error line:' + line)
                else:
                    date_match = DATE_PAT.match(line_match.groupdict()['date'])
                    if date_match:
                        self.save_data(line_match.groupdict())
            except Exception:
                LOG.error(traceback.format_exc())
                LOG.error('exception line:' + line)


class LogHandler(HandlerBase):
    pass


application = tornado.web.Application([
    (r"/.*", LogHandler),
])

if __name__ == '__main__':
    server = tornado.httpserver.HTTPServer(application, xheaders=True)
    server.bind(8016)
    server.start(0)  # fork as many worker processes as there are CPU cores
    tornado.ioloop.IOLoop.instance().start()
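Note how the pieces fit together: server.start(0) forks one worker process per CPU core, which is why appends to the shared data files are serialized through the file-based AutoLock rather than an in-process threading lock. With the server running, a POSTed record such as myprod 2015-06-30 hello world ends up appended to data/myprod_2015-06-30.dat.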