通过Python来操作kylin
起因:
老大要求的数据,无法通过kylin里面的SQL查询到,只能通过调用接口来实现需求
第一步,安装依赖的包(py2/py3都支持,我这边用的是py2)
pip install kylinpy
pip install sqlalchemy
第二步,直接写代码,实现每天日志里面最多的五个用户,通过自修改实现自己的需求
#!/usr/bin/env python # coding=utf-8 import sqlalchemy as sa def kylin_query_show(conn_str, query_sql): res_dic = {} kylin_engine = sa.create_engine(conn_str) results = kylin_engine.execute(query_sql) for line in results: userid = str(line[0]).strip() datetime = line[1].strip() per_count = line[2] if datetime not in res_dic: res_dic[datetime] = [] res_dic[datetime].append((userid, per_count)) else: res_dic[datetime].append((userid, per_count)) return res_dic if __name__ == "__main__": conn_str = "kylin://ADMIN:KYLIN@ip:7070/project_name?version=v1" query_sql = "select userid, datetime, count(*) c from soda_report group by userid, datetime" res_dic = kylin_query_show(conn_str, query_sql) tmp_list = [] for k, v in res_dic.items(): final_userid_count_list = sorted(v, key = lambda x: x[1], reverse=True) final_userid_count_list = final_userid_count_list[0:5] tmp_list.append((k, ','.join([':'.join([u_c[0], str(u_c[1])]) for u_c in final_userid_count_list]))) res = sorted(tmp_list, key = lambda x:x[0], reverse=True) for i in res: print "\t".join(i)