通过Python来操作kylin

起因:

老大要求的数据,无法通过kylin里面的SQL查询到,只能通过调用接口来实现需求

 

第一步,安装依赖的包(py2/py3都支持,我这边用的是py2)

pip install kylinpy
pip install sqlalchemy

 

第二步,直接写代码,实现每天日志里面最多的五个用户,通过自修改实现自己的需求

#!/usr/bin/env python
# coding=utf-8

import sqlalchemy as sa

def kylin_query_show(conn_str, query_sql):
    res_dic = {}
    kylin_engine = sa.create_engine(conn_str)
    results = kylin_engine.execute(query_sql)
    for line in results:
        userid = str(line[0]).strip()
        datetime = line[1].strip()
        per_count = line[2]
        if datetime not in res_dic:
            res_dic[datetime] = []
            res_dic[datetime].append((userid, per_count))
        else:
            res_dic[datetime].append((userid, per_count))
    return res_dic

if __name__ == "__main__":
    conn_str = "kylin://ADMIN:KYLIN@ip:7070/project_name?version=v1"
    query_sql = "select  userid, datetime, count(*) c from soda_report group by userid, datetime"
    res_dic = kylin_query_show(conn_str, query_sql)

    tmp_list = []
    
    for k, v in res_dic.items():
        final_userid_count_list = sorted(v, key = lambda x: x[1], reverse=True)
        final_userid_count_list = final_userid_count_list[0:5]
        tmp_list.append((k, ','.join([':'.join([u_c[0], str(u_c[1])]) for u_c in final_userid_count_list])))

    res = sorted(tmp_list, key = lambda x:x[0], reverse=True)

    for i in res:
        print "\t".join(i)

 

posted @ 2018-10-29 12:09  wang_zai  阅读(2244)  评论(0编辑  收藏  举报