import happybase

import json


def get_by_rows(row_key, table_name, cf="f1", columns=None, conn=None):
    """Fetch several rows from an HBase table with one ``table.rows`` call.

    Args:
        row_key: Row keys to fetch; forwarded unchanged as the ``rows``
            argument of ``table.rows``.
        table_name: Name handed to ``conn.table``.
        cf: Column family that is prefixed (as ``"<cf>:"``) to every entry
            of ``columns``.
        columns: Column qualifiers without the family prefix. When ``None``
            (the default) ``columns=None`` is forwarded to ``table.rows``
            instead of failing while building the prefixed list.
        conn: Object exposing ``table(name)``. It is required in practice:
            the ``None`` default only keeps the signature keyword-friendly.

    Returns:
        The value returned by ``table.rows`` for the requested keys.
    """
    table = conn.table(table_name)
    if columns is None:
        # No explicit column list: let the client library apply its default.
        qualified_columns = None
    else:
        qualified_columns = [cf + ":" + name for name in columns]
    return table.rows(rows=row_key, columns=qualified_columns)


def hbase_process(user_set, columns, table_name, conn=None):
    """Look up users in HBase and print the entries of every "tv" payload.

    Each element of ``user_set`` is converted with ``str`` and used as a row
    key. Every cell value of every returned row is decoded with
    ``json.loads``; when the decoded object contains a ``"tv"`` key, its
    items are printed as a list of ``(key, value)`` tuples.

    Args:
        user_set: Iterable of user identifiers.
        columns: Column qualifiers (without family prefix) passed on to
            ``get_by_rows``.
        table_name: Name of the table to query.
        conn: Connection object passed on to ``get_by_rows``.

    Returns:
        None. Results are only printed.
    """
    search_rowkeys = [str(item) for item in user_set]
    datas = get_by_rows(row_key=search_rowkeys, table_name=table_name, columns=columns, conn=conn)
    # Example of the expected result shape:
    # [(b'123', {b'f1:tags': b'{"user_id": 123, "tags": {"tag1": 1, "tag2": 2, "tag3": 2.4, "Comedy": 1.1}}'}),
    #  (b'456', {b'f1:tags': b'{"user_id": 456, "tags": {"tag2": 2, "tag3": 2}}',
    #            b'f1:tv': b'{"user_id": 456, "tv": {"tv1": 1}}'})]
    if not datas:
        return

    for record in datas:
        # record[0] is the row key and record[1] maps column name -> raw value.
        # The row key is deliberately not parsed: it was never used, and
        # decoding it as JSON crashed for any non-JSON key such as "abc".
        cells = record[1]
        for raw_value in cells.values():
            # e.g. b'{"user_id": 456, "tv": {"tv1": 1}}' becomes a dict.
            payload = json.loads(raw_value)
            # Tolerate payloads stored without the redundant "user_id" field.
            payload.pop("user_id", None)
            # Three payload kinds were planned; only "tv" is handled here.
            if "tv" in payload:
                temp = list(payload["tv"].items())
                print(temp)


if __name__ == '__main__':
    # Demo entry point: query two sample users against a local Thrift server.
    import happybase

    users = [123, 456]
    tablename = "namespace:my_table"
    connection = happybase.Connection(host="localhost", port=9090, protocol='compact', transport='framed')
    try:
        hbase_process(user_set=users,
                      columns=["tv", "tags"],
                      table_name=tablename,
                      conn=connection)
    finally:
        # Always release the underlying Thrift transport, even if the lookup fails.
        connection.close()
# Blog footer: posted 2022-12-06 13:43 by 木叶流云 (26 reads, 0 comments).