# requirement.txt
# happybase==1.2.0
"""
hbase.py
"""
# -*- coding: utf-8 -*-
import happybase
from config.hbase_config import HAPPYBASE_HBASE
# HAPPYBASE_HBASE = {
# "host": "xxx.xxx.xxx.xxx",
# "port": ???,
# "size": ?,
# }
from tasks.common_constant import log
# Module-level connection pool built from the project's HAPPYBASE_HBASE settings.
# It is constructed as a side effect of importing this module, and it can be
# passed to HbaseCURD(hbase_pool=...) so that the pool is reused.
# NOTE(review): presumably importing this module therefore needs a reachable
# HBase Thrift server -- confirm against the happybase ConnectionPool docs.
hbase_pool = happybase.ConnectionPool(**HAPPYBASE_HBASE)
class HbaseCURD():
    """Helper for table-level HBase operations through a happybase pool.

    Every method borrows a connection from ``self.hbase_pool`` for the
    duration of the operation. Requests that cannot be honoured because the
    table is missing (or, for creation, already exists) are reported through
    ``log`` and otherwise ignored; no exception is raised for them.
    """

    def __init__(self, hbase_config=None, hbase_pool=None):
        """Bind the helper to a connection pool.

        Args:
            hbase_config: Keyword arguments forwarded to
                ``happybase.ConnectionPool`` to build a new pool. Takes
                precedence over ``hbase_pool`` when both are given.
            hbase_pool: An already constructed pool to reuse.

        Raises:
            ValueError: If neither ``hbase_config`` nor ``hbase_pool`` is
                supplied. ``ValueError`` is a subclass of ``Exception``, so
                callers catching ``Exception`` are unaffected.
        """
        self.hbase_config = hbase_config
        if hbase_config is not None:
            self.hbase_pool = happybase.ConnectionPool(**hbase_config)
        elif hbase_pool is not None:
            self.hbase_pool = hbase_pool
        else:
            raise ValueError('HbaseCURD init error')

    @staticmethod
    def _table_exists(connection, table_name):
        """Return True when ``table_name`` is listed by ``connection.tables()``.

        The listing is compared against the encoded name, so ``str`` names are
        encoded first and ``bytes`` names are used as they are.
        """
        if isinstance(table_name, bytes):
            encoded_name = table_name
        else:
            encoded_name = table_name.encode()
        return encoded_name in connection.tables()

    def save_to_hbase(self, table_name, data, batch_size=5000, timeout=120):
        """Write rows into an existing table using a batched mutation.

        Args:
            table_name: Name of the target table.
            data: Mapping of row key to the column/value mapping to store.
            batch_size: Forwarded to ``table.batch``.
            timeout: Forwarded to ``pool.connection``.

        Returns:
            None. If the table does not exist an error is logged and nothing
            is written.
        """
        with self.hbase_pool.connection(timeout) as connection:
            if not self._table_exists(connection, table_name):
                log.error('save to hbsae fail, hbase table %s not exist' % table_name)
                return
            table = connection.table(table_name)
            with table.batch(batch_size=batch_size) as bat:
                for row_key, kv_pairs in data.items():
                    bat.put(row_key, kv_pairs)

    def create_hbase_table(self, table_name, families=None):
        """Create a table unless one with the same name already exists.

        Args:
            table_name: Name of the table to create.
            families: Column-family specification forwarded to
                ``connection.create_table``. When omitted, a single entry
                keyed ``'families'`` with ``max_versions=1`` is used.
                NOTE(review): that key presumably becomes the column family
                name -- confirm this default is intended.

        Returns:
            None. If the table already exists a warning is logged instead.
        """
        with self.hbase_pool.connection() as connection:
            if self._table_exists(connection, table_name):
                log.warning('hbase table %s exist, create table fail' % table_name)
                return
            if families is None:
                families = {'families': dict(max_versions=1)}
            connection.create_table(table_name, families=families)

    def delete_hbase_table(self, table_name):
        """Delete a table, passing ``disable=True`` to ``delete_table``.

        Args:
            table_name: Name of the table to delete.

        Returns:
            None. If the table does not exist a warning is logged instead.
        """
        with self.hbase_pool.connection() as connection:
            if self._table_exists(connection, table_name):
                connection.delete_table(table_name, disable=True)
            else:
                log.warning('hbase table %s not exist, delete table fail' % table_name)

    def scan(self, table_name, columns=None, filter=None, limit=None, batch_size=5000, timeout=120):
        """Lazily scan a table.

        The existence check runs immediately. The scan itself starts when the
        returned generator is first iterated, and that generator keeps its own
        pooled connection checked out until it is exhausted or closed. Rows
        are therefore never fetched over a connection that has already been
        handed back to the pool.

        Args:
            table_name: Name of the table to scan.
            columns: Forwarded to ``table.scan``.
            filter: Forwarded to ``table.scan``.
            limit: Forwarded to ``table.scan``.
            batch_size: Forwarded to ``table.scan``.
            timeout: Forwarded to ``pool.connection``, both for the existence
                check and for the scan.

        Returns:
            A generator over the scan results, or None (after logging a
            warning) when the table does not exist.
        """
        with self.hbase_pool.connection(timeout) as connection:
            table_found = self._table_exists(connection, table_name)
        if not table_found:
            log.warning('hbase table %s not exist, get data fail' % table_name)
            return None
        return self._iter_scan(table_name, columns, filter, limit, batch_size, timeout)

    def _iter_scan(self, table_name, columns, filter, limit, batch_size, timeout):
        """Yield scan results while holding a pooled connection."""
        with self.hbase_pool.connection(timeout) as connection:
            table = connection.table(table_name)
            scanner = table.scan(columns=columns, filter=filter,
                                 batch_size=batch_size, limit=limit)
            for item in scanner:
                yield item