Hbase概述以及完成数据库的一般操作
Hbase是以hdfs为数据储存的,一种分布式、可扩展的NoSQL数据库
本质为一个稀疏的、分布式的、持久的多维排序map
最基本的结构为rowKey和columnFamily
一条数据需要rowKey,columnFamily,columnName三个才能确定唯一的值
逻辑结构
hbase
列族1 | 列族2 | ||||
Row Key | 字段1 | 字段2 | 字段3 | 字段4 | 字段5 |
row key1 | |||||
row key2 |
每一行数据都有一个row key,按照字典顺序排列
每几列属于一个列族
行拆分按照row key,按行分类不同的数据;列拆分按照列族,按列分类不同的数据,不同列族的为一种数据
物理结构
K-V
Row Key | Column Family | Column Qualifier | Time Stamp | Type | Value |
Row Key:储存每一个row key
Column Family:储存对应的列族
Column Qualifier:储存该列族的列
Timestamp:时间戳,对于相同的数据,我们会比较时间戳,更新为新的数据
Type:操作的类型(如Put、Delete),删除数据时通过该字段写入删除标记
Value:储存的值
数据模型
命名空间:Name Space
hbase的命名空间对应关系型数据库的database,每个命名空间下有多张表
表:Table
行:Row
列:Column
时间戳:Time Stamp
唯一单元:Cell,上面一行K-V对应一个Cell
高可用的Hbase
一个Alive Master,多个Backup的Master,由zookeeper管理,每个节点都有一个Region Server
一般Master设置在namenode上面
一张表根据row key和列族的划分,划分给不同的Region Server进行管理
cell是最小单元,对应的是某一行,某一列族,某一列的数据
Hbase Shell操作
一般用"命名空间:表名"来定位一张表
namespace:
alter_namespace:修改namespace
create_namespace:创建namespace
describe_namespace "namespace_name":查看指定的namespace
drop_namespace:删除namespace
list_namespace:查看所有的namespace
list_namespace_tables "namespace_name":查看指定namespace下的所有表
table:
create "table_name","columnFamily1","columnFamily2"... :创建表,第一个为表名,后面全是列族名,默认为default下
create "namespace_name:table_name", {NAME =>"columnFamily1" , VERSIONS => 5......},{....}... :创建表的详细语法
alter "namespace_name:table_name" , {NAME =>"columnFamily1",...},.....:修改表格
disable "tableName":禁用表
enable "tableName":启用表
drop "tableName" :删除表,注意只能删除禁用的表,因此需要先禁用该表才能删除
put "namespace_name:table_name","rowkey","columnFamily:columnName","value":添加数据,一条语句只能插入一个值,如果rowkey和columnFamily:columnName相同则会覆盖(实际是写入一个新版本,读取时默认返回最新的值)
get "namespace_name:table_name","rowkey",{COLUMN=>["columnFamily:columnName",....]}:查看某一行数据,只能通过rowkey查找,后面可以省略,默认为返回一行所有数据
scan "namespace_name:table_name":扫描一张表,返回所有数据
delete "namespace_name:table_name", 'rowkey', 'columnFamily:columnName' :删除指定单元格,不填就是删除整行,后面可以再带个时间戳为删除指定版本的
一个数据由命名空间,表,行号,列族,字段名一起确定
HbaseAPI
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>2.5.5</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.hbase/hbase-server -->
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-server</artifactId>
<version>2.5.5</version>
</dependency>
package org.example.Hbase.utils;
import lombok.SneakyThrows;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.ColumnValueFilter;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.util.Bytes;
/**
 * Small helper around the HBase client API that exposes namespace, table and
 * cell level operations and prints results to standard output.
 *
 * <p>One instance owns one {@link Connection} and one {@link Admin}; call
 * {@link #closeConnection()} when done. All String/byte conversions go through
 * {@link Bytes} so that row keys, qualifiers and values are always encoded as
 * UTF-8 regardless of the platform default charset.
 */
public class HbaseUtils {
    private final Connection connection;
    private final Admin admin;

    /**
     * Opens a connection via {@code ConnectionFactory.createConnection()} (no
     * explicit configuration is passed) and obtains an {@link Admin} from it.
     */
    @SneakyThrows
    public HbaseUtils() {
        connection = ConnectionFactory.createConnection();
        admin = connection.getAdmin();
    }

    /**
     * Creates a namespace.
     *
     * @param nameSpace name of the namespace to create
     */
    @SneakyThrows
    public void createNamespace(String nameSpace) {
        NamespaceDescriptor database = NamespaceDescriptor.create(nameSpace).build();
        admin.createNamespace(database);
    }

    /**
     * Deletes a namespace, first dropping every table that belongs to it.
     *
     * @param nameSpace name of the namespace to delete
     */
    @SneakyThrows
    public void deleteNamespace(String nameSpace) {
        // Only look at tables inside this namespace; listTableNames() would
        // return (and we would drop) every table in the whole cluster.
        TableName[] tables = admin.listTableNamesByNamespace(nameSpace);
        if (tables.length > 0) {
            System.out.println("命名空间不为空,将删除所有包含的表格");
            for (TableName tableName : tables) {
                dropTable(tableName);
            }
        }
        admin.deleteNamespace(nameSpace);
    }

    /**
     * Checks whether a table exists.
     *
     * @param nameSpace namespace of the table
     * @param tableName table name without the namespace prefix
     * @return {@code true} if the table exists
     */
    @SneakyThrows
    public boolean isTable(String nameSpace, String tableName) {
        return admin.tableExists(TableName.valueOf(nameSpace, tableName));
    }

    /**
     * Creates a table with the given column families, each keeping up to 5
     * versions. Does nothing except print a message if the table already exists.
     *
     * @param nameSpace      namespace of the table
     * @param tableName      table name without the namespace prefix
     * @param columnFamilies names of the column families to create
     */
    @SneakyThrows
    public void createTable(String nameSpace, String tableName, String... columnFamilies) {
        if (isTable(nameSpace, tableName)) {
            System.out.println("表已经存在");
            return;
        }
        TableDescriptorBuilder tableDescriptorBuilder =
                TableDescriptorBuilder.newBuilder(TableName.valueOf(nameSpace, tableName));
        for (String columnFamily : columnFamilies) {
            ColumnFamilyDescriptorBuilder columnFamilyDescriptorBuilder =
                    ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes(columnFamily));
            columnFamilyDescriptorBuilder.setMaxVersions(5);
            tableDescriptorBuilder.setColumnFamily(columnFamilyDescriptorBuilder.build());
        }
        admin.createTable(tableDescriptorBuilder.build());
        if (isTable(nameSpace, tableName)) {
            System.out.println("表创建成功");
        } else {
            System.out.println("表创建失败");
        }
    }

    /**
     * Disables (if needed) and deletes a table, then reports whether it is gone.
     * Prints a message and returns if the table does not exist.
     *
     * @param nameSpace namespace of the table
     * @param tableName table name without the namespace prefix
     */
    @SneakyThrows
    public void deleteTable(String nameSpace, String tableName) {
        if (!isTable(nameSpace, tableName)) {
            System.out.println("表不存在,无法删除");
            return;
        }
        dropTable(TableName.valueOf(nameSpace, tableName));
        if (isTable(nameSpace, tableName)) {
            System.out.println("删除失败");
        } else {
            System.out.println("删除成功");
        }
    }

    /**
     * Writes a single cell value.
     * Prints a message and returns if the table does not exist.
     *
     * @param nameSpace    namespace of the table
     * @param tableName    table name without the namespace prefix
     * @param rowKey       row key of the cell
     * @param columnFamily column family of the cell
     * @param columnName   column qualifier of the cell
     * @param value        value to store
     */
    @SneakyThrows
    public void putCell(String nameSpace, String tableName, String rowKey,
                        String columnFamily, String columnName, String value) {
        if (!isTable(nameSpace, tableName)) {
            System.out.println("表不存在,无法插入数据");
            return;
        }
        Put put = new Put(Bytes.toBytes(rowKey));
        put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(columnName), Bytes.toBytes(value));
        // try-with-resources so the Table is released even when put() throws.
        try (Table table = connection.getTable(TableName.valueOf(nameSpace, tableName))) {
            table.put(put);
        }
    }

    /**
     * Reads one row and prints each returned cell on its own line.
     * Prints a message and returns if the table does not exist.
     *
     * @param nameSpace    namespace of the table
     * @param tableName    table name without the namespace prefix
     * @param rowKey       row key to read
     * @param columnFamily column family to restrict to, or {@code null} for the whole row
     * @param columnName   column qualifier to restrict to, or {@code null} for the whole
     *                     family; ignored when {@code columnFamily} is {@code null}
     */
    @SneakyThrows
    public void getCell(String nameSpace, String tableName, String rowKey,
                        String columnFamily, String columnName) {
        if (!isTable(nameSpace, tableName)) {
            System.out.println("表不存在,无法读取数据");
            return;
        }
        Get get = new Get(Bytes.toBytes(rowKey));
        if (columnFamily != null) {
            byte[] family = Bytes.toBytes(columnFamily);
            if (columnName != null) {
                // Restrict to a single column.
                get.addColumn(family, Bytes.toBytes(columnName));
            } else {
                // Restrict to a whole column family.
                get.addFamily(family);
            }
        }
        try (Table table = connection.getTable(TableName.valueOf(nameSpace, tableName))) {
            System.out.println("Row Key FamilyName ColumnName Timestamp Value");
            Result result = table.get(get);
            // An empty Result may expose a null cell array, so check before iterating.
            if (result.isEmpty()) {
                return;
            }
            String row = Bytes.toString(result.getRow());
            for (Cell cell : result.rawCells()) {
                String family = Bytes.toString(CellUtil.cloneFamily(cell));
                String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
                long timestamp = cell.getTimestamp();
                String value = Bytes.toString(CellUtil.cloneValue(cell));
                System.out.println(row + " " + family + " " + qualifier + " " + timestamp + " " + value);
            }
        }
    }

    /**
     * Scans a table, optionally bounded by row range and filtered by a column
     * value, and prints every returned row and cell.
     *
     * <p>A filter is applied only when {@code familyName}, {@code columnName}
     * and {@code valueName} are all non-null. With {@code flag == false} a
     * {@code ColumnValueFilter} is used (keeps only the matching column); with
     * {@code flag == true} a {@code SingleColumnValueFilter} is used (keeps the
     * whole row of a matching column; per the original author's note, rows that
     * lack the column are kept as well).
     *
     * @param nameSpace  namespace of the table
     * @param tableName  table name without the namespace prefix
     * @param startRow   first row key of the scan, or {@code null} for no lower bound
     * @param stopRow    stop row key of the scan, or {@code null} for no upper bound
     * @param familyName column family used by the value filter, may be {@code null}
     * @param columnName column qualifier used by the value filter, may be {@code null}
     * @param valueName  value the column must equal, may be {@code null}
     * @param flag       selects which of the two filter types is used
     */
    @SneakyThrows
    public void scanTable(String nameSpace, String tableName, String startRow, String stopRow,
                          String familyName, String columnName, String valueName, boolean flag) {
        Scan scan = new Scan();
        // Row range.
        if (startRow != null) {
            scan.withStartRow(Bytes.toBytes(startRow));
        }
        if (stopRow != null) {
            scan.withStopRow(Bytes.toBytes(stopRow));
        }
        // Value filter.
        if (familyName != null && columnName != null && valueName != null) {
            byte[] family = Bytes.toBytes(familyName);
            byte[] qualifier = Bytes.toBytes(columnName);
            byte[] expected = Bytes.toBytes(valueName);
            FilterList filterList = new FilterList();
            if (flag) {
                filterList.addFilter(
                        new SingleColumnValueFilter(family, qualifier, CompareOperator.EQUAL, expected));
            } else {
                filterList.addFilter(
                        new ColumnValueFilter(family, qualifier, CompareOperator.EQUAL, expected));
            }
            scan.setFilter(filterList);
        }
        // Both resources are closed in reverse order even if iteration throws.
        try (Table table = connection.getTable(TableName.valueOf(nameSpace, tableName));
             ResultScanner scanner = table.getScanner(scan)) {
            for (Result result : scanner) {
                String row = Bytes.toString(result.getRow());
                System.out.println("===========================================");
                System.out.println("行号:" + row);
                if (result.isEmpty()) {
                    continue;
                }
                for (Cell cell : result.rawCells()) {
                    String family = Bytes.toString(CellUtil.cloneFamily(cell));
                    String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
                    long timestamp = cell.getTimestamp();
                    String value = Bytes.toString(CellUtil.cloneValue(cell));
                    System.out.println("列族:" + family + " 列字段:" + qualifier
                            + " 时间戳:" + timestamp + " 值:" + value);
                }
            }
        }
    }

    /**
     * Deletes a row, a column family of a row, or a single column of a row.
     *
     * @param nameSpace    namespace of the table
     * @param tableName    table name without the namespace prefix
     * @param rowKey       row key to delete from
     * @param columnFamily column family to delete, or {@code null} to delete the whole row
     * @param columnName   column qualifier to delete (all versions), or {@code null} to
     *                     delete the whole family; ignored when {@code columnFamily} is {@code null}
     */
    @SneakyThrows
    public void deleteColumn(String nameSpace, String tableName, String rowKey,
                             String columnFamily, String columnName) {
        Delete delete = new Delete(Bytes.toBytes(rowKey));
        if (columnFamily != null) {
            byte[] family = Bytes.toBytes(columnFamily);
            if (columnName != null) {
                // Delete only this column. Must NOT also call addFamily(): that
                // would add a family-wide delete marker and wipe sibling columns.
                delete.addColumns(family, Bytes.toBytes(columnName));
            } else {
                // Delete every column of the family.
                delete.addFamily(family);
            }
        }
        // With neither family nor column set, the Delete removes the whole row.
        try (Table table = connection.getTable(TableName.valueOf(nameSpace, tableName))) {
            table.delete(delete);
        }
    }

    /** Closes the admin handle and the connection; the connection is closed even if closing the admin fails. */
    @SneakyThrows
    public void closeConnection() {
        try {
            if (admin != null) {
                admin.close();
            }
        } finally {
            if (connection != null) {
                connection.close();
            }
        }
    }

    /** Disables the table if it is currently enabled, then deletes it. */
    @SneakyThrows
    private void dropTable(TableName tableName) {
        // disableTable() fails on an already-disabled table, so check first.
        if (admin.isTableEnabled(tableName)) {
            admin.disableTable(tableName);
        }
        admin.deleteTable(tableName);
    }
}
package org.example.Hbase;
import lombok.SneakyThrows;
import org.example.Hbase.utils.HbaseUtils;
/**
 * Demo driver for {@code HbaseUtils}: creates the {@code school} namespace and
 * the {@code student} table, inserts sample rows, then scans, reads and deletes.
 */
public class HbaseTest {
    /**
     * Runs the demo end to end.
     *
     * @param args unused
     */
    @SneakyThrows
    public static void main(String[] args) {
        HbaseUtils hbaseUtils = new HbaseUtils();
        // try/finally so the connection is released even if a step fails.
        try {
            // Create the "school" namespace.
            hbaseUtils.createNamespace("school");
            // Create table "student" under "school" with column families info, score, teacher.
            hbaseUtils.createTable("school", "student", "info", "score", "teacher");

            // Insert data for five students.
            // Student: java
            hbaseUtils.putCell("school", "student", "1001", "info", "name", "java");
            hbaseUtils.putCell("school", "student", "1001", "info", "age", "20");
            hbaseUtils.putCell("school", "student", "1001", "info", "sex", "男");
            hbaseUtils.putCell("school", "student", "1001", "score", "离散数学", "90");
            hbaseUtils.putCell("school", "student", "1001", "score", "高等数学", "80");
            hbaseUtils.putCell("school", "student", "1001", "score", "数学分析", "70");
            hbaseUtils.putCell("school", "student", "1001", "teacher", "name", "张三");
            hbaseUtils.putCell("school", "student", "1001", "teacher", "hobby", "篮球");
            // Student: python
            hbaseUtils.putCell("school", "student", "1002", "info", "name", "python");
            hbaseUtils.putCell("school", "student", "1002", "info", "age", "18");
            hbaseUtils.putCell("school", "student", "1002", "info", "sex", "男");
            hbaseUtils.putCell("school", "student", "1002", "score", "高级英语", "90");
            hbaseUtils.putCell("school", "student", "1002", "score", "英语口语", "80");
            hbaseUtils.putCell("school", "student", "1002", "score", "英语提高", "70");
            hbaseUtils.putCell("school", "student", "1002", "teacher", "name", "李四");
            hbaseUtils.putCell("school", "student", "1002", "teacher", "age", "40");
            // Student: c++
            hbaseUtils.putCell("school", "student", "1003", "info", "name", "c++");
            hbaseUtils.putCell("school", "student", "1003", "info", "age", "20");
            hbaseUtils.putCell("school", "student", "1003", "info", "sex", "男");
            hbaseUtils.putCell("school", "student", "1003", "score", "高级英语", "90");
            hbaseUtils.putCell("school", "student", "1003", "score", "高等数学", "80");
            hbaseUtils.putCell("school", "student", "1003", "score", "近代史纲要", "70");
            hbaseUtils.putCell("school", "student", "1003", "teacher", "name", "王五");
            // Student: scala
            hbaseUtils.putCell("school", "student", "1004", "info", "name", "scala");
            hbaseUtils.putCell("school", "student", "1004", "info", "age", "19");
            hbaseUtils.putCell("school", "student", "1004", "info", "sex", "男");
            hbaseUtils.putCell("school", "student", "1004", "score", "线性代数", "90");
            hbaseUtils.putCell("school", "student", "1004", "score", "计算机网络", "80");
            // Student: html
            hbaseUtils.putCell("school", "student", "1005", "info", "name", "html");
            hbaseUtils.putCell("school", "student", "1005", "info", "age", "20");
            hbaseUtils.putCell("school", "student", "1005", "info", "sex", "男");

            // Scan the whole table without range or filter.
            hbaseUtils.scanTable("school", "student", null, null, null, null, null, false);
            // Read the personal info of student c++.
            hbaseUtils.getCell("school", "student", "1003", "info", null);
            // Read all scores of student java.
            hbaseUtils.getCell("school", "student", "1001", "score", null);
            // Delete the 英语口语 score of student python.
            hbaseUtils.deleteColumn("school", "student", "1002", "score", "英语口语");
        } finally {
            // Close the connection.
            hbaseUtils.closeConnection();
        }
    }
}
总结可以得到
hbase建表的时候需要指定命名空间,表名和列族
不同于关系型数据库,hbase中不同的行包含的内容可以不一样,包括有数据的列族和列的数量
查找数据一般是指定到某一个数据
(namespace,tablename,rowkey,columnFamily,columnName)=>Key