HBase入门
/×××××××××××××××××××××××××××××××××××××××××/
Author:xxx0624
HomePage:http://www.cnblogs.com/xxx0624/
/×××××××××××××××××××××××××××××××××××××××××/
一 HBase特点
1.HBase表与表之间没有关联查询
查询方法只有三种:
1.1 全表查询
1.2 按照rowkey查询
1.3 rowkey结合range查询
2.一个数据行(row)可以有唯一的row key和任意数量的列
3.表中的数据无char int 等类型之分
二 HBase基本命令
名称 | 命令表达式 | |
创建表 | create '表名称', '列名称1','列名称2','列名称N' | |
添加记录 | put '表名称', '行名称', '列名称:', '值' | |
查看记录 | get '表名称', '行名称' | |
查看表中的记录总数 | count '表名称' | |
删除记录 | delete '表名称', '行名称', '列名称' | |
删除一张表 | 先要屏蔽该表,才能对该表进行删除,第一步 disable '表名称' 第二步 drop '表名称' | |
查看所有记录 | scan "表名称" | |
查看某个表某个列中所有数据 | scan '表名称', {COLUMNS => ['列名称:']} | |
更新记录 | 就是重写一遍进行覆盖 |
三 HBase api操作
import java.io.IOException; import java.util.ArrayList; import java.util.List; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.MasterNotRunningException; import org.apache.hadoop.hbase.ZooKeeperConnectionException; import org.apache.hadoop.hbase.client.Delete; import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.HBaseAdmin; import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.util.Bytes; public class HbaseTest { private static Configuration conf =null; /** * 初始化配置 */ static { conf = HBaseConfiguration.create(); } /** * 创建一张表 */ public static void creatTable(String tableName, String[] familys) throws Exception { HBaseAdmin admin = new HBaseAdmin(conf); if (admin.tableExists(tableName)) { System.out.println("table already exists!"); } else { HTableDescriptor tableDesc = new HTableDescriptor(tableName); for(int i=0; i<familys.length; i++){ tableDesc.addFamily(new HColumnDescriptor(familys[i])); } admin.createTable(tableDesc); System.out.println("create table " + tableName + " ok."); } } /** * 删除表 */ public static void deleteTable(String tableName) throws Exception { try { HBaseAdmin admin = new HBaseAdmin(conf); admin.disableTable(tableName); admin.deleteTable(tableName); System.out.println("delete table " + tableName + " ok."); } catch (MasterNotRunningException e) { e.printStackTrace(); } catch (ZooKeeperConnectionException e) { e.printStackTrace(); } } /** * 插入一行记录 */ public static void addRecord (String tableName, String rowKey, String family, String qualifier, String value) throws Exception{ try { HTable table = 
new HTable(conf, tableName); Put put = new Put(Bytes.toBytes(rowKey)); put.add(Bytes.toBytes(family),Bytes.toBytes(qualifier),Bytes.toBytes(value)); table.put(put); System.out.println("insert recored " + rowKey + " to table " + tableName +" ok."); } catch (IOException e) { e.printStackTrace(); } } /** * 删除一行记录 */ public static void delRecord (String tableName, String rowKey) throws IOException{ HTable table = new HTable(conf, tableName); List list = new ArrayList(); Delete del = new Delete(rowKey.getBytes()); list.add(del); table.delete(list); System.out.println("del recored " + rowKey + " ok."); } /** * 查找一行记录 */ public static void getOneRecord (String tableName, String rowKey) throws IOException{ HTable table = new HTable(conf, tableName); Get get = new Get(rowKey.getBytes()); Result rs = table.get(get); for(KeyValue kv : rs.raw()){ System.out.print(new String(kv.getRow()) + " " ); System.out.print(new String(kv.getFamily()) + ":" ); System.out.print(new String(kv.getQualifier()) + " " ); System.out.print(kv.getTimestamp() + " " ); System.out.println(new String(kv.getValue())); } } /** * 显示所有数据 */ public static void getAllRecord (String tableName) { try{ HTable table = new HTable(conf, tableName); Scan s = new Scan(); ResultScanner ss = table.getScanner(s); for(Result r:ss){ for(KeyValue kv : r.raw()){ System.out.print(new String(kv.getRow()) + " "); System.out.print(new String(kv.getFamily()) + ":"); System.out.print(new String(kv.getQualifier()) + " "); System.out.print(kv.getTimestamp() + " "); System.out.println(new String(kv.getValue())); } } } catch (IOException e){ e.printStackTrace(); } } public static void main (String [] agrs) { try { String tablename = "scores"; String[] familys = {"grade", "course"}; HbaseTest.creatTable(tablename, familys); //add record zkb HbaseTest.addRecord(tablename,"zkb","grade","","哈哈"); HbaseTest.addRecord(tablename,"zkb","course","","90"); HbaseTest.addRecord(tablename,"zkb","course","math","97"); 
HbaseTest.addRecord(tablename,"zkb","course","art","87"); //add record baoniu HbaseTest.addRecord(tablename,"baoniu","grade","","4"); HbaseTest.addRecord(tablename,"baoniu","course","math","89"); System.out.println("===========get one record========"); HbaseTest.getOneRecord(tablename, "zkb"); System.out.println("===========show all record========"); HbaseTest.getAllRecord(tablename); System.out.println("===========del one record========"); HbaseTest.delRecord(tablename, "baoniu"); HbaseTest.getAllRecord(tablename); System.out.println("===========show all record========"); HbaseTest.getAllRecord(tablename); } catch (Exception e) { e.printStackTrace(); } } }
四 HBase运行参数解释
hbase.client.write.buffer
描述:设置写入数据的缓冲区大小. 当服务器和客户端传送数据时,服务器为了提高运行性能开了一个缓冲区来处理数据.
hbase.master.meta.thread.rescanfrequency
描述:设置HMaster多长时间对系统表root和meta扫描一次. 设置时间长一点,可以降低系统能耗.
hbase.hregion.max.filesize
描述:HRegion中的HStoreFile最大值,任何表中的列族一旦超过这个大小就会被切分.
hbase.zookeeper.property.maxClientCnxns
描述:这个配置来自于zookeeper,表示zookeeper客户端同时访问服务器端时候的并发连接数. zookeeper对于HBase来说就是一个入口,这个参数可以稍微设置大一点.
五 HBase读数据&写数据
读数据:优先读取HMemcache中的数据,若不存在再到HStore中读取,提高读取性能
写数据:写数据会写到HMemcache和Hlog中,HMemcache建立缓存,Hlog同步HMemcache和HStore的事务日志. 当Flush Cache时,数据持久化到HStore中,并清空HMemcache.
举例建表
Table | Row Key | Family | Attributes |
website | Row_ID | info: | URL,Title,Time,Introduce |
text: | No column key | ||
some_fa_1: | |||
some_fa_2: | |||
some_fa_3: |
上表中website表定义了info列簇,其中包含4个属性,分别代表web的URL地址,web的标题,web的最新更新时间,web的简介
text列簇无属性,则可以存储web的详细内容
some_fa_1/2/3 可选(解释同text列簇)
其中:当你想在info中添加属性时,只需在添加命令中使用info:xxx即可.这体现出HBase的可伸缩性
具体操作如下:
============================创建website表========================= create 'website', 'info','text','some_fa_1','some_fa_2','some_fa_3'
============================插入信息========================= put 'website', '1', 'info:title', "this is website's title"
put 'website', '1', 'info:time', '2015/02/26'
put 'website', '1', 'info:url', 'http://www.cnblogs.com/xxx0624/'
put 'website', '1', 'info:introduce', "this is website's introduce"
put 'website', '1', 'text:', 'this is xxx0624'
(剩下的some_fa_1/2/3可以自行参考text添加)
============================读取信息=========================
scan 'website',{COLUMNS=>['text:','info:title'] } 列举出web的内容和标题
get 'website','1' 列出web的row ID 等于1的数据
get 'website','1', {COLUMN => 'info'} 列出web的row ID 等于1的 info
详细可以参考:http://www.uml.org.cn/sjjm/201212141.asp