通过Java Api与HBase交互

出处:http://www.taobaotest.com/blogs/qa?bid=13894

引言

HBase提供了Java API访问接口,掌握它就像Java应用访问RDBMS时需要掌握JDBC一样重要。本文将沿用前两篇文章中blog表的示例,介绍常用的API。
练习前的准备工作
  • 创建一个Maven工程,加入以下依赖:
    <dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase</artifactId>
    <version>0.90.2</version>
    </dependency>
    如果你的Maven库里还没有hbase,还需要配置下repository
    <repositories>
    <repository>
    <id>cloudera</id>
    <url>https://repository.cloudera.com/content/groups/public</url>
    </repository>
    </repositories>
  • 确保HBase环境已启动且可以正常连接,然后将HBase环境的hbase-site.xml文件拷贝到上述工程的src/test/resources目录
    加载配置
    Configuration conf = new Configuration();
    // conf.addResource("hbase-site-cluster.xml");//可以指定文件加载
    conf = HBaseConfiguration.create(conf);
    创建表
    // ---- Create the table ----
    // HBaseAdmin handles table-level operations such as create and drop.
    HBaseAdmin admin = new HBaseAdmin(conf);
    // The "blog" table gets two column families: "article" and "author".
    HTableDescriptor desc = new HTableDescriptor("blog");
    desc.addFamily(new HColumnDescriptor("article"));
    desc.addFamily(new HColumnDescriptor("author"));
    admin.createTable(desc);
    增加记录
    // ---- Insert data ----
    // HTable handles record-level operations (insert, delete, update, query).
    HTable table = new HTable(conf, Bytes.toBytes("blog"));
    byte[] articleFamily = Bytes.toBytes("article");
    byte[] authorFamily = Bytes.toBytes("author");
    // A single Put carries every column written to row "1".
    Put put = new Put(Bytes.toBytes("1"));
    put.add(articleFamily, Bytes.toBytes("title"), Bytes.toBytes("Head First HBase"));
    put.add(articleFamily, Bytes.toBytes("content"), Bytes.toBytes("HBase is the Hadoop database. Use it when you need random, realtime read/write access to your Big Data."));
    put.add(articleFamily, Bytes.toBytes("tags"), Bytes.toBytes("Hadoop,HBase,NoSQL"));
    put.add(authorFamily, Bytes.toBytes("name"), Bytes.toBytes("hujinjun"));
    put.add(authorFamily, Bytes.toBytes("nickname"), Bytes.toBytes("一叶渡江"));
    table.put(put);
    知识点回顾:RowKey 和 ColumnName 是二进制值(Java 类型 byte[]),value 是一个字节数组(Java类型 byte[])
    根据RowKey查询
    // ---- Query a row by its row key ----
    Get get = new Get(Bytes.toBytes("1"));
    Result result = table.get(get);
    // NOTE: list() yields null for an empty Result (the delete example asserts exactly
    // that), so this loop assumes row "1" exists.
    for (KeyValue kv : result.list()) {
      System.out.println("family:" + Bytes.toString(kv.getFamily()));
      System.out.println("qualifier:" + Bytes.toString(kv.getQualifier()));
      System.out.println("value:" + Bytes.toString(kv.getValue()));
      System.out.println("Timestamp:" + kv.getTimestamp());
    }
    遍历查询与迭代
    // ---- Scan the whole table ----
    Scan scan = new Scan();
    // Open the scanner before entering the try block: if getScanner() throws there is
    // nothing to close, and the finally clause can no longer hit a null reference.
    ResultScanner rs = table.getScanner(scan);
    try {
      // Outer loop: one Result per row; inner loop: one KeyValue per cell in that row.
      for (Result r : rs) {
        for (KeyValue kv : r.list()) {
          System.out.println("family:" + Bytes.toString(kv.getFamily()));
          System.out.println("qualifier:" + Bytes.toString(kv.getQualifier()));
          System.out.println("value:" + Bytes.toString(kv.getValue()));
        }
      }
    } finally {
      rs.close();
    }
    知识点回顾:HTable的存储结构
    可以看到,上面的代码用了两层for循环来遍历:外层循环遍历每一行(Result),内层循环遍历该行中的每个单元格(KeyValue)。
    更新练习
    // ---- Update ----
    byte[] rowKey = Bytes.toBytes("1");
    byte[] authorCf = Bytes.toBytes("author");
    byte[] nicknameCol = Bytes.toBytes("nickname");

    // Read the value before the update.
    Get get2 = new Get(rowKey);
    get2.addColumn(authorCf, nicknameCol);
    assertThat(Bytes.toString(table.get(get2).list().get(0).getValue()), is("一叶渡江"));

    // Update nickname to "yedu": an update is just another Put on the same row and column.
    Put put2 = new Put(rowKey);
    put2.add(authorCf, nicknameCol, Bytes.toBytes("yedu"));
    table.put(put2);

    // Read the value after the update.
    Get get3 = new Get(rowKey);
    get3.addColumn(authorCf, nicknameCol);
    assertThat(Bytes.toString(table.get(get3).list().get(0).getValue()), is("yedu"));

    // Read multiple versions of nickname (two in this example); the assertions below
    // expect the newest value first.
    Get get4 = new Get(rowKey);
    get4.addColumn(authorCf, nicknameCol);
    get4.setMaxVersions(2);
    // Typed as List<KeyValue> so that getValue() can be called on the elements.
    List<KeyValue> results = table.get(get4).list();
    assertThat(results.size(), is(2));
    assertThat(Bytes.toString(results.get(0).getValue()), is("yedu"));
    assertThat(Bytes.toString(results.get(1).getValue()), is("一叶渡江"));
    删除记录
    // ---- Delete records ----
    // Delete one specific column: author:nickname of row "1".
    Delete deleteColumn = new Delete(Bytes.toBytes("1"));
    deleteColumn.deleteColumns(Bytes.toBytes("author"), Bytes.toBytes("nickname"));
    table.delete(deleteColumn);
    // get4 (from the update example) requests up to two versions of author:nickname;
    // a null list confirms that none of them is readable any more.
    assertThat(table.get(get4).list(), nullValue());

    // Delete all columns of row "1".
    Delete deleteAll = new Delete(Bytes.toBytes("1"));
    table.delete(deleteAll);
    // Re-use the earlier Scan to confirm the table is empty, and close the scanner
    // afterwards so it is not leaked.
    ResultScanner emptyScanner = table.getScanner(scan);
    try {
      assertThat(emptyScanner.next(), nullValue());
    } finally {
      emptyScanner.close();
    }
    删除表
    /**=========删除表=========*/
    admin.disableTable("blog");
    admin.deleteTable("blog");
    assertThat(admin.tableExists("blog"),is(false));
    完整代码示例
    public class HBase {
    public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    // conf.addResource("hbase-site-cluster.xml");//指定文件加载
    conf = HBaseConfiguration.create(conf);
    HBaseAdmin admin = new HBaseAdmin(conf);//HBaseAdmin负责跟表相关的操作如create,drop等 

    /**========创建表=========*/
    HTableDescriptor desc = new HTableDescriptor("blog");
    desc.addFamily(new HColumnDescriptor("article"));
    desc.addFamily(new HColumnDescriptor("author"));
    admin.createTable(desc );

    HTable table = new HTable(conf, Bytes.toBytes("blog"));//HTabel负责跟记录相关的操作如增删改查等

    /**=========插入数据=========*/

    Put put = new Put(Bytes.toBytes("1"));
    put.add(Bytes.toBytes("article"), Bytes.toBytes("title"), Bytes.toBytes("Head First HBase"));
    put.add(Bytes.toBytes("article"), Bytes.toBytes("content"), Bytes.toBytes("HBase is the Hadoop database. Use it when you need random, realtime read/write access to your Big Data."));
    put.add(Bytes.toBytes("article"), Bytes.toBytes("tags"), Bytes.toBytes("Hadoop,HBase,NoSQL"));
    put.add(Bytes.toBytes("author"), Bytes.toBytes("name"), Bytes.toBytes("hujinjun"));
    put.add(Bytes.toBytes("author"), Bytes.toBytes("nickname"), Bytes.toBytes("一叶渡江"));
    table.put(put);
    /**=========根据rowkey
    Get get = new Get(Bytes.toBytes("1")); 查询数据=========*/
    Result result = table.get(get);
      for(KeyValue kv :result.list()){
      System.out.println("family:" +Bytes.toString(kv.getFamily()));
      System.out.println("qualifier:" +Bytes.toString(kv.getQualifier()));
      System.out.println("value:" +Bytes.toString(kv.getValue()));
      System.out.println("Timestamp:" +kv.getTimestamp());
    }
    /**=========遍历查询=========*/
    Scan scan = new Scan();
    ResultScanner rs =null;
    try {
      rs = table.getScanner(scan);
      for (Result r : rs) {
        for(KeyValue kv :r.list()){
          System.out.println("family:" +Bytes.toString(kv.getFamily()));
          System.out.println("qualifier:" +Bytes.toString(kv.getQualifier()));
          System.out.println("value:" +Bytes.toString(kv.getValue()));
        }
      }
    } finally {
      rs.close();
    }
    /**=========更新=========*/
    //查询更新前的值
    Get get2 = new Get(Bytes.toBytes("1"));
    get2.addColumn(Bytes.toBytes("author"), Bytes.toBytes("nickname"));
    assertThat(Bytes.toString(table.get(get2).list().get(0).getValue()),is("一叶渡江"));
    //更新nickname为yedu
    Put put2 = new Put(Bytes.toBytes("1")); :
    put2.add(Bytes.toBytes("author"), Bytes.toBytes("nickname"), Bytes.toBytes("yedu"));
    table.put(put2);
    //查询更新结果
    Get get3 = new Get(Bytes.toBytes("1"));
    get3.addColumn(Bytes.toBytes("author"), Bytes.toBytes("nickname"));
    assertThat(Bytes.toString(table.get(get3).list().get(0).getValue()),is("yedu"));
    //查询nickname的多个(本示例为2个)版本值
    Get get4 = new Get(Bytes.toBytes("1"));
    get4.addColumn(Bytes.toBytes("author"), Bytes.toBytes("nickname"));
    get4.setMaxVersions(2);
    List results = table.get(get4).list();
    assertThat(results.size(),is(2));
    assertThat(Bytes.toString(results.get(0).getValue()),is("yedu"));
    assertThat(Bytes.toString(results.get(1).getValue()),is("一叶渡江"));
    /**=========删除记录=========*/
    //删除指定column
    Delete deleteColumn = new Delete(Bytes.toBytes("1"));
    deleteColumn.deleteColumns(Bytes.toBytes("author"),Bytes.toBytes("nickname"));
    table.delete(deleteColumn);
    assertThat( table.get(get4).list(),nullValue());
    //删除所有column
    Delete deleteAll = new Delete(Bytes.toBytes("1"));
    table.delete(deleteAll);
    assertThat(table.getScanner(scan).next(),nullValue());
    /**=========删除表=========*/
    admin.disableTable("blog");
    admin.deleteTable("blog");
    assertThat(admin.tableExists("blog"),is(false));
    }
    }
    小结
    本文介绍了如何使用Java API创建、删除表,以及对记录进行增删改查,内容以练习为主,也可作为速查手册(比如查看如何迭代查询结果)。对HBase的基本概念及操作就介绍到这里,后面将介绍如何使用MapReduce对HBase数据进行分布式计算。
  • posted @ 2014-09-23 00:13  JamesFan  阅读(179)  评论(0)  编辑  收藏  举报