(转)Java操作Hbase进行建表、删表以及对数据进行增删改查，条件查询

1、搭建环境

新建JAVA项目，添加的包有:

有关Hadoop的hadoop-core-0.20.204.0.jar

有关Hbase的hbase-0.90.4.jar、hbase-0.90.4-tests.jar以及Hbase资源包中lib目录下的所有jar包

2、主要程序

  1 package com.wujintao.hbase.test;
  2 
  3 import java.io.IOException;
  4 import java.util.ArrayList;
  5 import java.util.List;
  6 
  7 import org.apache.hadoop.conf.Configuration;
  8 import org.apache.hadoop.hbase.HBaseConfiguration;
  9 import org.apache.hadoop.hbase.HColumnDescriptor;
 10 import org.apache.hadoop.hbase.HTableDescriptor;
 11 import org.apache.hadoop.hbase.KeyValue;
 12 import org.apache.hadoop.hbase.MasterNotRunningException;
 13 import org.apache.hadoop.hbase.ZooKeeperConnectionException;
 14 import org.apache.hadoop.hbase.client.Delete;
 15 import org.apache.hadoop.hbase.client.Get;
 16 import org.apache.hadoop.hbase.client.HBaseAdmin;
 17 import org.apache.hadoop.hbase.client.HTable;
 18 import org.apache.hadoop.hbase.client.HTablePool;
 19 import org.apache.hadoop.hbase.client.Put;
 20 import org.apache.hadoop.hbase.client.Result;
 21 import org.apache.hadoop.hbase.client.ResultScanner;
 22 import org.apache.hadoop.hbase.client.Scan;
 23 import org.apache.hadoop.hbase.filter.Filter;
 24 import org.apache.hadoop.hbase.filter.FilterList;
 25 import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
 26 import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
 27 import org.apache.hadoop.hbase.util.Bytes;
 28 
 29 public class JinTaoTest {
 30 
 31     public static Configuration configuration;
 32     static {
 33         configuration = HBaseConfiguration.create();
 34         configuration.set("hbase.zookeeper.property.clientPort", "2181");
 35         configuration.set("hbase.zookeeper.quorum", "192.168.1.100");
 36         configuration.set("hbase.master", "192.168.1.100:600000");
 37     }
 38 
 39     public static void main(String[] args) {
 40         // createTable("wujintao");
 41         // insertData("wujintao");
 42         // QueryAll("wujintao");
 43         // QueryByCondition1("wujintao");
 44         // QueryByCondition2("wujintao");
 45         //QueryByCondition3("wujintao");
 46         //deleteRow("wujintao","abcdef");
 47         deleteByCondition("wujintao","abcdef");
 48     }
 49 
 50     /**
 51      * 创建表
 52      * @param tableName
 53      */
 54     public static void createTable(String tableName) {
 55         System.out.println("start create table ......");
 56         try {
 57             HBaseAdmin hBaseAdmin = new HBaseAdmin(configuration);
 58             if (hBaseAdmin.tableExists(tableName)) {// 如果存在要创建的表，那么先删除，再创建
 59                 hBaseAdmin.disableTable(tableName);
 60                 hBaseAdmin.deleteTable(tableName);
 61                 System.out.println(tableName + " is exist,detele....");
 62             }
 63             HTableDescriptor tableDescriptor = new HTableDescriptor(tableName);
 64             tableDescriptor.addFamily(new HColumnDescriptor("column1"));
 65             tableDescriptor.addFamily(new HColumnDescriptor("column2"));
 66             tableDescriptor.addFamily(new HColumnDescriptor("column3"));
 67             hBaseAdmin.createTable(tableDescriptor);
 68         } catch (MasterNotRunningException e) {
 69             e.printStackTrace();
 70         } catch (ZooKeeperConnectionException e) {
 71             e.printStackTrace();
 72         } catch (IOException e) {
 73             e.printStackTrace();
 74         }
 75         System.out.println("end create table ......");
 76     }
 77 
 78     /**
 79      * 插入数据
 80      * @param tableName
 81      */
 82     public static void insertData(String tableName) {
 83         System.out.println("start insert data ......");
 84         HTablePool pool = new HTablePool(configuration, 1000);
 85         HTable table = (HTable) pool.getTable(tableName);
 86         Put put = new Put("112233bbbcccc".getBytes());// 一个PUT代表一行数据，再NEW一个PUT表示第二行数据,每行一个唯一的ROWKEY，此处rowkey为put构造方法中传入的值
 87         put.add("column1".getBytes(), null, "aaa".getBytes());// 本行数据的第一列
 88         put.add("column2".getBytes(), null, "bbb".getBytes());// 本行数据的第三列
 89         put.add("column3".getBytes(), null, "ccc".getBytes());// 本行数据的第三列
 90         try {
 91             table.put(put);
 92         } catch (IOException e) {
 93             e.printStackTrace();
 94         }
 95         System.out.println("end insert data ......");
 96     }
 97 
 98     /**
 99      * 删除一张表
100      * @param tableName
101      */
102     public static void dropTable(String tableName) {
103         try {
104             HBaseAdmin admin = new HBaseAdmin(configuration);
105             admin.disableTable(tableName);
106             admin.deleteTable(tableName);
107         } catch (MasterNotRunningException e) {
108             e.printStackTrace();
109         } catch (ZooKeeperConnectionException e) {
110             e.printStackTrace();
111         } catch (IOException e) {
112             e.printStackTrace();
113         }
114 
115     }
116     /**
117      * 根据 rowkey删除一条记录
118      * @param tablename
119      * @param rowkey
120      */
121      public static void deleteRow(String tablename, String rowkey)  {
122         try {
123             HTable table = new HTable(configuration, tablename);
124             List list = new ArrayList();
125             Delete d1 = new Delete(rowkey.getBytes());
126             list.add(d1);
127             
128             table.delete(list);
129             System.out.println("删除行成功!");
130             
131         } catch (IOException e) {
132             e.printStackTrace();
133         }
134         
135 
136     }
137 
138      /**
139       * 组合条件删除
140       * @param tablename
141       * @param rowkey
142       */
143      public static void deleteByCondition(String tablename, String rowkey)  {
144             //目前还没有发现有效的API能够实现 根据非rowkey的条件删除 这个功能能，还有清空表全部数据的API操作
145 
146     }
147 
148 
149     /**
150      * 查询所有数据
151      * @param tableName
152      */
153     public static void QueryAll(String tableName) {
154         HTablePool pool = new HTablePool(configuration, 1000);
155         HTable table = (HTable) pool.getTable(tableName);
156         try {
157             ResultScanner rs = table.getScanner(new Scan());
158             for (Result r : rs) {
159                 System.out.println("获得到rowkey:" + new String(r.getRow()));
160                 for (KeyValue keyValue : r.raw()) {
161                     System.out.println("列：" + new String(keyValue.getFamily())
162                             + "====值:" + new String(keyValue.getValue()));
163                 }
164             }
165         } catch (IOException e) {
166             e.printStackTrace();
167         }
168     }
169 
170     /**
171      * 单条件查询,根据rowkey查询唯一一条记录
172      * @param tableName
173      */
174     public static void QueryByCondition1(String tableName) {
175 
176         HTablePool pool = new HTablePool(configuration, 1000);
177         HTable table = (HTable) pool.getTable(tableName);
178         try {
179             Get scan = new Get("abcdef".getBytes());// 根据rowkey查询
180             Result r = table.get(scan);
181             System.out.println("获得到rowkey:" + new String(r.getRow()));
182             for (KeyValue keyValue : r.raw()) {
183                 System.out.println("列：" + new String(keyValue.getFamily())
184                         + "====值:" + new String(keyValue.getValue()));
185             }
186         } catch (IOException e) {
187             e.printStackTrace();
188         }
189     }
190 
191     /**
192      * 单条件按查询，查询多条记录
193      * @param tableName
194      */
195     public static void QueryByCondition2(String tableName) {
196 
197         try {
198             HTablePool pool = new HTablePool(configuration, 1000);
199             HTable table = (HTable) pool.getTable(tableName);
200             Filter filter = new SingleColumnValueFilter(Bytes
201                     .toBytes("column1"), null, CompareOp.EQUAL, Bytes
202                     .toBytes("aaa")); // 当列column1的值为aaa时进行查询
203             Scan s = new Scan();
204             s.setFilter(filter);
205             ResultScanner rs = table.getScanner(s);
206             for (Result r : rs) {
207                 System.out.println("获得到rowkey:" + new String(r.getRow()));
208                 for (KeyValue keyValue : r.raw()) {
209                     System.out.println("列：" + new String(keyValue.getFamily())
210                             + "====值:" + new String(keyValue.getValue()));
211                 }
212             }
213         } catch (Exception e) {
214             e.printStackTrace();
215         }
216 
217     }
218 
219     /**
220      * 组合条件查询
221      * @param tableName
222      */
223     public static void QueryByCondition3(String tableName) {
224 
225         try {
226             HTablePool pool = new HTablePool(configuration, 1000);
227             HTable table = (HTable) pool.getTable(tableName);
228 
229             List<Filter> filters = new ArrayList<Filter>();
230 
231             Filter filter1 = new SingleColumnValueFilter(Bytes
232                     .toBytes("column1"), null, CompareOp.EQUAL, Bytes
233                     .toBytes("aaa"));
234             filters.add(filter1);
235 
236             Filter filter2 = new SingleColumnValueFilter(Bytes
237                     .toBytes("column2"), null, CompareOp.EQUAL, Bytes
238                     .toBytes("bbb"));
239             filters.add(filter2);
240 
241             Filter filter3 = new SingleColumnValueFilter(Bytes
242                     .toBytes("column3"), null, CompareOp.EQUAL, Bytes
243                     .toBytes("ccc"));
244             filters.add(filter3);
245 
246             FilterList filterList1 = new FilterList(filters);
247 
248             Scan scan = new Scan();
249             scan.setFilter(filterList1);
250             ResultScanner rs = table.getScanner(scan);
251             for (Result r : rs) {
252                 System.out.println("获得到rowkey:" + new String(r.getRow()));
253                 for (KeyValue keyValue : r.raw()) {
254                     System.out.println("列：" + new String(keyValue.getFamily())
255                             + "====值:" + new String(keyValue.getValue()));
256                 }
257             }
258             rs.close();
259 
260         } catch (Exception e) {
261             e.printStackTrace();
262         }
263 
264     }
265 
266 }

点击查看

注意：可能大家没看到更新数据的操作，其实更新的操作跟添加完全一致：添加时rowkey不存在，更新时rowkey已经存在（并且timestamp相同）。另外，目前好像还没办法实现HBase数据的分页查询，不知道有没有人知道怎么做。

HBase性能优化建议：

针对前面的代码，有很多不足之处，在此我就不修改上面的代码了，只是提出建议的地方，大家自己加上

1)配置

当你调用create方法时将会加载两个配置文件：hbase-default.xml和hbase-site.xml，利用的是当前的java类路径。代码中configuration设置的这些配置将会覆盖hbase-default.xml和hbase-site.xml中相同的配置；如果两个配置文件都存在并且都已设置好相应参数，那么代码中就不必再重复设置上面这些属性。

2)关于建表

public void createTable(HTableDescriptor desc)

HTableDescriptor 代表的是表的schema, 提供的方法中比较有用的有

setMaxFileSize，指定最大的region size

setMemStoreFlushSize 指定memstore flush到HDFS上的文件大小

增加family通过 addFamily方法

public void addFamily(final HColumnDescriptor family)

HColumnDescriptor代表的是column的schema，提供的方法比较常用的有

setTimeToLive:指定最大的TTL,单位是秒(不是ms),过期数据会被自动删除。

setInMemory:指定是否放在内存中，对小表有用，可用于提高效率。默认关闭

setBloomFilter:指定是否使用BloomFilter,可提高随机查询效率。默认关闭

setCompressionType:设定数据压缩类型。默认无压缩。

setMaxVersions:指定数据最大保存的版本个数。默认为3。

注意的是，一般我们不去setInMemory为true,默认是关闭的

3)关于入库

官方建议

table.setAutoFlush(false); //数据入库之前先设置此项为false

table.flushCommits();//入库完成后，手动刷入数据

注意：

在入库过程中，put.setWriteToWAL(true/false);

关于这一项如果不希望大量数据在存储过程中丢失，建议设置为true,如果仅是在测试演练阶段，为了节省入库时间建议设置为false

4)关于获取表实例

HTablePool pool = new HTablePool(configuration, Integer.MAX_VALUE);

HTable table = (HTable) pool.getTable(tableName);

建议用表连接池的方式获取表，具体池有什么作用，我想用过数据库连接池的同学都知道，我就不再重复

不建议使用new HTable(configuration,tableName);的方式获取表

5)关于查询

建议每个查询语句都放入try catch语句块，并且finally中要进行关闭ResultScanner实例以及将不使用的表重新放入到HTablePool中的操作，具体做法如下

 1 public static void QueryAll(String tableName) {
 2         HTablePool pool = new HTablePool(configuration, Integer.MAX_VALUE);
 3         HTable table = null;
 4         ResultScanner rs = null;
 5         try {
 6             Scan scan = new Scan();
 7             table = (HTable) pool.getTable(tableName);
 8             rs = table.getScanner(scan);
 9             for (Result r : rs) {
10                 System.out.println("获得到rowkey:" + new String(r.getRow()));
11                 for (KeyValue keyValue : r.raw()) {
12                     System.out.println("列：" + new String(keyValue.getFamily())
13                             + "====值:" + new String(keyValue.getValue()));
14                 }
15             }
16         } catch (IOException e) {
17             e.printStackTrace();
18         }finally{
19             rs.close();// 最后还得关闭
20             pool.putTable(table); //实际应用过程中，pool获取实例的方式应该抽取为单例模式的，不应在每个方法都重新获取一次(单例明白？就是抽取到专门获取pool的逻辑类中，具体逻辑为如果pool存在着直接使用，如果不存在则new)
21         }
22     }

点击查看

所以，以上代码有缺陷的地方，感兴趣的同学可以针对优化建议作出相应修改

(备注：原文链接:http://javacrazyer.iteye.com/blog/1186881)

posted @ 2015-07-29 16:23 cyt025 阅读(363) 评论(0) 收藏举报

刷新页面返回顶部

cyt025

(转)Java操作Hbase进行建表、删表以及对数据进行增删改查，条件查询

公告