Manipulating an HBase database from Java (creating tables, inserting, deleting, querying)
The Java code is as follows:
package db.insert;

/*
 * Create a "student" table and perform basic operations on it.
 */
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.util.Bytes;

public class HBaseDBDao implements Serializable {

    private static final long serialVersionUID = 1L;

    // Static configuration
    private static Configuration conf = null;
    private static HBaseAdmin hAdmin;
    private static HTable table;

    public HBaseDBDao() {
        conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.1.154");
        conf.set("hbase.zookeeper.property.clientPort", "2181");
        conf.set("hbase.master", "192.168.1.154:60000");
        try {
            // Initialize the HBase administrator
            hAdmin = new HBaseAdmin(conf);
        } catch (MasterNotRunningException e) {
            e.printStackTrace();
        } catch (ZooKeeperConnectionException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    // Initialize the HTable; must be called before inserting or querying data
    public void initHTable(String tableName) throws IOException {
        table = new HTable(conf, tableName);
    }

    // Check whether a table exists
    public boolean isExist(String tableName) throws IOException {
        return hAdmin.tableExists(tableName);
    }

    // Create a table
    public void createTable(String tableName, String[] columnFamilys) throws Exception {
        if (hAdmin.tableExists(tableName)) {
            System.out.println("Table " + tableName + " already exists!");
            System.exit(0);
        } else {
            // Build the table descriptor
            HTableDescriptor tableDesc = new HTableDescriptor(tableName);
            // Add the column families to the descriptor
            for (String columnFamily : columnFamilys) {
                HColumnDescriptor hcd = new HColumnDescriptor(columnFamily);
                // Set the compression algorithm for this column family
                hcd.setCompressionType(Algorithm.LZO);
                tableDesc.addFamily(hcd);
            }
            // Create the table from the descriptor
            hAdmin.createTable(tableDesc);
            System.out.println("Created table " + tableName + " successfully!");
        }
    }

    // Delete a table
    public void deleteTable(String tableName) throws Exception {
        if (hAdmin.tableExists(tableName)) {
            // A table must be disabled before it can be deleted
            hAdmin.disableTable(tableName);
            hAdmin.deleteTable(tableName);
            System.out.println("Deleted table " + tableName + " successfully!");
        } else {
            System.out.println("Table to delete, " + tableName + ", does not exist!");
            System.exit(0);
        }
    }

    // Batch insert
    public void addRowBatch(String tableName, String row, String columnFamily,
            String column, String value) throws Exception {
        // Disable auto-flush; otherwise every put is committed on its own,
        // which is the main reason bulk imports run slowly
        table.setAutoFlush(false, false);
        // Set the write buffer size; the client flushes automatically once the
        // buffer exceeds this value. Tune it to your data volume; for large
        // imports around 5 MB works well, which is what is used here.
        table.setWriteBufferSize(5 * 1024 * 1024);
        Put put = new Put(Bytes.toBytes(row)); // row key
        // Arguments: column family, column, value
        put.add(Bytes.toBytes(columnFamily), Bytes.toBytes(column), Bytes.toBytes(value));
        table.put(put);
    }

    // Call flushCommits() so the final batch is not lost
    // (especially at the end of a partition in Spark)
    public void flushCommits(String tableName) throws Exception {
        table.flushCommits();
    }

    // Insert a single cell
    public void addRow(String tableName, String row, String columnFamily,
            String column, String value) throws Exception {
        Put put = new Put(Bytes.toBytes(row)); // row key
        // Arguments: column family, column, value
        put.add(Bytes.toBytes(columnFamily), Bytes.toBytes(column), Bytes.toBytes(value));
        table.put(put);
    }

    // Delete a single row
    public static void delRow(String tableName, String row) throws Exception {
        Delete del = new Delete(Bytes.toBytes(row));
        table.delete(del);
    }

    // Delete multiple rows
    public void delMultiRows(String tableName, String[] rows) throws Exception {
        List<Delete> delList = new ArrayList<Delete>();
        for (String row : rows) {
            delList.add(new Delete(Bytes.toBytes(row)));
        }
        table.delete(delList);
    }

    // Get a single row
    public void getRow(String tableName, String row) throws Exception {
        Get get = new Get(Bytes.toBytes(row));
        Result result = table.get(get);
        // raw() returns every KeyValue in the result
        for (KeyValue rowKV : result.raw()) {
            System.out.print("Row: " + new String(rowKV.getRow()) + " ");
            System.out.print("Timestamp: " + rowKV.getTimestamp() + " ");
            System.out.print("Family: " + new String(rowKV.getFamily()) + " ");
            System.out.print("Qualifier: " + new String(rowKV.getQualifier()) + " ");
            System.out.println("Value: " + new String(rowKV.getValue()));
        }
    }

    // Scan all rows
    public void getAllRows(String tableName) throws Exception {
        Scan scan = new Scan();
        ResultScanner results = table.getScanner(scan);
        for (Result result : results) {
            for (KeyValue rowKV : result.raw()) {
                System.out.print("Row: " + new String(rowKV.getRow()) + " ");
                System.out.print("Timestamp: " + rowKV.getTimestamp() + " ");
                System.out.print("Family: " + new String(rowKV.getFamily()) + " ");
                System.out.print("Qualifier: " + new String(rowKV.getQualifier()) + " ");
                System.out.println("Value: " + new String(rowKV.getValue()));
            }
        }
    }

    public static void main(String[] args) {
        HBaseDBDao hb = new HBaseDBDao();
        try {
            String tableName = "student";
            // Step 1: create the "student" table (it must exist before an
            // HTable can be opened on it)
            String[] columnFamilys = { "info", "course" };
            if (!hb.isExist(tableName))
                hb.createTable(tableName, columnFamilys);
            hb.initHTable(tableName);
            // Step 2: insert data
            if (hb.isExist(tableName)) {
                // First row
                hb.addRow(tableName, "zpc", "info", "age", "20");
                hb.addRow(tableName, "zpc", "info", "sex", "boy");
                hb.addRow(tableName, "zpc", "course", "china", "97");
                hb.addRow(tableName, "zpc", "course", "math", "128");
                hb.addRow(tableName, "zpc", "course", "english", "85");
                // Second row
                hb.addRow(tableName, "henjun", "info", "age", "19");
                hb.addRow(tableName, "henjun", "info", "sex", "boy");
                hb.addRow(tableName, "henjun", "course", "china", "90");
                hb.addRow(tableName, "henjun", "course", "math", "120");
                hb.addRow(tableName, "henjun", "course", "english", "90");
                // Third row
                hb.addRow(tableName, "niaopeng", "info", "age", "18");
                hb.addRow(tableName, "niaopeng", "info", "sex", "girl");
                hb.addRow(tableName, "niaopeng", "course", "china", "100");
                hb.addRow(tableName, "niaopeng", "course", "math", "100");
                hb.addRow(tableName, "niaopeng", "course", "english", "99");
                // Step 3: get one row
                System.out.println("************** Get one row (zpc) **************");
                hb.getRow(tableName, "zpc");
                // Step 4: get all rows
                System.out.println("************** Get all rows **************");
                hb.getAllRows(tableName);
                // Step 5: delete one row
                System.out.println("************** Delete one row (zpc) **************");
                HBaseDBDao.delRow(tableName, "zpc");
                hb.getAllRows(tableName);
                // Step 6: delete multiple rows
                System.out.println("************** Delete multiple rows **************");
                String rows[] = new String[] { "henjun", "niaopeng" };
                hb.delMultiRows(tableName, rows);
                hb.getAllRows(tableName);
                // Step 7: delete the table
                System.out.println("************** Delete the table **************");
                hb.deleteTable(tableName);
                System.out.println("Does table " + tableName + " still exist? " + hb.isExist(tableName));
            } else {
                System.out.println("Table " + tableName + " does not exist!");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
An alternative approach that goes through the connection factory (ConnectionFactory) is as follows:
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.PrefixFilter;
import org.apache.hadoop.hbase.filter.RegexStringComparator;
import org.apache.hadoop.hbase.filter.RowFilter;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.util.Bytes;

public class HBaseDBDao implements Serializable {

    private static final long serialVersionUID = 3112265674603751415L;

    // Static configuration
    private static Configuration conf = null;
    private static HBaseAdmin hAdmin;
    private static Connection conn;
    private static HTable table;

    public HBaseDBDao() {
        conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.1.154");
        conf.set("hbase.zookeeper.property.clientPort", "2181");
        conf.set("hbase.master", "192.168.1.154:60000");
        try {
            // Initialize the admin through a Connection
            // instead of calling new HBaseAdmin(conf) directly
            conn = ConnectionFactory.createConnection(conf);
            hAdmin = (HBaseAdmin) conn.getAdmin();
        } catch (MasterNotRunningException e) {
            e.printStackTrace();
        } catch (ZooKeeperConnectionException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public void initHTable(String tableName) throws IOException {
        table = (HTable) conn.getTable(TableName.valueOf(tableName));
    }

    // Disable auto-flush
    public void closeAutoFlush() {
        table.setAutoFlush(false, false);
    }

    // Close the table
    public void closeTable() throws IOException {
        table.close();
        System.out.println("Table closed successfully!");
    }

    // Close the connection
    public void closeConnection() throws IOException {
        conn.close();
        System.out.println("Connection closed successfully!");
    }

    // Check whether a table exists
    public boolean isExist(String tableName) throws IOException {
        return hAdmin.tableExists(tableName);
    }

    // Create a table
    public void createTable(String tableName, String[] columnFamilys) throws Exception {
        if (hAdmin.tableExists(tableName)) {
            System.out.println("Table " + tableName + " already exists!");
        } else {
            // Build the table descriptor
            HTableDescriptor tableDesc = new HTableDescriptor(TableName.valueOf(tableName));
            // Add the column families to the descriptor
            for (String columnFamily : columnFamilys) {
                HColumnDescriptor hcd = new HColumnDescriptor(columnFamily);
                // Optionally set a compression algorithm for the column family
                // hcd.setCompressionType(Algorithm.LZO);
                tableDesc.addFamily(hcd);
            }
            // Create the table from the descriptor
            hAdmin.createTable(tableDesc);
            System.out.println("Created table " + tableName + " successfully!");
        }
    }

    // ... (the remaining CRUD methods match the improved version below)
}
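A Connection is heavyweight and meant to be created once, shared, and closed explicitly, while Table and Admin are lightweight per-use handles. A minimal sketch of that lifecycle using try-with-resources (this form is not from the original code and assumes the HBase 1.x client API, where Connection, Admin, and Table are all AutoCloseable):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Table;

public class ConnectionLifecycleDemo {
    public static void main(String[] args) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.1.154");
        // Create one shared Connection; open Table/Admin handles per use.
        try (Connection conn = ConnectionFactory.createConnection(conf);
             Admin admin = conn.getAdmin();
             Table table = conn.getTable(TableName.valueOf("student"))) {
            System.out.println("student exists: "
                    + admin.tableExists(TableName.valueOf("student")));
        } // conn, admin, and table are all closed here, even on exceptions
    }
}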
An improved version of the factory approach is as follows:
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.PrefixFilter;
import org.apache.hadoop.hbase.filter.RegexStringComparator;
import org.apache.hadoop.hbase.filter.RowFilter;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.util.Bytes;

public class HBaseDBDao implements Serializable {

    private static final long serialVersionUID = -3338957140027388957L;

    // Static configuration
    private static Configuration conf = null;
    private static HBaseAdmin hAdmin;
    private static Connection conn;

    public HBaseDBDao() {
        conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.1.154");
        conf.set("hbase.zookeeper.property.clientPort", "2181");
        conf.set("hbase.master", "192.168.1.154:60000");
        try {
            // Initialize the admin through a Connection
            conn = ConnectionFactory.createConnection(conf);
            hAdmin = (HBaseAdmin) conn.getAdmin();
        } catch (MasterNotRunningException e) {
            e.printStackTrace();
        } catch (ZooKeeperConnectionException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    // The HTable is now returned to the caller instead of being held in a static field
    public HTable initHTable(String tableName) throws IOException {
        HTable table = (HTable) conn.getTable(TableName.valueOf(tableName));
        return table;
    }

    // Disable auto-flush
    public void closeAutoFlush(HTable table) {
        table.setAutoFlush(false, false);
    }

    // Close the table
    public void closeTable(HTable table) throws IOException {
        table.close();
        System.out.println("Table closed successfully!");
    }

    // Close the connection
    public void closeConnection() throws IOException {
        conn.close();
        System.out.println("Connection closed successfully!");
    }

    // Check whether a table exists
    public boolean isExist(String tableName) throws IOException {
        return hAdmin.tableExists(tableName);
    }

    // Create a table
    public void createTable(String tableName, String[] columnFamilys) throws Exception {
        if (hAdmin.tableExists(tableName)) {
            System.out.println("Table " + tableName + " already exists!");
        } else {
            // Build the table descriptor
            HTableDescriptor tableDesc = new HTableDescriptor(TableName.valueOf(tableName));
            // Add the column families to the descriptor
            for (String columnFamily : columnFamilys) {
                HColumnDescriptor hcd = new HColumnDescriptor(columnFamily);
                // Optionally set a compression algorithm for the column family
                // hcd.setCompressionType(Algorithm.GZ);
                tableDesc.addFamily(hcd);
            }
            // Create the table from the descriptor
            hAdmin.createTable(tableDesc);
            System.out.println("Created table " + tableName + " successfully!");
        }
    }

    // Delete a table
    public void deleteTable(String tableName) throws Exception {
        if (hAdmin.tableExists(tableName)) {
            // A table must be disabled before it can be deleted
            hAdmin.disableTable(tableName);
            hAdmin.deleteTable(tableName);
            System.out.println("Deleted table " + tableName + " successfully!");
        } else {
            System.out.println("Table to delete, " + tableName + ", does not exist!");
            System.exit(0);
        }
    }

    // Batch insert
    public void addRowBatch(HTable table, String row, String columnFamily,
            String column, String value) throws Exception {
        // Auto-flush should be disabled (see closeAutoFlush); otherwise every put
        // is committed on its own, which is the main reason bulk imports run slowly
        // table.setAutoFlush(false, false);
        // Set the write buffer size; the client flushes automatically once the
        // buffer exceeds this value. Tune it to your data volume; 5 MB is used here.
        table.setWriteBufferSize(5 * 1024 * 1024);
        Put put = new Put(Bytes.toBytes(row)); // row key
        // Arguments: column family, column, value
        put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(column), Bytes.toBytes(value));
        table.put(put);
    }

    // Call flushCommits() so the final batch is not lost
    // (especially at the end of a partition in Spark)
    public void flushCommits(HTable table) throws Exception {
        table.flushCommits();
    }

    // Insert a single cell
    public void addRow(HTable table, String row, String columnFamily,
            String column, String value) throws Exception {
        Put put = new Put(Bytes.toBytes(row)); // row key
        // Arguments: column family, column, value
        put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(column), Bytes.toBytes(value));
        table.put(put);
    }

    // Delete a single row
    public static void delRow(HTable table, String row) throws Exception {
        Delete del = new Delete(Bytes.toBytes(row));
        table.delete(del);
    }

    // Delete multiple rows
    public void delMultiRows(HTable table, String[] rows) throws Exception {
        List<Delete> delList = new ArrayList<Delete>();
        for (String row : rows) {
            delList.add(new Delete(Bytes.toBytes(row)));
        }
        table.delete(delList);
    }

    // Get the full details of a single row
    public void getRow(HTable table, String row) throws Exception {
        Get get = new Get(Bytes.toBytes(row));
        Result result = table.get(get);
        // rawCells() returns every Cell in the result
        for (Cell cell : result.rawCells()) {
            System.out.print("Row: " + new String(CellUtil.cloneRow(cell)) + " ");
            System.out.print("Timestamp: " + cell.getTimestamp() + " ");
            System.out.print("Family: " + new String(CellUtil.cloneFamily(cell)) + " ");
            System.out.print("Qualifier: " + new String(CellUtil.cloneQualifier(cell)) + " ");
            System.out.println("Value: " + new String(CellUtil.cloneValue(cell)));
        }
    }

    // Get only the values of a single row
    public void getRowValue(HTable table, String row) throws Exception {
        Get geter = new Get(Bytes.toBytes(row));
        Result result = table.get(geter);
        for (Cell cell : result.rawCells()) {
            System.out.print(new String(CellUtil.cloneQualifier(cell)) + " ");
            System.out.println(new String(CellUtil.cloneValue(cell)));
        }
    }

    // Get the full details of every row
    public void getAllRows(HTable table) throws Exception {
        Scan scan = new Scan();
        ResultScanner results = table.getScanner(scan);
        for (Result result : results) {
            for (Cell cell : result.rawCells()) {
                System.out.print("Row: " + new String(CellUtil.cloneRow(cell)) + " ");
                System.out.print("Timestamp: " + cell.getTimestamp() + " ");
                System.out.print("Family: " + new String(CellUtil.cloneFamily(cell)) + " ");
                System.out.print("Qualifier: " + new String(CellUtil.cloneQualifier(cell)) + " ");
                System.out.println("Value: " + new String(CellUtil.cloneValue(cell)));
            }
        }
    }

    // Get the values of every row and write them to a file
    public void getAllRowsValue(HTable table) throws Exception {
        int rownum = 0;
        Scan scan = new Scan();
        ResultScanner results = table.getScanner(scan);
        File file = new File("/usr/local/myjar/txt-for-project/hbase.txt");
        FileWriter filewriter = new FileWriter(file);
        BufferedWriter bfw = new BufferedWriter(filewriter);
        for (Result result : results) {
            String str = new String(result.getRow()) + " ";
            rownum++;
            System.out.print(new String(result.getRow()) + " ");
            for (Cell cell : result.rawCells()) {
                str = str + cell.getTimestamp() + " ";
                System.out.print(new String(CellUtil.cloneQualifier(cell)) + " ");
                System.out.print(new String(CellUtil.cloneValue(cell)) + " ");
            }
            bfw.write(str + "\n");
            System.out.println();
        }
        System.out.println("Row count: " + rownum);
        bfw.close();
    }

    // Same as above, but with a filter applied to the scan
    public void getAllRowsValueWithFilter(HTable table) throws Exception {
        int rownum = 0;
        // A prefix scan would look like this:
        // String prefix = "144860945905310140";
        // Scan scan = new Scan(prefix.getBytes());
        // scan.setFilter(new PrefixFilter(prefix.getBytes()));
        Filter myFilter = new RowFilter(CompareFilter.CompareOp.EQUAL,
                new RegexStringComparator(".*1449453890982"));
        Scan scan = new Scan();
        scan.setFilter(myFilter);
        ResultScanner results = table.getScanner(scan);
        File file = new File("/usr/local/myjar/txt-for-project/hbase.txt");
        FileWriter filewriter = new FileWriter(file);
        BufferedWriter bfw = new BufferedWriter(filewriter);
        for (Result result : results) {
            String str = new String(result.getRow()) + " ";
            rownum++;
            System.out.print(new String(result.getRow()) + " ");
            for (Cell cell : result.rawCells()) {
                str = str + cell.getTimestamp() + " ";
                System.out.print(new String(CellUtil.cloneQualifier(cell)) + " ");
                System.out.print(new String(CellUtil.cloneValue(cell)) + " ");
            }
            bfw.write(str + "\n");
            System.out.println();
        }
        System.out.println("Row count: " + rownum);
        bfw.close();
    }

    public static void main(String[] args) {
        HBaseDBDao hb = new HBaseDBDao();
        try {
            String tableName = "student";
            // hb.deleteTable(tableName);
            // Step 1: create the "student" table
            String[] columnFamilys = { "info", "course" };
            if (!hb.isExist(tableName))
                hb.createTable(tableName, columnFamilys);
            HTable table = hb.initHTable(tableName);
            // Step 2: insert data
            if (hb.isExist(tableName)) {
                // First row
                hb.addRow(table, "zpc", "info", "age", "20");
                hb.addRow(table, "zpc", "info", "sex", "boy");
                hb.addRow(table, "zpc", "course", "china", "97");
                hb.addRow(table, "zpc", "course", "math", "128");
                hb.addRow(table, "zpc", "course", "english", "85");
                // Second row
                hb.addRow(table, "henjun", "info", "age", "19");
                hb.addRow(table, "henjun", "info", "sex", "boy");
                hb.addRow(table, "henjun", "course", "china", "90");
                hb.addRow(table, "henjun", "course", "math", "120");
                hb.addRow(table, "henjun", "course", "english", "90");
                // Third row
                hb.addRow(table, "niaopeng", "info", "age", "18");
                hb.addRow(table, "niaopeng", "info", "sex", "girl");
                hb.addRow(table, "niaopeng", "course", "china", "100");
                hb.addRow(table, "niaopeng", "course", "math", "100");
                hb.addRow(table, "niaopeng", "course", "english", "99");
                // Step 3: get one row
                System.out.println("************** Get one row (zpc) **************");
                hb.getRow(table, "zpc");
                // Step 4: get all rows
                System.out.println("************** Get all rows **************");
                hb.getAllRows(table);
                // Step 5: delete one row
                System.out.println("************** Delete one row (zpc) **************");
                HBaseDBDao.delRow(table, "zpc");
                hb.getAllRows(table);
                // Step 6: delete multiple rows
                System.out.println("************** Delete multiple rows **************");
                String rows[] = new String[] { "henjun", "niaopeng" };
                hb.delMultiRows(table, rows);
                hb.getAllRows(table);
                // Step 7: delete the table
                System.out.println("************** Delete the table **************");
                hb.deleteTable(tableName);
                System.out.println("Does table " + tableName + " still exist? " + hb.isExist(tableName));
            } else {
                System.out.println("Table " + tableName + " does not exist!");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
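The commented-out lines in getAllRowsValueWithFilter hint at a prefix scan, which is usually cheaper than a whole-table regex RowFilter because the scan can start at the prefix instead of reading every row. A minimal sketch of that pattern (the class name PrefixScanDemo is illustrative; PrefixFilter and the Scan start-row constructor are from the same client API used above):

import java.io.IOException;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.filter.PrefixFilter;
import org.apache.hadoop.hbase.util.Bytes;

public class PrefixScanDemo {
    // Scan only the rows whose key starts with the given prefix.
    public static void scanByPrefix(Table table, String prefix) throws IOException {
        Scan scan = new Scan(Bytes.toBytes(prefix));             // start at the prefix
        scan.setFilter(new PrefixFilter(Bytes.toBytes(prefix))); // drop rows past it
        try (ResultScanner results = table.getScanner(scan)) {
            for (Result result : results) {
                System.out.println("matched row: " + new String(result.getRow()));
            }
        } // the scanner is released here
    }
}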
Note: the factory pattern above does not work with older HBase versions.
An approach using a pool (HTablePool, which pools HTable instances rather than threads) is as follows:
package com.defcons;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.HTablePool;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.PrefixFilter;
import org.apache.hadoop.hbase.filter.RegexStringComparator;
import org.apache.hadoop.hbase.filter.RowFilter;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.util.Bytes;

public class HBaseDBDao implements Serializable {

    private static final long serialVersionUID = -3338957140027388957L;

    // Static configuration
    private static Configuration conf = null;
    private static HBaseAdmin hAdmin;
    // Use an HTablePool instead of a single static HTable
    private static HTablePool pool;

    public HBaseDBDao() {
        conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.1.154");
        conf.set("hbase.zookeeper.property.clientPort", "2181");
        conf.set("hbase.master", "192.168.1.154:60000");
        try {
            // Initialize the HBase administrator
            hAdmin = new HBaseAdmin(conf);
            // Create a pool holding up to 5 HTable instances
            pool = new HTablePool(conf, 5);
        } catch (MasterNotRunningException e) {
            e.printStackTrace();
        } catch (ZooKeeperConnectionException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public HTableInterface initHTable(String tableName) throws IOException {
        // Borrow an HTable instance from the pool
        HTableInterface table = pool.getTable(tableName);
        return table;
    }

    // Disable auto-flush
    public void closeAutoFlush(HTableInterface table) {
        table.setAutoFlush(false, false);
    }

    // Close the table (returns the instance to the pool)
    public void closeTable(HTableInterface table) throws IOException {
        table.close();
        System.out.println("Table closed successfully!");
    }

    // Close the pool
    public void closeConnection() throws IOException {
        pool.close();
        System.out.println("Pool closed successfully!");
    }

    // Check whether a table exists
    public boolean isExist(String tableName) throws IOException {
        return hAdmin.tableExists(tableName);
    }

    // Create a table
    public void createTable(String tableName, String[] columnFamilys) throws Exception {
        if (hAdmin.tableExists(tableName)) {
            System.out.println("Table " + tableName + " already exists!");
        } else {
            // Build the table descriptor
            HTableDescriptor tableDesc = new HTableDescriptor(TableName.valueOf(tableName));
            // Add the column families to the descriptor
            for (String columnFamily : columnFamilys) {
                HColumnDescriptor hcd = new HColumnDescriptor(columnFamily);
                // Optionally set a compression algorithm for the column family
                // hcd.setCompressionType(Algorithm.GZ);
                tableDesc.addFamily(hcd);
            }
            // Create the table from the descriptor
            hAdmin.createTable(tableDesc);
            System.out.println("Created table " + tableName + " successfully!");
        }
    }

    // Delete a table
    public void deleteTable(String tableName) throws Exception {
        if (hAdmin.tableExists(tableName)) {
            // A table must be disabled before it can be deleted
            hAdmin.disableTable(tableName);
            hAdmin.deleteTable(tableName);
            System.out.println("Deleted table " + tableName + " successfully!");
        } else {
            System.out.println("Table to delete, " + tableName + ", does not exist!");
            System.exit(0);
        }
    }

    // Batch insert
    public void addRowBatch(HTableInterface table, String row, String columnFamily,
            String column, String value) throws Exception {
        // Auto-flush should be disabled (see closeAutoFlush); otherwise every put
        // is committed on its own, which is the main reason bulk imports run slowly
        // table.setAutoFlush(false, false);
        // Set the write buffer size; the client flushes automatically once the
        // buffer exceeds this value. Tune it to your data volume; 5 MB is used here.
        table.setWriteBufferSize(5 * 1024 * 1024);
        Put put = new Put(Bytes.toBytes(row)); // row key
        // Arguments: column family, column, value
        put.add(Bytes.toBytes(columnFamily), Bytes.toBytes(column), Bytes.toBytes(value));
        table.put(put);
    }

    // Call flushCommits() so the final batch is not lost
    // (especially at the end of a partition in Spark)
    public void flushCommits(HTableInterface table) throws Exception {
        table.flushCommits();
    }

    // Insert a single cell
    public void addRow(HTableInterface table, String row, String columnFamily,
            String column, String value) throws Exception {
        Put put = new Put(Bytes.toBytes(row)); // row key
        // Arguments: column family, column, value
        put.add(Bytes.toBytes(columnFamily), Bytes.toBytes(column), Bytes.toBytes(value));
        table.put(put);
    }

    // Delete a single row
    public static void delRow(HTableInterface table, String row) throws Exception {
        Delete del = new Delete(Bytes.toBytes(row));
        table.delete(del);
    }

    // Delete multiple rows
    public void delMultiRows(HTableInterface table, String[] rows) throws Exception {
        List<Delete> delList = new ArrayList<Delete>();
        for (String row : rows) {
            delList.add(new Delete(Bytes.toBytes(row)));
        }
        table.delete(delList);
    }

    // Get the full details of a single row
    public void getRow(HTableInterface table, String row) throws Exception {
        Get get = new Get(Bytes.toBytes(row));
        Result result = table.get(get);
        // rawCells() returns every Cell in the result
        for (Cell cell : result.rawCells()) {
            System.out.print("Row: " + new String(CellUtil.cloneRow(cell)) + " ");
            System.out.print("Timestamp: " + cell.getTimestamp() + " ");
            System.out.print("Family: " + new String(CellUtil.cloneFamily(cell)) + " ");
            System.out.print("Qualifier: " + new String(CellUtil.cloneQualifier(cell)) + " ");
            System.out.println("Value: " + new String(CellUtil.cloneValue(cell)));
        }
    }

    // Get only the values of a single row
    public void getRowValue(HTableInterface table, String row) throws Exception {
        Get geter = new Get(Bytes.toBytes(row));
        Result result = table.get(geter);
        for (Cell cell : result.rawCells()) {
            System.out.print(new String(CellUtil.cloneQualifier(cell)) + " ");
            System.out.println(new String(CellUtil.cloneValue(cell)));
        }
    }

    // Get the full details of every row
    public void getAllRows(HTableInterface table) throws Exception {
        Scan scan = new Scan();
        ResultScanner results = table.getScanner(scan);
        for (Result result : results) {
            for (Cell cell : result.rawCells()) {
                System.out.print("Row: " + new String(CellUtil.cloneRow(cell)) + " ");
                System.out.print("Timestamp: " + cell.getTimestamp() + " ");
                System.out.print("Family: " + new String(CellUtil.cloneFamily(cell)) + " ");
                System.out.print("Qualifier: " + new String(CellUtil.cloneQualifier(cell)) + " ");
                System.out.println("Value: " + new String(CellUtil.cloneValue(cell)));
            }
        }
    }

    // Get the values of every row and write them to a file
    public void getAllRowsValue(HTableInterface table) throws Exception {
        int rownum = 0;
        Scan scan = new Scan();
        ResultScanner results = table.getScanner(scan);
        File file = new File("/usr/local/myjar/txt-for-project/hbase.txt");
        FileWriter filewriter = new FileWriter(file);
        BufferedWriter bfw = new BufferedWriter(filewriter);
        for (Result result : results) {
            String str = new String(result.getRow()) + " ";
            rownum++;
            System.out.print(new String(result.getRow()) + " ");
            for (Cell cell : result.rawCells()) {
                str = str + cell.getTimestamp() + " ";
                System.out.print(new String(CellUtil.cloneQualifier(cell)) + " ");
                System.out.print(new String(CellUtil.cloneValue(cell)) + " ");
            }
            bfw.write(str + "\n");
            System.out.println();
        }
        System.out.println("Row count: " + rownum);
        bfw.close();
    }

    // Same as above, but with a filter applied to the scan
    public void getAllRowsValueWithFilter(HTableInterface table) throws Exception {
        int rownum = 0;
        // A prefix scan would look like this:
        // String prefix = "144860945905310140";
        // Scan scan = new Scan(prefix.getBytes());
        // scan.setFilter(new PrefixFilter(prefix.getBytes()));
        Filter myFilter = new RowFilter(CompareFilter.CompareOp.EQUAL,
                new RegexStringComparator(".*1449453890982"));
        Scan scan = new Scan();
        scan.setFilter(myFilter);
        ResultScanner results = table.getScanner(scan);
        File file = new File("/usr/local/myjar/txt-for-project/hbase.txt");
        FileWriter filewriter = new FileWriter(file);
        BufferedWriter bfw = new BufferedWriter(filewriter);
        for (Result result : results) {
            String str = new String(result.getRow()) + " ";
            rownum++;
            System.out.print(new String(result.getRow()) + " ");
            for (Cell cell : result.rawCells()) {
                str = str + cell.getTimestamp() + " ";
                System.out.print(new String(CellUtil.cloneQualifier(cell)) + " ");
                System.out.print(new String(CellUtil.cloneValue(cell)) + " ");
            }
            bfw.write(str + "\n");
            System.out.println();
        }
        System.out.println("Row count: " + rownum);
        bfw.close();
    }
}
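One point worth stressing about the pool version: calling close() on an HTableInterface obtained from the pool returns the instance to the pool rather than destroying it, so the usual borrow/return discipline applies. A minimal usage sketch under that assumption (the class name PoolDemo is illustrative, and it assumes the student table already exists):

package com.defcons;

import org.apache.hadoop.hbase.client.HTableInterface;

public class PoolDemo {
    public static void main(String[] args) throws Exception {
        HBaseDBDao hb = new HBaseDBDao();
        // Borrow a table handle from the pool
        HTableInterface table = hb.initHTable("student");
        try {
            hb.addRow(table, "zpc", "info", "age", "20");
            hb.getRow(table, "zpc");
        } finally {
            hb.closeTable(table); // close() hands the instance back to the pool
        }
        hb.closeConnection(); // shuts the whole pool down when finished
    }
}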