HBase API
package hbase; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.*; import org.apache.hadoop.hbase.client.*; import org.apache.hadoop.hbase.util.Bytes; import org.junit.After; import org.junit.Before; import org.junit.Test; import java.io.*; import java.util.ArrayList; import java.util.List; public class HBaseAPI { //对表结构的修改需要Admin进行操作,获取表数据进行getTable操作 Connection conn; @Before public void init() throws IOException { //创建配置,指定zk集群地址 Configuration conf = HBaseConfiguration.create(); conf.set("hbase.zookeeper.quorum", "master,node1,node2"); //hbase.zookeeper.quorum配置在conf/hbase.site.xml中 //创建连接 conn = ConnectionFactory.createConnection(conf); } @Test public void create() throws IOException { //创建Admin对象 Admin admin = conn.getAdmin(); //做一个判断,表不存在再创建 if (!admin.tableExists(TableName.valueOf("test_api"))){ //创建test_api表,因为是新建所有new HTableDescriptor test_api = new HTableDescriptor(TableName.valueOf("test_api")); //添加列蔟,new新建 HColumnDescriptor cf1 = new HColumnDescriptor("cf1"); //设置TTL cf1.setTimeToLive(10); //设置版本 cf1.setMaxVersions(3); test_api.addFamily(cf1); admin.createTable(test_api); } } @Test public void put() throws IOException { //通过conn连接先获取表 Table test_api = conn.getTable(TableName.valueOf("test_api")); //使用Put对象来新建ROWKEY Put put = new Put("001".getBytes()); //给put中增加需要增加的列蔟 put.addColumn("cf1".getBytes(),"name".getBytes(),"zs".getBytes()); //把整理好的put导入到表中 test_api.put(put); } @Test public void get() throws IOException { //通过conn获取表名的连接 Table test_api = conn.getTable(TableName.valueOf("test_api")); //通过ROWKEY来get Get get = new Get("001".getBytes()); Result rs = test_api.get(get); byte[] value = rs.getValue("cf1".getBytes(), "name".getBytes()); System.out.println(Bytes.toString(value)); } @Test public void alter() throws IOException { Admin admin = conn.getAdmin(); //获取表原有的结构 HTableDescriptor test_api = admin.getTableDescriptor(TableName.valueOf("test_api")); //获取所有列蔟构成的数组 HColumnDescriptor[] columnFamilies = 
test_api.getColumnFamilies(); for (HColumnDescriptor cf : columnFamilies) { //获取列蔟的名称 String s = cf.getNameAsString(); if ("cf1".equals(s)){ cf.setMaxVersions(5); cf.setTimeToLive(1000000); } }
admin.modifyTable(TableName.valueOf("test_api"),test_api); } @Test public void scan() throws IOException { Table students = conn.getTable(TableName.valueOf("students")); Scan scan = new Scan(); scan.setLimit(10); scan.withStartRow("1500100030".getBytes()); scan.withStopRow("1500100050".getBytes()); //通过表.getScanner方法获取数据 ResultScanner rs = students.getScanner(scan); //因为scan是大量数据,是一行一行存储,所以是以数组存储 for (Result r : rs) { //getValue方法是通过传入列蔟和列名得到value String id = Bytes.toString(r.getRow()); String name = Bytes.toString(r.getValue("info".getBytes(), "name".getBytes())); String age = Bytes.toString(r.getValue("info".getBytes(), "age".getBytes())); String sex = Bytes.toString(r.getValue("info".getBytes(), "sex".getBytes())); String clazz = Bytes.toString(r.getValue("info".getBytes(), "clazz".getBytes())); System.out.println(id+" "+name+" "+age+" "+sex+" "+clazz); } } @Test //用cell单元格获取数据 //适合于每条数据结构不唯一的情况下,直接遍历每条数据包含的所有cell public void scan2() throws IOException { Table students = conn.getTable(TableName.valueOf("students")); Scan scan = new Scan(); scan.setLimit(20); ResultScanner rs = students.getScanner(scan); //每一个result就是一条数据 for (Result r : rs) { //获取rowkey String rowkey = Bytes.toString(r.getRow()); System.out.print(rowkey+" "); //获取单元格数据CellUtil.clone(单元格包括值和版本号) List<Cell> cells = r.listCells(); for (Cell cell : cells) { byte[] bytes = CellUtil.cloneValue(cell); String s1 = Bytes.toString(bytes); //获取列名 String quali = Bytes.toString(CellUtil.cloneQualifier(cell)); if ("age".equals(quali)){ if (Integer.valueOf(s1)>=18){ s1="成年"; } else { s1="未成年"; } } System.out.print(s1+" "); } System.out.println(); } } @Test public void putAll() throws IOException { Admin admin = conn.getAdmin(); if (!admin.tableExists(TableName.valueOf("students"))) { HTableDescriptor students = new HTableDescriptor(TableName.valueOf("students")); HColumnDescriptor info = new HColumnDescriptor("info"); students.addFamily(info); admin.createTable(students); } BufferedReader br = new 
BufferedReader(new FileReader("C:/Users/19768/Desktop/students.txt")); Table students = conn.getTable(TableName.valueOf("students")); String line=null; //创建数组存储put会加快运行速度 ArrayList<Put> puts = new ArrayList<Put>(); //初始每10个put写入数组 int batchsize=10; int i=0; while ((line=br.readLine())!=null){ String[] split = line.split(","); String id = split[0]; String name = split[1]; String age = split[2]; String sex = split[3]; String clazz = split[4]; Put put = new Put(id.getBytes()); put.addColumn("info".getBytes(),"name".getBytes(),name.getBytes()); put.addColumn("info".getBytes(),"age".getBytes(),age.getBytes()); put.addColumn("info".getBytes(),"sex".getBytes(),sex.getBytes()); put.addColumn("info".getBytes(),"clazz".getBytes(),clazz .getBytes()); //添加到数组 puts.add(put); i++; if (i==batchsize){ students.put(puts); //将数组变成空 puts=new ArrayList<Put>(); i=0; } } if (i!=0){ students.put(puts); } } @Test public void delete() throws IOException { Admin admin = conn.getAdmin(); if (admin.tableExists(TableName.valueOf("delete"))) { HTableDescriptor delete = new HTableDescriptor(TableName.valueOf("delete")); HColumnDescriptor info = new HColumnDescriptor("info"); admin.createTable(delete); } if(admin.tableExists(TableName.valueOf("delete"))){ admin.disableTable(TableName.valueOf("delete")); admin.deleteTable(TableName.valueOf("delete")); } } @After public void close() throws IOException { conn.close(); } }
练习:
导入电信数据,使用 List 集合批量导入
将开始时间设置为版本号(时间戳),并输出经度和纬度最近的三个版本
package hbase; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.*; import org.apache.hadoop.hbase.client.*; import org.apache.hadoop.hbase.util.Bytes; import org.junit.After; import org.junit.Before; import org.junit.Test; import java.io.BufferedReader; import java.io.FileReader; import java.io.IOException; import java.util.ArrayList; public class HbaseDX { Connection conn; Admin admin; Table dx; @Before public void init() throws IOException { Configuration conf = HBaseConfiguration.create(); conf.set("hbase.zookeeper.quorum", "master,node1,node2"); conn = ConnectionFactory.createConnection(conf); admin = conn.getAdmin(); dx = conn.getTable(TableName.valueOf("DIANXIN")); } @Test public void create() throws IOException { HTableDescriptor dianxin = new HTableDescriptor(TableName.valueOf("DIANXIN")); HColumnDescriptor cf1 = new HColumnDescriptor("cf1"); cf1.setMaxVersions(3); dianxin.addFamily(cf1); admin.createTable(dianxin); } @Test public void putAll() throws IOException { BufferedReader br = new BufferedReader(new FileReader("D:/ALanzhishujia/soft/data/DIANXIN.csv")); ArrayList<Put> puts = new ArrayList<Put>(); String line=null; while ((line=br.readLine())!=null){ String[] split = line.split(","); String adm=split[0]; String time = split[1]; String jindu = split[4]; String weidu = split[5]; Put put = new Put(adm.getBytes()); //将时间作为版本号 long start_time = Long.parseLong(time); //put的时候,同时put上value的时间戳版本 put.addColumn("cf1".getBytes(),"jindu".getBytes(),start_time,jindu.getBytes()); put.addColumn("cf1".getBytes(),"weidu".getBytes(),start_time,weidu.getBytes()); puts.add(put); if (puts.size()==1000){ dx.put(puts); puts=new ArrayList<Put>(); } } if (puts.size()!=0){ dx.put(puts); } } @Test public void getthree() throws IOException { // hbase shell中的命令 // hbase(main):001:0> get 'DIANXIN', // 'FDA42AB4AA84D13C2AC7512D1BAEE93B7DCC327A',{COLUMN=>['cf1:jindu','cf1:weidu'],VERSIONS=>3} String adm="FDA42AB4AA84D13C2AC7512D1BAEE93B7DCC327A"; Get get 
= new Get(adm.getBytes()); get.setMaxVersions(3); Result rs = dx.get(get); ArrayList<String> jinlist = new ArrayList<String>(); ArrayList<String> weilist = new ArrayList<String>(); for (Cell cell : rs.listCells()) { String quo = Bytes.toString(CellUtil.cloneQualifier(cell)); String values = Bytes.toString(CellUtil.cloneValue(cell)); if ("jindu".equals(quo)){ jinlist.add(values); }else if ("weidu".equals(quo)){ weilist.add(values); } } for (int i = 0; i < 3; i++) { System.out.println(jinlist.get(i)+","+weilist.get(i)); } } @After public void close() throws IOException { conn.close(); } }
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 10年+ .NET Coder 心语,封装的思维:从隐藏、稳定开始理解其本质意义
· .NET Core 中如何实现缓存的预热?
· 从 HTTP 原因短语缺失研究 HTTP/2 和 HTTP/3 的设计差异
· AI与.NET技术实操系列:向量存储与相似性搜索在 .NET 中的实现
· 基于Microsoft.Extensions.AI核心库实现RAG应用
· TypeScript + Deepseek 打造卜卦网站:技术与玄学的结合
· 阿里巴巴 QwQ-32B真的超越了 DeepSeek R-1吗?
· 【译】Visual Studio 中新的强大生产力特性
· 10年+ .NET Coder 心语 ── 封装的思维:从隐藏、稳定开始理解其本质意义
· 【设计模式】告别冗长if-else语句:使用策略模式优化代码结构