HBase Java API

HBase Java API

添加依赖

    <dependencies>
        <!-- HBase client API (Connection, Admin, Table, ...). 1.4.6 matches the
             HBase install path referenced in the examples below. -->
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>1.4.6</version>
        </dependency>
        <!-- JUnit 4 provides the @Before / @Test / @After annotations that the
             example classes below import from org.junit. -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.13.2</version>
        </dependency>
    </dependencies>

基本流程

package com.shujia;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Table;

import java.io.IOException;

/**
 * Minimal walk-through of the HBase client workflow: build a configuration,
 * open a connection, obtain the {@link Admin} and {@link Table} handles, and
 * release every handle again.
 */
public class Demo01TestAPI {
    /**
     * Connects to HBase, obtains an {@link Admin} and a {@link Table} handle for
     * the table {@code test}, then closes everything.
     *
     * @param args unused
     * @throws IOException if the connection or one of the handles cannot be opened or closed
     */
    public static void main(String[] args) throws IOException {
        // 1. Create the configuration and set the HBase connection address (the ZooKeeper quorum).
        //    If unknown, look it up in /usr/local/soft/hbase-1.4.6/conf/hbase-site.xml.
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "master:2181,node1:2181,node2:2181");

        // 2. Open the connection.
        // 3. Run operations:
        //      - table structure (DDL) goes through getAdmin()
        //      - table data (DML) goes through getTable()
        // Connection, Admin and Table are all Closeable; try-with-resources closes them
        // in reverse order even when an operation throws.
        try (Connection conn = ConnectionFactory.createConnection(conf);
             Admin admin = conn.getAdmin();
             Table test = conn.getTable(TableName.valueOf("test"))) {
            // Use `admin` and `test` here.
        }
        // 4. The connection is closed automatically when the try block exits.
    }
}

常用操作示例

package com.shujia;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class Demo02API {
    Connection conn;

    @Before
    public void init() throws IOException {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "master:2181,node1:2181,node2:2181");
        conn = ConnectionFactory.createConnection(conf);
    }


    @Test
    // create table
    
    public void createTable() throws IOException {
        Admin admin = conn.getAdmin();
        // 指定表名
        HTableDescriptor testAPI = new HTableDescriptor(TableName.valueOf("testAPI"));
        // 创建列簇
        HColumnDescriptor cf1 = new HColumnDescriptor("cf1");
        // 对列簇进行配置
        cf1.setMaxVersions(3);
        // 给testAPI表添加一个列簇
        testAPI.addFamily(cf1);

        // 创建testAPI表
        admin.createTable(testAPI);
    }


    @Test
    // list 查看所有表
    
    public void listTables() throws IOException {
        Admin admin = conn.getAdmin();

        TableName[] tableNames = admin.listTableNames();

        for (TableName tableName : tableNames) {
            System.out.println(tableName.getNameAsString());
        }
    }


    @Test
    // desc 查看表结构
    
    public void getTableDescriptor() throws IOException {
        Admin admin = conn.getAdmin();

        HTableDescriptor testAPI = admin.getTableDescriptor(TableName.valueOf("testAPI"));

        HColumnDescriptor[] cfs = testAPI.getColumnFamilies();

        for (HColumnDescriptor cf : cfs) {
            System.out.println(cf.getNameAsString());
            System.out.println(cf.getMaxVersions());
            System.out.println(cf.getTimeToLive());
        }
    }


    @Test
    // alter
    // 对testAPI 将cf1的版本设置为5,并且新加一个列簇cf2
    
    public void AlterTable() throws IOException {
        Admin admin = conn.getAdmin();
        TableName testAPI = TableName.valueOf("testAPI");
        // 在修改表结构之前,先获取原有表的结构
        HTableDescriptor testAPIDesc = admin.getTableDescriptor(testAPI);
        // 获取原有表的列族
        HColumnDescriptor[] cfs = testAPIDesc.getColumnFamilies();
        for (HColumnDescriptor cf : cfs) {
            if ("cf1".equals(cf.getNameAsString())) {
                cf.setMaxVersions(5);
            }
        }
        // 新加一个列簇cf2
        testAPIDesc.addFamily(new HColumnDescriptor("cf2"));
        // 改完之后 将 testAPIDesc 作为第二个参数传出去 以实现表结构的修改
        admin.modifyTable(testAPI, testAPIDesc);
    }


    @Test
    // drop
    
    public void DropTable() throws IOException {
        Admin admin = conn.getAdmin();
        TableName tableName = TableName.valueOf("test1");
        if (admin.tableExists(tableName)) {
            // 表在删除之前需要先disable
            admin.disableTable(tableName);
            admin.deleteTable(tableName);
        } else {
            System.out.println("表不存在!");
        }
    }


    @Test
    // put
    
    public void PutData() throws IOException {
        Table testAPI = conn.getTable(TableName.valueOf("testAPI"));
        // 设置 rowkey
        Put put = new Put("001".getBytes());
        // put 数据的时候 要指定 列族 列名
        put.addColumn("cf1".getBytes(), "name".getBytes(), "张三".getBytes());
        put.addColumn("cf1".getBytes(), "age".getBytes(), "18".getBytes());
        put.addColumn("cf1".getBytes(), "clazz".getBytes(), "文科一班".getBytes());
        put.addColumn("cf1".getBytes(), "clazz".getBytes(), 1, "文科二班".getBytes());

        testAPI.put(put);
    }


    @Test
    // get
    // 获取最新的数据
    
    public void GetData() throws IOException {
        Table testAPI = conn.getTable(TableName.valueOf("testAPI"));
        // 设置 rowkey
        Get get = new Get("001".getBytes());
        // 设置可获取的最大版本数
        get.setMaxVersions(10);

        Result rs = testAPI.get(get);

        // 获取rowkey
        byte[] row = rs.getRow();
        // 获取数据
        byte[] name = rs.getValue("cf1".getBytes(), "name".getBytes());
        byte[] age = rs.getValue("cf1".getBytes(), "age".getBytes());
        byte[] clazz = rs.getValue("cf1".getBytes(), "clazz".getBytes());
        // HBase提供了一个工具给我们 -- Bytes
        System.out.println(Bytes.toString(row) + "," + Bytes.toString(name) + "," + Bytes.toString(age) + "," + Bytes.toString(clazz));
    }


    @Test
    // 提取数据的另一种方式
    // 由HBase提供
    
    public void ListCells() throws IOException {
        Table testAPI = conn.getTable(TableName.valueOf("testAPI"));

        Get get = new Get("001".getBytes());

        get.setMaxVersions(10);

        Result rs = testAPI.get(get);

        // 获取所有的Cell 单元格
        // 获取该rowkey所有的数据
        List<Cell> cells = rs.listCells();

        for (Cell cell : cells) {
            String value = Bytes.toString(CellUtil.cloneValue(cell));
            System.out.println(value);
        }
    }
    

    @Test
    /**
     * 批量插入
     * 创建stu表,增加一个info列簇,将students.txt的1000条数据全部插入
     */
     
    public void PutStu() throws IOException {
        TableName stu = TableName.valueOf("stu");

        // 创建表
        Admin admin = conn.getAdmin();
        if (!admin.tableExists(stu)) {
            admin.createTable(new HTableDescriptor(stu)
                    .addFamily(new HColumnDescriptor("info")));
        }

        Table stuTable = conn.getTable(stu);
        // 批量插入
        ArrayList<Put> puts = new ArrayList<>();

        // 读取文件
        BufferedReader br = new BufferedReader(new FileReader("data/students.txt"));
        
        int cnt = 0;
        
        String line;
        while ((line = br.readLine()) != null) {
            String[] split = line.split(",");
            String id = split[0];
            String name = split[1];
            String age = split[2];
            String gender = split[3];
            String clazz = split[4];

            Put put = new Put(id.getBytes());
            put.addColumn("info".getBytes(), "name".getBytes(), name.getBytes());
            put.addColumn("info".getBytes(), "age".getBytes(), age.getBytes());
            put.addColumn("info".getBytes(), "gender".getBytes(), gender.getBytes());
            put.addColumn("info".getBytes(), "clazz".getBytes(), clazz.getBytes());

            // 批量插入
            // 每次插入100条 假如被插入的数据不是100的倍数,那么最后的几条数据会丢失
            // 所以为了避免这种情况,需要在最后 判断Put的List是否为空
            puts.add(put);
            cnt += 1;
            if (cnt == 100) {
                stuTable.put(puts);
                puts.clear(); // 清空
                cnt = 0;
            }
            // 逐条插入,效率较低
            // stuTable.put(put);
        }

        // 判断Put的List是否为空
        if (!puts.isEmpty()) {
            stuTable.put(puts);
        }
        
        br.close();
    }


    @Test
    // delete
    // 删除也可以批量删除,原理和上面类似,这里不做演示了
    
    public void DeleteData() throws IOException {
        Table stuTable = conn.getTable(TableName.valueOf("stu"));

        Delete del = new Delete("1500100001".getBytes());
        stuTable.delete(del);
    }


    @Test
    // scan
    
    public void ScanData() throws IOException {
        Table stuTable = conn.getTable(TableName.valueOf("stu"));

        Scan scan = new Scan();
        scan.setLimit(10);
        scan.withStartRow("1500100008".getBytes());
        scan.withStopRow("1500100020".getBytes());

        ResultScanner scanner = stuTable.getScanner(scan);
        for (Result rs : scanner) {
            String id = Bytes.toString(rs.getRow());
            String name = Bytes.toString(rs.getValue("info".getBytes(), "name".getBytes()));
            String age = Bytes.toString(rs.getValue("info".getBytes(), "age".getBytes()));
            String gender = Bytes.toString(rs.getValue("info".getBytes(), "gender".getBytes()));
            String clazz = Bytes.toString(rs.getValue("info".getBytes(), "clazz".getBytes()));
            System.out.println(id + "," + name + "," + age + "," + gender + "," + clazz);
        }
    }


    @After
    public void close() throws IOException {
        conn.close();
    }
}

需求示例

获取用户最新的三个位置

数据:

package com.shujia;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;

/**
 * Requirement example: store position records in the {@code dianXin} table, using the
 * record's start time as the cell timestamp, and read back the newest three positions
 * of one user (mdn) through HBase's multi-version support.
 */
public class Demo03DianXin {
    /** Column family holding the position columns. */
    private static final byte[] CF1 = Bytes.toBytes("cf1");
    /** Qualifier of the longitude column. */
    private static final byte[] LG = Bytes.toBytes("lg");
    /** Qualifier of the latitude column. */
    private static final byte[] LAT = Bytes.toBytes("lat");
    /** Number of Puts sent to the server per batch in {@link #putALL()}. */
    private static final int BATCH_SIZE = 1000;
    /** Number of most recent positions requested by {@link #getPositionByMdn()}. */
    private static final int LATEST_POSITIONS = 3;

    Connection conn;
    TableName dianXin;

    /** Opens the shared connection and resolves the table name before each test. */
    @Before
    public void init() throws IOException {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "master:2181,node1:2181,node2:2181");
        conn = ConnectionFactory.createConnection(conf);
        dianXin = TableName.valueOf("dianXin");
    }

    /**
     * Creates the {@code dianXin} table with column family {@code cf1} keeping up to
     * 5 versions, unless the table already exists.
     */
    @Test
    public void createTable() throws IOException {
        try (Admin admin = conn.getAdmin()) {
            if (!admin.tableExists(dianXin)) {
                admin.createTable(new HTableDescriptor(dianXin)
                        .addFamily(new HColumnDescriptor(CF1)
                                .setMaxVersions(5)));
            } else {
                System.out.println("表已经存在!");
            }
        }
    }

    /**
     * Loads {@code data/DIANXIN.csv} into HBase in batches. Field 0 (mdn) is the row
     * key, field 1 (start time) is parsed as a long and used as the cell timestamp,
     * and fields 4 and 5 are stored as longitude ({@code lg}) and latitude ({@code lat}).
     *
     * @throws NumberFormatException if a start-time field is not a valid long
     */
    @Test
    public void putALL() throws IOException {
        try (Table dx_tb = conn.getTable(dianXin);
             BufferedReader br = new BufferedReader(new FileReader("data/DIANXIN.csv"))) {
            ArrayList<Put> puts = new ArrayList<>(BATCH_SIZE);

            String line;
            while ((line = br.readLine()) != null) {
                String[] split = line.split(",");
                if (split.length < 6) {
                    // Blank or truncated line: the latitude is read from index 5.
                    continue;
                }
                String mdn = split[0];
                // Parse once; both cells of a record share the same timestamp.
                long startTime = Long.parseLong(split[1]);
                // lg -- longitude, lat -- latitude
                String lg = split[4];
                String lat = split[5];

                Put put = new Put(Bytes.toBytes(mdn));
                put.addColumn(CF1, LG, startTime, Bytes.toBytes(lg));
                put.addColumn(CF1, LAT, startTime, Bytes.toBytes(lat));

                puts.add(put);
                if (puts.size() >= BATCH_SIZE) {
                    dx_tb.put(puts);
                    puts.clear();
                }
            }

            // Flush the final partial batch.
            if (!puts.isEmpty()) {
                dx_tb.put(puts);
            }
        }
    }

    /**
     * Prints up to the newest three positions ({@code lg,lat}) of one user, looked up
     * by mdn. Prints nothing when the user has no data.
     */
    @Test
    public void getPositionByMdn() throws IOException {
        String mdn = "48049101CE9FC280703582E667DE3F3D947ABD37";

        try (Table dx_tb = conn.getTable(dianXin)) {
            Get get = new Get(Bytes.toBytes(mdn));
            get.setMaxVersions(LATEST_POSITIONS);

            Result rs = dx_tb.get(get);
            // listCells() returns null when the row does not exist.
            if (rs.isEmpty()) {
                return;
            }

            ArrayList<String> lgArr = new ArrayList<>();
            ArrayList<String> latArr = new ArrayList<>();
            for (Cell cell : rs.listCells()) {
                String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
                String value = Bytes.toString(CellUtil.cloneValue(cell));
                if ("lg".equals(qualifier)) {
                    lgArr.add(value);
                } else if ("lat".equals(qualifier)) {
                    latArr.add(value);
                }
            }

            // A user may have fewer stored versions than requested; never index past
            // the shorter list.
            int available = Math.min(lgArr.size(), latArr.size());
            for (int i = 0; i < available; i++) {
                System.out.println(lgArr.get(i) + "," + latArr.get(i));
            }
        }
    }

    /** Closes the shared connection after each test. */
    @After
    public void close() throws IOException {
        // init() may have failed before the connection was assigned.
        if (conn != null) {
            conn.close();
        }
    }
}
posted @ 2022-02-27 00:03  赤兔胭脂小吕布  阅读(31)  评论(0)  编辑  收藏  举报