hbase API

导入依赖

<dependencies>
        <!-- HBase server artifact (2.4.11) used by the Java examples below. -->
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-server</artifactId>
            <version>2.4.11</version>
            <exclusions>
                <!-- Exclude the org.glassfish:javax.el that hbase-server pulls in transitively;
                     it is re-declared explicitly below with a pinned version.
                     NOTE(review): presumably the transitive version fails to resolve - confirm. -->
                <exclusion>
                    <groupId>org.glassfish</groupId>
                    <artifactId>javax.el</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <!-- Explicit, pinned replacement for the excluded javax.el dependency. -->
        <dependency>
            <groupId>org.glassfish</groupId>
            <artifactId>javax.el</artifactId>
            <version>3.0.1-b06</version>
        </dependency>
    </dependencies>

官网api使用教程:https://hbase.apache.org/2.4/apidocs/index.html

创建连接

根据官方API介绍,HBase的客户端连接由ConnectionFactory类来创建,使用完成之后需要用户手动关闭连接。同时连接是重量级的,推荐一个进程只使用一个连接;对HBase的操作通过从连接中获取的两类对象来实现:Admin(操作元数据,DDL)和Table(操作具体表的数据,DML)

package com.hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.AsyncConnection;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

import java.io.IOException;
import java.util.concurrent.CompletableFuture;

/**
 * ClassName: HBaseConnection00
 * Package: com.hbase
 * Description: Minimal demo that opens a synchronous and an asynchronous HBase
 * connection from an in-code configuration, prints the synchronous one, and
 * closes both.
 *
 * @Create 2023/9/21 8:19
 * @Version 1.0
 */
public class HBaseConnection00 {
    /**
     * Opens the connections, prints the synchronous one and closes everything.
     *
     * @param args unused
     * @throws IOException if a connection cannot be created or closed
     */
    public static void main(String[] args) throws IOException {

        // Build the client configuration in code.
        // NOTE(review): HBase examples usually start from HBaseConfiguration.create();
        // confirm that a plain Configuration is sufficient for this setup.
        Configuration conf = new Configuration();
        // ZooKeeper quorum hosts; same value as hbase.zookeeper.quorum in hbase-site.xml.
        conf.set("hbase.zookeeper.quorum","hadoop102,hadoop103,hadoop104");

        // Synchronous connection (the default choice). try-with-resources guarantees
        // the connection is closed even when the body throws.
        try (Connection connection = ConnectionFactory.createConnection(conf)) {
            // Use the connection.
            System.out.println(connection);
        }

        // An asynchronous connection is also available. The factory returns a future;
        // wait for it and close the connection so that it is not leaked.
        CompletableFuture<AsyncConnection> asyncConnection = ConnectionFactory.createAsyncConnection(conf);
        try (AsyncConnection async = asyncConnection.join()) {
            // Asynchronous operations on 'async' would go here.
        }
    }
}

改进使用xml文件而非直接设置conf

#hbase-site.xml

<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-->
<configuration>
  <!--
    The following properties are set for running HBase as a single process on a
    developer workstation. With this configuration, HBase is running in
    "stand-alone" mode and without a distributed file system. In this mode, and
    without further configuration, HBase and ZooKeeper data are stored on the
    local filesystem, in a path under the value configured for `hbase.tmp.dir`.
    This value is overridden from its default value of `/tmp` because many
    systems clean `/tmp` on a regular basis. Instead, it points to a path within
    this HBase installation directory.

    Running against the `LocalFileSystem`, as opposed to a distributed
    filesystem, runs the risk of data integrity issues and data loss. Normally
    HBase will refuse to run in such an environment. Setting
    `hbase.unsafe.stream.capability.enforce` to `false` overrides this behavior,
    permitting operation. This configuration is for the developer workstation
    only and __should not be used in production!__

    See also https://hbase.apache.org/book.html#standalone_dist
  -->

<!--表示我们要使用自己分布式的Zookeeper,三台zk的地址分别为hadoop102...端口号默认为2181可以省略不填-->
 <property>
 <name>hbase.zookeeper.quorum</name>
 <value>hadoop102,hadoop103,hadoop104</value>
 <description>Comma-separated list of hosts in the ZooKeeper ensemble used by HBase.</description>
 </property>

</configuration>

#修改后创建连接：因为HBase连接是重量级的，不建议使用连接池，整个进程创建一个连接并重复使用即可

package com.hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

import java.io.IOException;


/**
 * ClassName: HBaseConnection01
 * Package: com.hbase
 * Description: Holds one shared HBase connection for the whole process. The
 * connection is created once when the class is initialized and is released
 * through {@link #closeConnection()}.
 *
 * @Create 2023/9/21 8:33
 * @Version 1.0
 */
public class HBaseConnection01 {

    /** The shared connection, created during class initialization (singleton-like usage). */
    public static Connection connection = openConnection();

    /**
     * Creates the synchronous connection. No configuration object is passed, so the
     * settings come from the configuration files (hbase-site.xml) instead of code.
     *
     * @return the newly created connection
     * @throws RuntimeException wrapping the IOException if the connection cannot be created
     */
    private static Connection openConnection() {
        try {
            return ConnectionFactory.createConnection();
        } catch (IOException cause) {
            throw new RuntimeException(cause);
        }
    }

    /**
     * Closes the shared connection; does nothing when there is no connection.
     *
     * @throws IOException if closing the connection fails
     */
    public static void closeConnection() throws IOException {
        if (connection == null) {
            return;
        }
        connection.close();
    }

    /**
     * Prints the shared connection and then closes it.
     *
     * @param args unused
     * @throws IOException if closing the connection fails
     */
    public static void main(String[] args) throws IOException {

        // Reuse the connection created by the class instead of opening one here.
        System.out.println(connection);

        // Release the connection at the very end of the main thread.
        closeConnection();
    }
}

DDL

package com.hbase;

import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;

/**
 * ClassName: HBaseDDL
 * Package: com.hbase
 * Description: DDL helpers (namespace and table management) built on the shared
 * connection of {@link HBaseConnection01}. Every method obtains its own
 * {@code Admin} and closes it with try-with-resources, so the Admin is released
 * even when an operation fails.
 *
 * @Create 2023/9/21 8:55
 * @Version 1.0
 */
public class HBaseDDL {

    /** Shared connection; connections are heavyweight, so the one created earlier is reused. */
    public static Connection connection = HBaseConnection01.connection;


    /**
     * Creates a namespace carrying the configuration entry {@code user=sunshine}.
     * Failures while checking or creating the namespace are reported on the console
     * and not rethrown.
     *
     * @param namespace name of the namespace
     * @throws IOException if the Admin cannot be obtained or closed
     */
    public static void createNamespace(String namespace) throws IOException {
        // Admin is lightweight and not thread-safe: get one per call, never cache it.
        try (Admin admin = connection.getAdmin()) {
            try {
                // Check first so that "already exists" is only reported when it is true.
                if (namespaceExists(admin, namespace)) {
                    System.out.println("命名空间已经存在");
                    return;
                }

                // Describe the namespace through its builder, then create it.
                NamespaceDescriptor.Builder builder = NamespaceDescriptor.create(namespace);
                builder.addConfiguration("user","sunshine");
                admin.createNamespace(builder.build());
            } catch (IOException e) {
                // Problems here are treated as this method's own concern and are not rethrown.
                System.out.println("创建命名空间失败: " + namespace);
                e.printStackTrace();
            }
        }
    }

    /** Returns true when a namespace with the given name is listed by the cluster. */
    private static boolean namespaceExists(Admin admin, String namespace) throws IOException {
        for (NamespaceDescriptor descriptor : admin.listNamespaceDescriptors()) {
            if (descriptor.getName().equals(namespace)) {
                return true;
            }
        }
        return false;
    }


    /**
     * Tells whether a table exists.
     *
     * @param namespace namespace name
     * @param tableName table name
     * @return true if the table exists, false otherwise
     * @throws IOException if the check itself fails; a failed check is no longer
     *                     reported as "table does not exist"
     */
    public static boolean isTableExists(String namespace,String tableName) throws IOException {
        try (Admin admin = connection.getAdmin()) {
            return admin.tableExists(TableName.valueOf(namespace, tableName));
        }
    }

    /**
     * Creates a table whose column families all keep up to 5 versions.
     * Nothing is created when no column family is given or the table already exists;
     * a failed creation is reported on the console and not rethrown.
     *
     * @param namespace namespace name
     * @param tableName table name
     * @param columnFamilies column family names; at least one is required
     * @throws IOException if the existence check fails or the Admin cannot be obtained or closed
     */
    public static void createTable(String namespace,String tableName,String... columnFamilies) throws IOException {
        // A table needs at least one column family.
        if (columnFamilies == null || columnFamilies.length == 0) {
            System.out.println("创建表格至少有一个列族");
            return;
        }

        if (isTableExists(namespace,tableName)) {
            System.out.println("表格已经存在");
            return;
        }

        // Build the full table description before touching the cluster.
        TableDescriptorBuilder tableDescriptorBuilder =
                TableDescriptorBuilder.newBuilder(TableName.valueOf(namespace, tableName));

        for (String columnFamily : columnFamilies) {
            ColumnFamilyDescriptorBuilder columnFamilyDescriptorBuilder =
                    ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes(columnFamily));

            // Keep up to 5 versions per cell in this family.
            columnFamilyDescriptorBuilder.setMaxVersions(5);

            tableDescriptorBuilder.setColumnFamily(columnFamilyDescriptorBuilder.build());
        }

        try (Admin admin = connection.getAdmin()) {
            try {
                admin.createTable(tableDescriptorBuilder.build());
            } catch (IOException e) {
                // Existence was checked above, so report the real cause
                // instead of claiming that the table already exists.
                System.out.println("创建表格失败: " + namespace + ":" + tableName);
                e.printStackTrace();
            }
        }
    }

    /**
     * Changes the maximum number of versions of one column family of a table.
     * Does nothing (apart from a console message) when the table or the column
     * family does not exist.
     *
     * @param namespace namespace name
     * @param tableName table name
     * @param columnFamily column family name
     * @param version new maximum number of versions
     * @throws IOException if the existence check fails or the Admin cannot be obtained or closed
     * @throws RuntimeException wrapping the IOException if reading or modifying the table fails
     */
    public static void modifyTable(String namespace,String tableName,String columnFamily,int version) throws IOException {

        if (!isTableExists(namespace,tableName)) {
            System.out.println("表格不存在");
            return;
        }

        TableName name = TableName.valueOf(namespace, tableName);

        try (Admin admin = connection.getAdmin()) {
            try {
                // Start from the current description: a builder created from just the
                // table name would lose every existing setting.
                TableDescriptor descriptor = admin.getDescriptor(name);

                ColumnFamilyDescriptor oldFamily = descriptor.getColumnFamily(Bytes.toBytes(columnFamily));
                if (oldFamily == null) {
                    // Without this check the builder below would fail with a NullPointerException.
                    System.out.println("列族不存在");
                    return;
                }

                // Likewise copy the old family description so its other parameters are preserved.
                ColumnFamilyDescriptor newFamily = ColumnFamilyDescriptorBuilder
                        .newBuilder(oldFamily)
                        .setMaxVersions(version)
                        .build();

                TableDescriptor updated = TableDescriptorBuilder
                        .newBuilder(descriptor)
                        .modifyColumnFamily(newFamily)
                        .build();

                admin.modifyTable(updated);
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    }

    /**
     * Deletes a table.
     *
     * @param namespace namespace name
     * @param tableName table name
     * @return true if the table was deleted, false if it did not exist
     * @throws IOException if the existence check fails or the Admin cannot be obtained or closed
     * @throws RuntimeException wrapping the IOException if disabling or deleting the table fails
     */
    public static boolean deleTable(String namespace,String tableName) throws IOException {
        if (!isTableExists(namespace,tableName)) {
            System.out.println("表格不存在,无法删除");
            return false;
        }

        TableName name = TableName.valueOf(namespace, tableName);

        try (Admin admin = connection.getAdmin()) {
            try {
                // A table must be disabled before it can be deleted; skip the step
                // when it already is, because disabling twice is an error.
                if (!admin.isTableDisabled(name)) {
                    admin.disableTable(name);
                }
                admin.deleteTable(name);
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
        return true;
    }


    /**
     * Manual test driver; the shared connection is closed even if a call fails.
     *
     * @param args unused
     * @throws IOException if an operation or closing the connection fails
     */
    public static void main(String[] args) throws IOException {
        try {
            // Create a namespace (make sure the connection works before calling the methods).
//            createNamespace("sunshine");

            // Check whether a table exists.
//            System.out.println(isTableExists("bigdata", "student"));

            // Create tables.
//            createTable("sunshine","student","info","msg");
//            createTable("sunshine","class");

            // Modify a table.
//            modifyTable("sunshine","student","info",6);

            // Delete a table.
            System.out.println(deleTable("sunshine", "student"));

            // Other code.
            System.out.println("其它代码");
        } finally {
            // Release the shared connection.
            HBaseConnection01.closeConnection();
        }
    }
}

DML

package com.hbase;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.CompareOperator;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.ColumnValueFilter;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.yarn.webapp.hamlet.Hamlet;

import java.io.IOException;
import java.nio.ByteBuffer;

/**
 * ClassName: HBaseDML
 * Package: com.hbase
 * Description: DML helpers (put, get, scan, filtered scan, delete) built on the
 * shared connection of {@link HBaseConnection01}. Tables and scanners are closed
 * with try-with-resources, and cell bytes are decoded with {@code Bytes.toString},
 * the counterpart of the {@code Bytes.toBytes} calls used for writing.
 *
 * @Create 2023/9/21 12:31
 * @Version 1.0
 */
public class HBaseDML {
    /** Shared connection, reused from HBaseConnection01. */
    public static Connection connection = HBaseConnection01.connection;


    /**
     * Writes one cell. The existence of the table is not checked.
     *
     * @param namespace namespace name
     * @param tableName table name
     * @param rowKey    row key
     * @param columnFamily  column family name
     * @param columnName    column (qualifier) name
     * @param value     value to store
     * @throws IOException if the table cannot be obtained or closed
     * @throws RuntimeException wrapping the IOException if the put fails
     */
    public static void putCell(String namespace,String tableName,
                               String rowKey,String columnFamily,String columnName,String value)
            throws IOException {
        Put put = new Put(Bytes.toBytes(rowKey));
        put.addColumn(Bytes.toBytes(columnFamily),Bytes.toBytes(columnName),Bytes.toBytes(value));

        try (Table table = connection.getTable(TableName.valueOf(namespace, tableName))) {
            try {
                table.put(put);
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    }


    /**
     * Reads all versions of one column of one row and prints each value on its own
     * line. Prints nothing when the row or column has no data.
     *
     * @param namespace namespace name
     * @param tableName table name
     * @param rowKey    row key
     * @param columnFamily  column family name
     * @param columnName    column (qualifier) name
     * @throws IOException if the table cannot be obtained or closed
     * @throws RuntimeException wrapping the IOException if the get fails
     */
    public static void getCells(String namespace,String tableName,
                                String rowKey,String columnFamily,String columnName)
            throws IOException {
        Get get = new Get(Bytes.toBytes(rowKey));

        // Without addColumn the whole row would be read; restrict the read to one column.
        get.addColumn(Bytes.toBytes(columnFamily),Bytes.toBytes(columnName));

        // Return every stored version, not only the newest one.
        get.readAllVersions();

        try (Table table = connection.getTable(TableName.valueOf(namespace, tableName))) {
            try {
                Result result = table.get(get);

                // rawCells() can be null for an empty result, so check before iterating.
                if (result.isEmpty()) {
                    return;
                }

                // Test helper: dump the values to the console. Real code would process them.
                for (Cell cell : result.rawCells()) {
                    // The raw backing array is not directly printable; clone the value bytes and decode them.
                    System.out.println(Bytes.toString(CellUtil.cloneValue(cell)));
                }
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    }


    /**
     * Scans a range of rows and prints every cell as
     * {@code row-family-qualifier-value}, one line per row.
     *
     * @param namespace namespace name
     * @param tableName table name
     * @param startRow  first row of the range (inclusive)
     * @param stopRow   end of the range (exclusive)
     * @throws IOException if the table cannot be obtained or closed
     * @throws RuntimeException wrapping the IOException if the scanner cannot be opened
     */
    public static void scanRows(String namespace,String tableName,
                                String startRow,String stopRow) throws IOException {
        Scan scan = newRangeScan(startRow, stopRow);

        try (Table table = connection.getTable(TableName.valueOf(namespace, tableName))) {
            // The scanner is a resource of its own and is closed as well.
            try (ResultScanner scanner = table.getScanner(scan)) {
                printRows(scanner);
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    }


    /**
     * Scans a range of rows, keeping only rows whose given column equals the given
     * value, and prints them like {@link #scanRows}.
     *
     * @param namespace namespace name
     * @param tableName table name
     * @param startRow  first row of the range (inclusive)
     * @param stopRow   end of the range (exclusive)
     * @param columnFamily  column family name
     * @param columnName    column (qualifier) name
     * @param value     value the column is compared with
     * @throws IOException if the table cannot be obtained or closed
     * @throws RuntimeException wrapping the IOException if the scanner cannot be opened
     */
    public static void filterScan(String namespace,String tableName,String startRow,
                                  String stopRow,String columnFamily,String columnName,
                                  String value) throws IOException {
        Scan scan = newRangeScan(startRow, stopRow);

        // A FilterList can hold several filters; each one can only narrow the result further.
        FilterList filterList = new FilterList();

        // SingleColumnValueFilter keeps the whole row when the column matches. Rows that
        // do not have the column at all are kept too: the filter removes what fails the
        // comparison, and a missing column cannot be compared.
        // (A ColumnValueFilter built from the same four arguments would instead keep
        // only the matching column's cells.)
        SingleColumnValueFilter singleColumnValueFilter = new SingleColumnValueFilter(
                Bytes.toBytes(columnFamily),
                Bytes.toBytes(columnName),
                CompareOperator.EQUAL,
                Bytes.toBytes(value)
        );
        filterList.addFilter(singleColumnValueFilter);

        scan.setFilter(filterList);

        try (Table table = connection.getTable(TableName.valueOf(namespace, tableName))) {
            try (ResultScanner scanner = table.getScanner(scan)) {
                printRows(scanner);
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    }


    /**
     * Deletes all versions of one column of one row.
     *
     * @param namespace namespace name
     * @param tableName table name
     * @param rowKey row key
     * @param columnFamily column family name
     * @param columnName column (qualifier) name
     * @throws IOException if the table cannot be obtained or closed
     * @throws RuntimeException wrapping the IOException if the delete fails
     */
    public static void deleteColumn(String namespace,String tableName,String rowKey,
                                    String columnFamily,String columnName) throws IOException {
        Delete delete = new Delete(Bytes.toBytes(rowKey));

        // addColumn would remove a single version; addColumns removes all versions,
        // which is what deleting a column is expected to mean here.
        delete.addColumns(Bytes.toBytes(columnFamily),Bytes.toBytes(columnName));

        try (Table table = connection.getTable(TableName.valueOf(namespace, tableName))) {
            try {
                table.delete(delete);
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    }

    /** Builds a scan over [startRow, stopRow); without the bounds the whole table would be scanned. */
    private static Scan newRangeScan(String startRow, String stopRow) {
        return new Scan()
                // Start row is inclusive by default.
                .withStartRow(Bytes.toBytes(startRow))
                // Stop row is exclusive by default.
                .withStopRow(Bytes.toBytes(stopRow));
    }

    /** Prints each row on one line, its cells as tab-terminated row-family-qualifier-value entries. */
    private static void printRows(ResultScanner scanner) {
        // A Result holds the cells of one row; the scanner yields one Result per row.
        for (Result result : scanner) {
            if (!result.isEmpty()) {
                for (Cell cell : result.rawCells()) {
                    System.out.print(
                            Bytes.toString(CellUtil.cloneRow(cell))
                                    + "-"
                                    + Bytes.toString(CellUtil.cloneFamily(cell))
                                    + "-"
                                    + Bytes.toString(CellUtil.cloneQualifier(cell))
                                    + "-"
                                    + Bytes.toString(CellUtil.cloneValue(cell))
                                    + "\t");
                }
            }

            System.out.println();
        }
    }

    /**
     * Manual test driver; the shared connection is closed even if a call fails.
     *
     * @param args unused
     * @throws IOException if an operation or closing the connection fails
     */
    public static void main(String[] args) throws IOException {
        try {
            // Insert data.
//            putCell("sunshine","student","2001",
//                    "info","name","zhangsan");
//            putCell("sunshine","student","2001",
//                    "info","name","lisi");
//            putCell("sunshine","student","2001",
//                    "info","name","wangwu");

            // Read data.
//            getCells("sunshine","student",
//                    "2001", "info","name");

            // Read several rows.
//            scanRows("bigdata","student","1001","1004");

            // Read with a filter.
//            filterScan("bigdata","student",
//                    "1001","1004",
//                    "info","age","18");

            // Delete test: read before deleting,
            getCells("bigdata","student",
                    "1002", "info","name");
            System.out.println(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>");
            // delete,
            deleteColumn("bigdata","student",
                    "1002", "info","name");
            System.out.println(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>");
            // and read again afterwards.
            getCells("bigdata","student",
                    "1002", "info","name");


            // Other code.
            System.out.println("其它代码");
        } finally {
            // Release the shared connection.
            HBaseConnection01.closeConnection();
        }
    }
}

github:https://github.com/sunshin1/hbase-demo

posted @ 2023-09-21 14:52 sunshin1 阅读(51) 评论(0) 收藏举报来源

刷新页面返回顶部

sunshie

hbase API

创建连接

DDL

DML

公告