IDEA代码操作Hbase(1)----程序案例

IDEA代码操作Hbase

IDEA代码操作Hbase

1、操作Hbase的基本流程

package com.shujia;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Table;

import java.io.IOException;

/**
 * Minimal walkthrough of the HBase client workflow: build a configuration,
 * open a connection, obtain the handles used for schema and data operations,
 * and release everything again.
 */
public class Demo1TestAPI {
    /**
     * Opens a connection to HBase, obtains an {@link Admin} and a {@link Table}
     * handle for the {@code test} table, and closes all of them.
     *
     * @param args unused
     * @throws IOException if the connection or one of the handles cannot be opened or closed
     */
    public static void main(String[] args) throws IOException {
        // 1. Create the configuration and set the HBase connection address (the ZooKeeper quorum).
        Configuration conf = HBaseConfiguration.create();
        // The key is the property found in /usr/local/soft/hbase-1.4.6/conf/hbase-site.xml;
        // the value has to be supplied by us.
        // NOTE(review): the host names "note01" and "note2" are spelled inconsistently —
        // confirm them against the actual cluster hosts.
        conf.set("hbase.zookeeper.quorum","master:2181,note01:2181,note2:2181");

        // 2. Open the connection. try-with-resources closes the table, the admin and the
        //    connection (in that order) even when an operation in between throws.
        try (Connection conn = ConnectionFactory.createConnection(conf);
             // 3a. Schema-level operations go through getAdmin().
             Admin admin = conn.getAdmin();
             // 3b. Data-level operations go through getTable(); it takes a TableName,
             //     not a String, hence the conversion.
             Table test = conn.getTable(TableName.valueOf("test"))) {
            // Perform operations with `admin` and `test` here.
        }
        // 4. Everything has been closed by the try-with-resources statement above.
    }
}

2、程序案例---建表、查看、删除、加载、获取

package com.shujia;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.io.IOException;
import java.util.List;

/**
 * JUnit examples of common HBase client operations on the {@code testAPI} table:
 * create, list, describe, alter and drop a table, then put and get data.
 *
 * <p>The shared {@link Connection} is opened in {@link #init()} and closed in
 * {@link #close()}. Every test opens its own {@link Admin} or {@link Table}
 * and closes it via try-with-resources.
 *
 * <p>All row keys, qualifiers and values are encoded with {@link Bytes#toBytes(String)}
 * so that they round-trip with the {@link Bytes#toString(byte[])} calls used when reading.
 */
public class Demo2API {
    private static final TableName TEST_API = TableName.valueOf("testAPI");
    private static final byte[] CF1 = Bytes.toBytes("cf1");
    private static final byte[] ROW_001 = Bytes.toBytes("001");

    Connection conn;

    /**
     * Creates the configuration and opens the connection used by every test.
     *
     * @throws IOException if the connection cannot be created
     */
    @Before
    public void init() throws IOException {
        // 1. Create the configuration and set the HBase connection address (the ZooKeeper quorum).
        Configuration conf = HBaseConfiguration.create();
        // The key is the property found in hbase-1.4.6/conf/hbase-site.xml; the value is ours to supply.
        // NOTE(review): "note01" and "note2" are spelled inconsistently — confirm the host names.
        conf.set("hbase.zookeeper.quorum", "master:2181,note01:2181,note2:2181");

        // 2. Open the connection from the configuration.
        conn = ConnectionFactory.createConnection(conf);
    }

    /**
     * Creates table {@code testAPI} with a single column family {@code cf1}
     * whose maximum number of versions is 3.
     *
     * <p>NOTE(review): there is no existence check here (unlike {@link #DropTable()});
     * presumably the call fails if the table already exists.
     *
     * @throws IOException if the table cannot be created
     */
    @Test
    public void createTable() throws IOException {
        // Schema operations go through Admin.
        try (Admin admin = conn.getAdmin()) {
            // Table descriptor, identified by the table name.
            HTableDescriptor tableDesc = new HTableDescriptor(TEST_API);

            // Column family descriptor, identified by the family name.
            HColumnDescriptor cf1 = new HColumnDescriptor("cf1");
            cf1.setMaxVersions(3); // maximum number of versions, not a version number

            tableDesc.addFamily(cf1);
            admin.createTable(tableDesc);
        }
    }

    /**
     * Prints the name of every table (the shell equivalent is {@code list}).
     *
     * @throws IOException if the table names cannot be fetched
     */
    @Test
    public void listTables() throws IOException {
        try (Admin admin = conn.getAdmin()) {
            for (TableName tableName : admin.listTableNames()) {
                // Convert each TableName to a String for printing.
                System.out.println(tableName.getNameAsString());//test test1 testAPI
            }
        }
    }

    /**
     * Prints name, max versions and time-to-live of every column family of
     * {@code testAPI} (the shell equivalent is {@code desc}).
     *
     * @throws IOException if the table descriptor cannot be fetched
     */
    @Test
    public void descTableDescriptor() throws IOException {
        try (Admin admin = conn.getAdmin()) {
            HTableDescriptor tableDesc = admin.getTableDescriptor(TEST_API);

            for (HColumnDescriptor cf : tableDesc.getColumnFamilies()) {
                System.out.println(cf.getNameAsString()); // family name, e.g. cf1
                System.out.println(cf.getMaxVersions());  // max versions, e.g. 3
                System.out.println(cf.getTimeToLive());   // time-to-live (not a timestamp), e.g. 2147483647
            }
        }
    }

    /**
     * Alters {@code testAPI}: sets the max versions of {@code cf1} to 5 and adds
     * a new column family {@code cf2} (the shell equivalent is {@code alter}).
     *
     * @throws IOException if the descriptor cannot be fetched or the table cannot be modified
     */
    @Test
    public void AlterTable() throws IOException {
        try (Admin admin = conn.getAdmin()) {
            HTableDescriptor tableDesc = admin.getTableDescriptor(TEST_API);

            // Find cf1 among the existing families and raise its max versions to 5.
            for (HColumnDescriptor cf : tableDesc.getColumnFamilies()) {
                if ("cf1".equals(cf.getNameAsString())) {
                    cf.setMaxVersions(5);
                }
            }

            // addFamily() rejects a family that already exists, so only add cf2 when it is
            // missing; this keeps the test re-runnable.
            if (!tableDesc.hasFamily(Bytes.toBytes("cf2"))) {
                tableDesc.addFamily(new HColumnDescriptor("cf2"));
            }

            // Submit the modified descriptor.
            admin.modifyTable(TEST_API, tableDesc);
        }
    }

    /**
     * Drops table {@code test1} if it exists, otherwise prints a message.
     *
     * @throws IOException if the table cannot be disabled or deleted
     */
    @Test
    public void DropTable() throws IOException {
        try (Admin admin = conn.getAdmin()) {
            TableName tableName = TableName.valueOf("test1");

            // Check for existence before trying to delete.
            if (!admin.tableExists(tableName)) {
                System.out.println("表不存在！");
                return;
            }

            // A table must be disabled before it can be deleted; skip the disable step
            // when a previous run already disabled it.
            if (admin.isTableEnabled(tableName)) {
                admin.disableTable(tableName);
            }
            admin.deleteTable(tableName);
        }
    }

    /**
     * Writes one row ({@code 001}) with name, age and two versions of clazz into
     * {@code testAPI} (the shell equivalent is {@code put}).
     *
     * @throws IOException if the put fails
     */
    @Test
    public void PutData() throws IOException {
        try (Table testAPI = conn.getTable(TEST_API)) {
            // A Put is keyed by the row key; everything is passed to HBase as bytes.
            Put put = new Put(ROW_001);
            put.addColumn(CF1, Bytes.toBytes("name"), Bytes.toBytes("张三"));
            put.addColumn(CF1, Bytes.toBytes("age"), Bytes.toBytes("18"));
            put.addColumn(CF1, Bytes.toBytes("clazz"), Bytes.toBytes("文科一班"));
            // The same column again, this time with an explicit timestamp of 1.
            put.addColumn(CF1, Bytes.toBytes("clazz"), 1L, Bytes.toBytes("文科二班"));

            testAPI.put(put);
        }
    }

    /**
     * Reads row {@code 001} from {@code testAPI} and prints
     * {@code rowkey,name,age,clazz} (the shell equivalent is {@code get}).
     *
     * @throws IOException if the get fails
     */
    @Test
    public void GetData() throws IOException {
        try (Table testAPI = conn.getTable(TEST_API)) {
            Get get = new Get(ROW_001);
            // Ask for up to 10 versions per column.
            get.setMaxVersions(10);
            Result rs = testAPI.get(get);

            // Row key and the value of each column.
            byte[] row = rs.getRow();
            byte[] name = rs.getValue(CF1, Bytes.toBytes("name"));
            byte[] age = rs.getValue(CF1, Bytes.toBytes("age"));
            byte[] clazz = rs.getValue(CF1, Bytes.toBytes("clazz"));

            // Convert the byte[] values back to Strings for printing.
            System.out.println(Bytes.toString(row) + "," + Bytes.toString(name) + "," + Bytes.toString(age) + "," + Bytes.toString(clazz));
        }
    }

    /**
     * Alternative way of extracting data: prints the value of every {@link Cell}
     * returned for row {@code 001}, using {@code Result.listCells()}.
     *
     * @throws IOException if the get fails
     */
    @Test
    public void ListCells() throws IOException {
        try (Table testAPI = conn.getTable(TEST_API)) {
            Get get = new Get(ROW_001);
            // Ask for up to 10 versions per column.
            get.setMaxVersions(10);
            Result rs = testAPI.get(get);

            // listCells() returns null for an empty result, so bail out before iterating.
            List<Cell> cells = rs.listCells();
            if (cells == null) {
                return;
            }

            for (Cell cell : cells) {
                // Copy the value out of the cell and print it as a String.
                System.out.println(Bytes.toString(CellUtil.cloneValue(cell)));
            }
        }
    }

    /**
     * Closes the shared connection after each test.
     *
     * @throws IOException if closing the connection fails
     */
    @After
    public void close() throws IOException {
        // conn is still null when init() failed; avoid masking that failure with an NPE.
        if (conn != null) {
            conn.close();
        }
    }
}

3、程序案例----将学生表的数据通过代码加载到表里(1000条数据)

方法一：逐条插入

package com.shujia;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.junit.Before;
import org.junit.Test;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;

public class Demo3API {
    Connection conn;

    @Before
    public void init() throws IOException {
        //1、创建配置文件，设置HBase的连接地址（ZK的地址）
        Configuration conf = HBaseConfiguration.create();
        //第1个参数在 hbase-1.4.6/conf/hbase-site.xml 中,第2个参数需要我们自己指定
        conf.set("hbase.zookeeper.quorum", "master:2181,note01:2181,note2:2181");

        //2、建立连接(参数为配置对象的变量)
        conn = ConnectionFactory.createConnection(conf);
    }

    @Test
    /**
     * 创建stu表，增加一个info列簇，将students.txt的1000条数据全部插入
     */
    public void PutStu() throws IOException {
        TableName stu = TableName.valueOf("stu");

        // 创建表
        Admin admin = conn.getAdmin();
        if (!admin.tableExists(stu)) {
            admin.createTable(new HTableDescriptor(stu)//创建表
                    .addFamily(new HColumnDescriptor("info")));//添加列簇
        }

        //读取文件
        FileReader fr = new FileReader("data/students.txt");
        BufferedReader br = new BufferedReader(fr);

        //和表建立连接
        Table stuTable = conn.getTable(stu);

        //提取数据
        String line;
        while ((line = br.readLine()) != null) {
            //切分数据
            String[] split = line.split(",");
            String id = split[0];
            String name = split[1];
            String age = split[2];
            String gender = split[3];
            String clazz = split[4];

            //创建Put对象，参数要求传入一个rk的字节类型，需要转型
            Put put = new Put(id.getBytes());
            //插入数据，需要传入字节类型，都需要转型
            put.addColumn("info".getBytes(), "name".getBytes(), name.getBytes());
            put.addColumn("info".getBytes(), "age".getBytes(), age.getBytes());
            put.addColumn("info".getBytes(), "gender".getBytes(), gender.getBytes());
            put.addColumn("info".getBytes(), "clazz".getBytes(), clazz.getBytes());
            
            //调取put()方法来执行，逐条插入
            //stuTable.put(put);
        }
        br.close();
    }
    
      @After
    public void close() throws IOException {
        //关闭连接
        conn.close();
    }
}

方法二：批量插入

逐条插入时，数据一共有一千条，相当于 put 了一千次，效率非常低；因此下面改为批量插入。

package com.shujia;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Batch variant of the student loader, plus delete and scan examples on the
 * {@code stu} table.
 *
 * <p>The shared {@link Connection} is opened in {@link #init()} and closed in
 * {@link #close()}. Every test opens its own {@link Admin} or {@link Table}
 * and closes it via try-with-resources.
 */
public class Demo3API {
    /** Number of Put objects collected before they are sent to HBase in one call. */
    private static final int BATCH_SIZE = 100;
    private static final TableName STU = TableName.valueOf("stu");
    private static final byte[] INFO = Bytes.toBytes("info");

    Connection conn;

    /**
     * Creates the configuration and opens the connection used by every test.
     *
     * @throws IOException if the connection cannot be created
     */
    @Before
    public void init() throws IOException {
        // 1. Create the configuration and set the HBase connection address (the ZooKeeper quorum).
        Configuration conf = HBaseConfiguration.create();
        // The key is the property found in hbase-1.4.6/conf/hbase-site.xml; the value is ours to supply.
        // NOTE(review): "note01" and "note2" are spelled inconsistently — confirm the host names.
        conf.set("hbase.zookeeper.quorum", "master:2181,note01:2181,note2:2181");

        // 2. Open the connection from the configuration.
        conn = ConnectionFactory.createConnection(conf);
    }

    /**
     * Creates table {@code stu} with column family {@code info} if it does not exist,
     * then inserts every record of {@code data/students.txt} in batches of
     * {@link #BATCH_SIZE} rows.
     *
     * <p>Each non-blank line is expected to hold at least five comma-separated fields:
     * id, name, age, gender, clazz. The id is used as the row key.
     *
     * @throws IOException if the file cannot be read, a line has fewer than five fields,
     *                     or an HBase operation fails
     */
    @Test
    public void PutStu() throws IOException {
        // Create the table on first use.
        try (Admin admin = conn.getAdmin()) {
            if (!admin.tableExists(STU)) {
                admin.createTable(new HTableDescriptor(STU)
                        .addFamily(new HColumnDescriptor("info")));
            }
        }

        // Puts waiting to be flushed to the table.
        List<Put> puts = new ArrayList<>(BATCH_SIZE);

        // NOTE(review): FileReader decodes with the platform default charset — confirm that
        // this matches the encoding of students.txt.
        try (BufferedReader br = new BufferedReader(new FileReader("data/students.txt"));
             Table stuTable = conn.getTable(STU)) {
            String line;
            while ((line = br.readLine()) != null) {
                // Tolerate blank lines (e.g. a trailing newline at the end of the file).
                if (line.trim().isEmpty()) {
                    continue;
                }

                String[] split = line.split(",");
                if (split.length < 5) {
                    throw new IOException(
                            "Malformed student record, expected 5 comma-separated fields: " + line);
                }

                // The row key is the student id; Bytes.toBytes matches the Bytes.toString
                // decoding used by ScanData().
                Put put = new Put(Bytes.toBytes(split[0]));
                put.addColumn(INFO, Bytes.toBytes("name"), Bytes.toBytes(split[1]));
                put.addColumn(INFO, Bytes.toBytes("age"), Bytes.toBytes(split[2]));
                put.addColumn(INFO, Bytes.toBytes("gender"), Bytes.toBytes(split[3]));
                put.addColumn(INFO, Bytes.toBytes("clazz"), Bytes.toBytes(split[4]));

                // Collect the row and flush once a full batch has accumulated.
                puts.add(put);
                if (puts.size() >= BATCH_SIZE) {
                    stuTable.put(puts);
                    puts.clear();
                }
            }

            // Flush whatever is left over from the last, partial batch.
            if (!puts.isEmpty()) {
                stuTable.put(puts);
            }
        }
    }

    /**
     * Deletes the row with row key {@code 1500100001} from {@code stu}.
     * No family or column is specified on the {@link Delete}, so it targets the whole row.
     *
     * @throws IOException if the delete fails
     */
    @Test
    public void DeleteData() throws IOException {
        try (Table stuTable = conn.getTable(STU)) {
            Delete del = new Delete(Bytes.toBytes("1500100001"));
            stuTable.delete(del);
        }
    }

    /**
     * Scans up to 10 rows of {@code stu} between row keys {@code 1500100008} and
     * {@code 1500100020} and prints them as {@code id,name,age,gender,clazz}.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void ScanData() throws IOException {
        Scan scan = new Scan();
        scan.setLimit(10);                                 // return at most 10 rows
        scan.withStartRow(Bytes.toBytes("1500100008"));    // start row key
        // NOTE(review): presumably the stop row is exclusive by default — confirm in the Scan API.
        scan.withStopRow(Bytes.toBytes("1500100020"));     // stop row key

        // The scanner is closed together with the table.
        try (Table stuTable = conn.getTable(STU);
             ResultScanner scanner = stuTable.getScanner(scan)) {
            for (Result rs : scanner) {
                String id = Bytes.toString(rs.getRow());
                String name = Bytes.toString(rs.getValue(INFO, Bytes.toBytes("name")));
                String age = Bytes.toString(rs.getValue(INFO, Bytes.toBytes("age")));
                String gender = Bytes.toString(rs.getValue(INFO, Bytes.toBytes("gender")));
                String clazz = Bytes.toString(rs.getValue(INFO, Bytes.toBytes("clazz")));
                System.out.println(id + "," + name + "," + age + "," + gender + "," + clazz);
            }
        }
    }

    /**
     * Closes the shared connection after each test.
     *
     * @throws IOException if closing the connection fails
     */
    @After
    public void close() throws IOException {
        // conn is still null when init() failed; avoid masking that failure with an NPE.
        if (conn != null) {
            conn.close();
        }
    }
}

posted @ 2022-02-28 21:58 阿伟宝座阅读(1171) 评论(0) 收藏举报

刷新页面返回顶部

阿伟宝座

IDEA代码操作Hbase(1)----程序案例

IDEA代码操作Hbase

1、操作Hbase的基本流程

2、程序案例---建表、查看、删除、加载、获取

3、程序案例----将学生表的数据通过代码加载到表里(1000条数据)

公告