HBase API

package hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.io.*;
import java.util.ArrayList;
import java.util.List;

/**
 * JUnit walkthrough of the basic HBase client API: creating and altering a table,
 * single-row put/get, range scans, batched loading from a CSV file and dropping a table.
 *
 * <p>Every test needs a reachable HBase cluster whose ZooKeeper quorum is
 * {@code master,node1,node2}. Each {@link Admin}, {@link Table} and {@link ResultScanner}
 * is opened in a try-with-resources block so it is released even when a test fails.
 *
 * <p>Row keys, qualifiers and values are encoded with {@link Bytes#toBytes(String)} so that
 * they use the same encoding that {@link Bytes#toString(byte[])} decodes on the read path,
 * independent of the platform default charset.
 */
public class HBaseAPI {
    private static final TableName TEST_API = TableName.valueOf("test_api");
    private static final TableName STUDENTS = TableName.valueOf("students");

    private static final byte[] CF1 = Bytes.toBytes("cf1");
    private static final byte[] INFO = Bytes.toBytes("info");

    /** Number of Puts sent to the server per batch in {@link #putAll()}. */
    private static final int BATCH_SIZE = 10;

    // Schema changes go through Admin (conn.getAdmin()); data access goes through Table (conn.getTable()).
    Connection conn;

    /**
     * Opens the cluster connection shared by the test methods.
     *
     * @throws IOException if the connection cannot be created
     */
    @Before
    public void init() throws IOException {
        Configuration conf = HBaseConfiguration.create();
        // Same property that is normally configured in conf/hbase-site.xml.
        conf.set("hbase.zookeeper.quorum", "master,node1,node2");
        conn = ConnectionFactory.createConnection(conf);
    }

    /**
     * Creates table {@code test_api} with a single column family {@code cf1}
     * (TTL 10, max 3 versions) unless the table already exists.
     *
     * @throws IOException on any HBase communication failure
     */
    @Test
    public void create() throws IOException {
        try (Admin admin = conn.getAdmin()) {
            // Only create the table when it is missing so the test can be re-run.
            if (!admin.tableExists(TEST_API)) {
                HTableDescriptor tableDescriptor = new HTableDescriptor(TEST_API);

                HColumnDescriptor cf1 = new HColumnDescriptor("cf1");
                // NOTE(review): the TTL unit is seconds per the HBase docs, so cells written by
                // put() would expire after ~10s until alter() raises the TTL - confirm intended.
                cf1.setTimeToLive(10);
                cf1.setMaxVersions(3);

                tableDescriptor.addFamily(cf1);
                admin.createTable(tableDescriptor);
            }
        }
    }

    /**
     * Writes {@code cf1:name = "zs"} into row {@code 001} of {@code test_api}.
     *
     * @throws IOException on any HBase communication failure
     */
    @Test
    public void put() throws IOException {
        try (Table table = conn.getTable(TEST_API)) {
            // A Put is bound to one row key; columns are then added to it.
            Put put = new Put(Bytes.toBytes("001"));
            put.addColumn(CF1, Bytes.toBytes("name"), Bytes.toBytes("zs"));
            table.put(put);
        }
    }

    /**
     * Reads {@code cf1:name} of row {@code 001} from {@code test_api} and prints it.
     *
     * @throws IOException on any HBase communication failure
     */
    @Test
    public void get() throws IOException {
        try (Table table = conn.getTable(TEST_API)) {
            Get get = new Get(Bytes.toBytes("001"));
            Result result = table.get(get);
            byte[] value = result.getValue(CF1, Bytes.toBytes("name"));
            System.out.println(Bytes.toString(value));
        }
    }

    /**
     * Changes column family {@code cf1} of {@code test_api} to keep 5 versions with a TTL of
     * 1000000, then pushes the modified descriptor back to the cluster.
     *
     * @throws IOException on any HBase communication failure
     */
    @Test
    public void alter() throws IOException {
        try (Admin admin = conn.getAdmin()) {
            // Start from the table's current schema so unrelated settings are preserved.
            HTableDescriptor tableDescriptor = admin.getTableDescriptor(TEST_API);
            for (HColumnDescriptor family : tableDescriptor.getColumnFamilies()) {
                if ("cf1".equals(family.getNameAsString())) {
                    family.setMaxVersions(5);
                    family.setTimeToLive(1000000);
                }
            }
            admin.modifyTable(TEST_API, tableDescriptor);
        }
    }

    /**
     * Scans at most 10 rows of {@code students} between row keys {@code 1500100030} and
     * {@code 1500100050}, printing the fixed set of {@code info} columns for each row.
     *
     * @throws IOException on any HBase communication failure
     */
    @Test
    public void scan() throws IOException {
        Scan scan = new Scan();
        scan.setLimit(10);
        scan.withStartRow(Bytes.toBytes("1500100030"));
        scan.withStopRow(Bytes.toBytes("1500100050"));

        try (Table students = conn.getTable(STUDENTS);
             ResultScanner scanner = students.getScanner(scan)) {
            // Each Result is one row; getValue looks a cell up by family + qualifier.
            for (Result row : scanner) {
                String id = Bytes.toString(row.getRow());
                String name = Bytes.toString(row.getValue(INFO, Bytes.toBytes("name")));
                String age = Bytes.toString(row.getValue(INFO, Bytes.toBytes("age")));
                String sex = Bytes.toString(row.getValue(INFO, Bytes.toBytes("sex")));
                String clazz = Bytes.toString(row.getValue(INFO, Bytes.toBytes("clazz")));
                System.out.println(id + " " + name + " " + age + " " + sex + " " + clazz);
            }
        }
    }

    /**
     * Scans the first 20 rows of {@code students} and prints every cell of each row.
     *
     * <p>Iterating over the cells (instead of asking for known qualifiers) suits rows whose
     * set of columns is not uniform. The {@code age} column is printed as an adult/minor
     * label instead of the raw number.
     *
     * @throws IOException on any HBase communication failure
     * @throws NumberFormatException if an {@code age} cell does not hold a decimal integer
     */
    @Test
    public  void scan2() throws IOException {
        Scan scan = new Scan();
        scan.setLimit(20);

        try (Table students = conn.getTable(STUDENTS);
             ResultScanner scanner = students.getScanner(scan)) {
            for (Result row : scanner) {
                System.out.print(Bytes.toString(row.getRow()) + " ");

                // listCells() returns null for an empty Result, so guard before iterating.
                List<Cell> cells = row.listCells();
                if (cells != null) {
                    for (Cell cell : cells) {
                        String value = Bytes.toString(CellUtil.cloneValue(cell));
                        String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
                        if ("age".equals(qualifier)) {
                            value = Integer.parseInt(value) >= 18 ? "成年" : "未成年";
                        }
                        System.out.print(value + " ");
                    }
                }
                System.out.println();
            }
        }
    }


    /**
     * Creates table {@code students} (family {@code info}) if needed and loads it from a
     * local CSV file whose lines are {@code id,name,age,sex,clazz}.
     *
     * <p>Puts are sent in batches of {@link #BATCH_SIZE} rather than one by one, which
     * reduces the number of round trips. Blank lines are skipped.
     *
     * @throws IOException on any file or HBase failure, or when a non-blank line has fewer
     *                     than five comma-separated fields
     */
    @Test
    public void putAll() throws IOException {
        try (Admin admin = conn.getAdmin()) {
            if (!admin.tableExists(STUDENTS)) {
                HTableDescriptor tableDescriptor = new HTableDescriptor(STUDENTS);
                tableDescriptor.addFamily(new HColumnDescriptor("info"));
                admin.createTable(tableDescriptor);
            }
        }

        try (BufferedReader reader =
                     new BufferedReader(new FileReader("C:/Users/19768/Desktop/students.txt"));
             Table students = conn.getTable(STUDENTS)) {
            List<Put> batch = new ArrayList<Put>(BATCH_SIZE);
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.trim().isEmpty()) {
                    continue; // tolerate blank / trailing empty lines
                }
                String[] fields = line.split(",");
                if (fields.length < 5) {
                    throw new IOException(
                            "Malformed student record (expected 5 comma-separated fields): " + line);
                }

                Put put = new Put(Bytes.toBytes(fields[0]));
                put.addColumn(INFO, Bytes.toBytes("name"), Bytes.toBytes(fields[1]));
                put.addColumn(INFO, Bytes.toBytes("age"), Bytes.toBytes(fields[2]));
                put.addColumn(INFO, Bytes.toBytes("sex"), Bytes.toBytes(fields[3]));
                put.addColumn(INFO, Bytes.toBytes("clazz"), Bytes.toBytes(fields[4]));
                batch.add(put);

                if (batch.size() == BATCH_SIZE) {
                    students.put(batch);
                    batch.clear();
                }
            }
            // Flush the final, partially filled batch.
            if (!batch.isEmpty()) {
                students.put(batch);
            }
        }
    }

    /**
     * Demonstrates dropping a table: makes sure table {@code delete} (family {@code info})
     * exists, then disables and deletes it.
     *
     * @throws IOException on any HBase communication failure
     */
    @Test
    public void delete() throws IOException {
        TableName tableName = TableName.valueOf("delete");

        try (Admin admin = conn.getAdmin()) {
            // Create the table only when it is missing, and attach the column family to the
            // descriptor before creating it.
            if (!admin.tableExists(tableName)) {
                HTableDescriptor tableDescriptor = new HTableDescriptor(tableName);
                tableDescriptor.addFamily(new HColumnDescriptor("info"));
                admin.createTable(tableDescriptor);
            }

            // A table must be disabled before it can be deleted; skip the disable call when a
            // previous run already left it disabled.
            if (admin.isTableEnabled(tableName)) {
                admin.disableTable(tableName);
            }
            admin.deleteTable(tableName);
        }
    }


    /**
     * Closes the shared connection, if {@link #init()} managed to open one.
     *
     * @throws IOException if closing the connection fails
     */
    @After
    public void close() throws IOException {
        // JUnit runs @After even when @Before threw, in which case conn is still null.
        if (conn != null) {
            conn.close();
        }
    }
}

练习：

导入电信数据：先把 Put 存入 List 集合，再分批写入 HBase

将开始时间作为版本号（时间戳）写入，然后查询并输出某个 rowkey 最近三个版本的经度和纬度

package hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;

/**
 * Exercise: load telecom records into HBase table {@code DIANXIN} using each record's start
 * time as the cell version, then read back up to three versions of longitude/latitude for
 * one row key.
 *
 * <p>Every test needs a reachable HBase cluster whose ZooKeeper quorum is
 * {@code master,node1,node2}.
 */
public class HbaseDX {
    private static final TableName DIANXIN = TableName.valueOf("DIANXIN");

    private static final byte[] CF1 = Bytes.toBytes("cf1");
    private static final byte[] JINDU = Bytes.toBytes("jindu");
    private static final byte[] WEIDU = Bytes.toBytes("weidu");

    /** Number of Puts sent to the server per batch in {@link #putAll()}. */
    private static final int BATCH_SIZE = 1000;

    /** Maximum number of versions kept per cell and read back by {@link #getthree()}. */
    private static final int MAX_VERSIONS = 3;

    Connection conn;
    Admin admin;
    Table dx;

    /**
     * Opens the connection plus the Admin and Table handles used by the tests.
     *
     * @throws IOException if the connection or one of the handles cannot be obtained
     */
    @Before
    public void init() throws IOException {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "master,node1,node2");
        conn = ConnectionFactory.createConnection(conf);
        admin = conn.getAdmin();
        dx = conn.getTable(DIANXIN);
    }

    /**
     * Creates table {@code DIANXIN} with column family {@code cf1} keeping up to three
     * versions per cell, unless the table already exists.
     *
     * @throws IOException on any HBase communication failure
     */
    @Test
    public void create() throws IOException {
        // Skip creation when the table is already there so the test can be re-run.
        if (admin.tableExists(DIANXIN)) {
            return;
        }
        HTableDescriptor dianxin = new HTableDescriptor(DIANXIN);
        HColumnDescriptor cf1 = new HColumnDescriptor("cf1");
        cf1.setMaxVersions(MAX_VERSIONS);
        dianxin.addFamily(cf1);
        admin.createTable(dianxin);
    }

    /**
     * Loads the local CSV file into {@code DIANXIN} in batches of {@link #BATCH_SIZE}.
     *
     * <p>From each line, field 0 becomes the row key, field 1 is parsed as a {@code long} and
     * used as the cell timestamp (version), and fields 4 and 5 are stored as
     * {@code cf1:jindu} and {@code cf1:weidu}. Blank lines are skipped.
     *
     * @throws IOException on any file or HBase failure, or when a non-blank line has fewer
     *                     than six comma-separated fields
     * @throws NumberFormatException if field 1 of a line is not a valid {@code long}
     */
    @Test
    public void putAll() throws IOException {
        try (BufferedReader reader =
                     new BufferedReader(new FileReader("D:/ALanzhishujia/soft/data/DIANXIN.csv"))) {
            ArrayList<Put> batch = new ArrayList<Put>(BATCH_SIZE);
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.trim().isEmpty()) {
                    continue; // tolerate blank / trailing empty lines
                }
                String[] fields = line.split(",");
                if (fields.length < 6) {
                    throw new IOException(
                            "Malformed DIANXIN record (expected at least 6 comma-separated fields): "
                                    + line);
                }

                // The record's start time doubles as the version of both cells.
                long startTime = Long.parseLong(fields[1]);

                Put put = new Put(Bytes.toBytes(fields[0]));
                put.addColumn(CF1, JINDU, startTime, Bytes.toBytes(fields[4]));
                put.addColumn(CF1, WEIDU, startTime, Bytes.toBytes(fields[5]));
                batch.add(put);

                if (batch.size() == BATCH_SIZE) {
                    dx.put(batch);
                    batch.clear();
                }
            }
            // Flush the final, partially filled batch.
            if (!batch.isEmpty()) {
                dx.put(batch);
            }
        }
    }

    /**
     * Prints up to three versions of {@code cf1:jindu,cf1:weidu} for one fixed row key.
     *
     * <p>Equivalent hbase shell command:
     * <pre>
     * get 'DIANXIN', 'FDA42AB4AA84D13C2AC7512D1BAEE93B7DCC327A',
     *     {COLUMN=>['cf1:jindu','cf1:weidu'],VERSIONS=>3}
     * </pre>
     *
     * <p>Nothing is printed when the row does not exist; fewer than three lines are printed
     * when fewer versions are stored.
     *
     * @throws IOException on any HBase communication failure
     */
    @Test
    public void getthree() throws IOException {
        String adm="FDA42AB4AA84D13C2AC7512D1BAEE93B7DCC327A";
        Get get = new Get(Bytes.toBytes(adm));
        // Fetch only the two columns that are printed, as the shell command does.
        get.addColumn(CF1, JINDU);
        get.addColumn(CF1, WEIDU);
        get.setMaxVersions(MAX_VERSIONS);

        Result rs = dx.get(get);
        ArrayList<String> jinlist = new ArrayList<String>();
        ArrayList<String> weilist = new ArrayList<String>();
        // An empty Result has no cell array, so only iterate when something came back.
        if (!rs.isEmpty()) {
            for (Cell cell : rs.rawCells()) {
                String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
                String value = Bytes.toString(CellUtil.cloneValue(cell));
                if ("jindu".equals(qualifier)) {
                    jinlist.add(value);
                } else if ("weidu".equals(qualifier)) {
                    weilist.add(value);
                }
            }
        }

        // NOTE(review): pairing by index assumes both columns return their versions in the
        // same order; putAll() writes them with identical timestamps - confirm for other data.
        int available = Math.min(MAX_VERSIONS, Math.min(jinlist.size(), weilist.size()));
        for (int i = 0; i < available; i++) {
            System.out.println(jinlist.get(i)+","+weilist.get(i));
        }
    }

    /**
     * Releases the Table, Admin and Connection, in that order, skipping any that
     * {@link #init()} did not manage to open.
     *
     * @throws IOException if closing one of the resources fails
     */
    @After
    public void close() throws IOException {
        // Nested finally blocks make sure a failure closing one handle does not leak the rest.
        try {
            if (dx != null) {
                dx.close();
            }
        } finally {
            try {
                if (admin != null) {
                    admin.close();
                }
            } finally {
                if (conn != null) {
                    conn.close();
                }
            }
        }
    }
}

posted on 2021-10-11 23:43 糟糟张阅读(193) 评论(0) 收藏举报

刷新页面返回顶部

再多学一点吧

导航

公告

HBase API

练习：