再多学一点吧

导航

< 2025年3月 >
23 24 25 26 27 28 1
2 3 4 5 6 7 8
9 10 11 12 13 14 15
16 17 18 19 20 21 22
23 24 25 26 27 28 29
30 31 1 2 3 4 5

统计

HBase API

复制代码
package hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.io.*;
import java.util.ArrayList;
import java.util.List;

public class HBaseAPI {
    //对表结构的修改需要Admin进行操作,获取表数据进行getTable操作
    Connection conn;

    @Before
    public void init() throws IOException {
        //创建配置,指定zk集群地址
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "master,node1,node2");  //hbase.zookeeper.quorum配置在conf/hbase.site.xml中
        //创建连接
        conn = ConnectionFactory.createConnection(conf);

    }

    @Test
    public void create() throws IOException {
        //创建Admin对象
        Admin admin = conn.getAdmin();

        //做一个判断,表不存在再创建
        if (!admin.tableExists(TableName.valueOf("test_api"))){
            //创建test_api表,因为是新建所有new
            HTableDescriptor test_api = new HTableDescriptor(TableName.valueOf("test_api"));

            //添加列蔟,new新建
            HColumnDescriptor cf1 = new HColumnDescriptor("cf1");
            //设置TTL
            cf1.setTimeToLive(10);
            //设置版本
            cf1.setMaxVersions(3);

            test_api.addFamily(cf1);
            admin.createTable(test_api);
        }

    }

    @Test
    public void put() throws IOException {
        //通过conn连接先获取表
        Table test_api = conn.getTable(TableName.valueOf("test_api"));
        //使用Put对象来新建ROWKEY
        Put put = new Put("001".getBytes());
        //给put中增加需要增加的列蔟
        put.addColumn("cf1".getBytes(),"name".getBytes(),"zs".getBytes());


        //把整理好的put导入到表中
        test_api.put(put);
    }

    @Test
    public void get() throws IOException {
        //通过conn获取表名的连接
        Table test_api = conn.getTable(TableName.valueOf("test_api"));
        //通过ROWKEY来get
        Get get = new Get("001".getBytes());

        Result rs = test_api.get(get);
        byte[] value = rs.getValue("cf1".getBytes(), "name".getBytes());
        System.out.println(Bytes.toString(value));


    }

    @Test
    public void alter() throws IOException {
        Admin admin = conn.getAdmin();
        //获取表原有的结构
        HTableDescriptor test_api = admin.getTableDescriptor(TableName.valueOf("test_api"));
        //获取所有列蔟构成的数组
        HColumnDescriptor[] columnFamilies = test_api.getColumnFamilies();
        for (HColumnDescriptor cf : columnFamilies) {
            //获取列蔟的名称
            String s = cf.getNameAsString();
            if ("cf1".equals(s)){
                cf.setMaxVersions(5);
                cf.setTimeToLive(1000000);
            }
        }
admin.modifyTable(TableName.valueOf(
"test_api"),test_api); } @Test public void scan() throws IOException { Table students = conn.getTable(TableName.valueOf("students")); Scan scan = new Scan(); scan.setLimit(10); scan.withStartRow("1500100030".getBytes()); scan.withStopRow("1500100050".getBytes()); //通过表.getScanner方法获取数据 ResultScanner rs = students.getScanner(scan); //因为scan是大量数据,是一行一行存储,所以是以数组存储 for (Result r : rs) { //getValue方法是通过传入列蔟和列名得到value String id = Bytes.toString(r.getRow()); String name = Bytes.toString(r.getValue("info".getBytes(), "name".getBytes())); String age = Bytes.toString(r.getValue("info".getBytes(), "age".getBytes())); String sex = Bytes.toString(r.getValue("info".getBytes(), "sex".getBytes())); String clazz = Bytes.toString(r.getValue("info".getBytes(), "clazz".getBytes())); System.out.println(id+" "+name+" "+age+" "+sex+" "+clazz); } } @Test //用cell单元格获取数据 //适合于每条数据结构不唯一的情况下,直接遍历每条数据包含的所有cell public void scan2() throws IOException { Table students = conn.getTable(TableName.valueOf("students")); Scan scan = new Scan(); scan.setLimit(20); ResultScanner rs = students.getScanner(scan); //每一个result就是一条数据 for (Result r : rs) { //获取rowkey String rowkey = Bytes.toString(r.getRow()); System.out.print(rowkey+" "); //获取单元格数据CellUtil.clone(单元格包括值和版本号) List<Cell> cells = r.listCells(); for (Cell cell : cells) { byte[] bytes = CellUtil.cloneValue(cell); String s1 = Bytes.toString(bytes); //获取列名 String quali = Bytes.toString(CellUtil.cloneQualifier(cell)); if ("age".equals(quali)){ if (Integer.valueOf(s1)>=18){ s1="成年"; } else { s1="未成年"; } } System.out.print(s1+" "); } System.out.println(); } } @Test public void putAll() throws IOException { Admin admin = conn.getAdmin(); if (!admin.tableExists(TableName.valueOf("students"))) { HTableDescriptor students = new HTableDescriptor(TableName.valueOf("students")); HColumnDescriptor info = new HColumnDescriptor("info"); students.addFamily(info); admin.createTable(students); } BufferedReader br = new BufferedReader(new 
FileReader("C:/Users/19768/Desktop/students.txt")); Table students = conn.getTable(TableName.valueOf("students")); String line=null; //创建数组存储put会加快运行速度 ArrayList<Put> puts = new ArrayList<Put>(); //初始每10个put写入数组 int batchsize=10; int i=0; while ((line=br.readLine())!=null){ String[] split = line.split(","); String id = split[0]; String name = split[1]; String age = split[2]; String sex = split[3]; String clazz = split[4]; Put put = new Put(id.getBytes()); put.addColumn("info".getBytes(),"name".getBytes(),name.getBytes()); put.addColumn("info".getBytes(),"age".getBytes(),age.getBytes()); put.addColumn("info".getBytes(),"sex".getBytes(),sex.getBytes()); put.addColumn("info".getBytes(),"clazz".getBytes(),clazz .getBytes()); //添加到数组 puts.add(put); i++; if (i==batchsize){ students.put(puts); //将数组变成空 puts=new ArrayList<Put>(); i=0; } } if (i!=0){ students.put(puts); } } @Test public void delete() throws IOException { Admin admin = conn.getAdmin(); if (admin.tableExists(TableName.valueOf("delete"))) { HTableDescriptor delete = new HTableDescriptor(TableName.valueOf("delete")); HColumnDescriptor info = new HColumnDescriptor("info"); admin.createTable(delete); } if(admin.tableExists(TableName.valueOf("delete"))){ admin.disableTable(TableName.valueOf("delete")); admin.deleteTable(TableName.valueOf("delete")); } } @After public void close() throws IOException { conn.close(); } }
复制代码

 

练习:

导入电信数据,使用 List 批量(分批)写入

将开始时间设置为单元格的版本号(时间戳),查询时读取最近三个版本,输出对应的经度和纬度

复制代码
package hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;

/**
 * JUnit exercise on the {@code DIANXIN} table: bulk-loads a CSV file using the record's
 * start time as the cell timestamp (version), then reads back up to three versions of
 * the longitude/latitude columns for one row.
 */
public class HbaseDX {

    private static final TableName DIANXIN = TableName.valueOf("DIANXIN");
    private static final byte[] CF1 = Bytes.toBytes("cf1");
    private static final byte[] JINDU = Bytes.toBytes("jindu");
    private static final byte[] WEIDU = Bytes.toBytes("weidu");

    /** Versions kept per cell and requested when reading. */
    private static final int VERSIONS = 3;
    /** Number of Puts accumulated before they are flushed to the table. */
    private static final int BATCH_SIZE = 1000;

    Connection conn;
    Admin admin;
    Table dx;

    /**
     * Opens the connection plus the Admin and Table handles shared by the tests.
     *
     * @throws IOException if the connection or handles cannot be obtained
     */
    @Before
    public void init() throws IOException {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "master,node1,node2");
        conn = ConnectionFactory.createConnection(conf);
        admin = conn.getAdmin();
        dx = conn.getTable(DIANXIN);
    }

    /**
     * Creates table {@code DIANXIN} with family {@code cf1} keeping {@link #VERSIONS}
     * versions per cell. Does nothing if the table already exists.
     *
     * @throws IOException on any HBase communication failure
     */
    @Test
    public void create() throws IOException {
        // Guard against re-running the test: creating an existing table fails.
        if (admin.tableExists(DIANXIN)) {
            return;
        }
        HTableDescriptor dianxin = new HTableDescriptor(DIANXIN);
        HColumnDescriptor cf1 = new HColumnDescriptor("cf1");
        cf1.setMaxVersions(VERSIONS);
        dianxin.addFamily(cf1);
        admin.createTable(dianxin);
    }

    /**
     * Loads {@code DIANXIN.csv} into the table in batches of {@link #BATCH_SIZE}.
     * Field 0 is used as the row key, field 1 (parsed as a long) as the cell timestamp,
     * and fields 4 and 5 as the {@code jindu} and {@code weidu} values.
     *
     * @throws IOException if the file cannot be read or HBase communication fails
     */
    @Test
    public void putAll() throws IOException {
        try (BufferedReader br = new BufferedReader(new FileReader("D:/ALanzhishujia/soft/data/DIANXIN.csv"))) {
            ArrayList<Put> puts = new ArrayList<Put>(BATCH_SIZE);
            String line;
            while ((line = br.readLine()) != null) {
                // Tolerate blank lines (e.g. a trailing newline at end of file).
                if (line.trim().isEmpty()) {
                    continue;
                }
                String[] split = line.split(",");
                String adm = split[0];
                String time = split[1];
                String jindu = split[4];
                String weidu = split[5];

                // Use the start time as the version of both cells.
                long startTime = Long.parseLong(time);

                Put put = new Put(Bytes.toBytes(adm));
                put.addColumn(CF1, JINDU, startTime, Bytes.toBytes(jindu));
                put.addColumn(CF1, WEIDU, startTime, Bytes.toBytes(weidu));
                puts.add(put);

                if (puts.size() == BATCH_SIZE) {
                    dx.put(puts);
                    puts = new ArrayList<Put>(BATCH_SIZE);
                }
            }
            // Flush the final partial batch.
            if (!puts.isEmpty()) {
                dx.put(puts);
            }
        }
    }

    /**
     * Prints up to {@link #VERSIONS} longitude,latitude pairs for one fixed row.
     *
     * <p>Equivalent hbase shell command:
     * <pre>
     * get 'DIANXIN', 'FDA42AB4AA84D13C2AC7512D1BAEE93B7DCC327A',
     *     {COLUMN=>['cf1:jindu','cf1:weidu'],VERSIONS=>3}
     * </pre>
     *
     * @throws IOException on any HBase communication failure
     */
    @Test
    public void getthree() throws IOException {
        String adm = "FDA42AB4AA84D13C2AC7512D1BAEE93B7DCC327A";
        Get get = new Get(Bytes.toBytes(adm));
        get.setMaxVersions(VERSIONS);

        Result rs = dx.get(get);
        ArrayList<String> jinlist = new ArrayList<String>();
        ArrayList<String> weilist = new ArrayList<String>();

        // listCells() returns null when the row does not exist.
        List<Cell> cells = rs.listCells();
        if (cells != null) {
            for (Cell cell : cells) {
                String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
                String value = Bytes.toString(CellUtil.cloneValue(cell));
                if ("jindu".equals(qualifier)) {
                    jinlist.add(value);
                } else if ("weidu".equals(qualifier)) {
                    weilist.add(value);
                }
            }
        }

        // Pair values by position; the row may hold fewer than VERSIONS versions.
        // NOTE(review): pairing by index assumes both columns were written with the same
        // timestamps, as putAll() does — confirm if data is loaded by other means.
        int pairs = Math.min(VERSIONS, Math.min(jinlist.size(), weilist.size()));
        for (int i = 0; i < pairs; i++) {
            System.out.println(jinlist.get(i) + "," + weilist.get(i));
        }
    }

    /**
     * Releases the Table and Admin handles, then the connection.
     *
     * @throws IOException if closing any resource fails
     */
    @After
    public void close() throws IOException {
        // init() may have failed part-way, leaving some fields unset.
        try {
            if (dx != null) {
                dx.close();
            }
            if (admin != null) {
                admin.close();
            }
        } finally {
            if (conn != null) {
                conn.close();
            }
        }
    }
}
复制代码

 

 

posted on   糟糟张  阅读(172)  评论(0)  编辑  收藏  举报

编辑推荐:
· 10年+ .NET Coder 心语,封装的思维:从隐藏、稳定开始理解其本质意义
· .NET Core 中如何实现缓存的预热?
· 从 HTTP 原因短语缺失研究 HTTP/2 和 HTTP/3 的设计差异
· AI与.NET技术实操系列:向量存储与相似性搜索在 .NET 中的实现
· 基于Microsoft.Extensions.AI核心库实现RAG应用
阅读排行:
· TypeScript + Deepseek 打造卜卦网站:技术与玄学的结合
· 阿里巴巴 QwQ-32B真的超越了 DeepSeek R-1吗?
· 【译】Visual Studio 中新的强大生产力特性
· 10年+ .NET Coder 心语 ── 封装的思维:从隐藏、稳定开始理解其本质意义
· 【设计模式】告别冗长if-else语句:使用策略模式优化代码结构
点击右上角即可分享
微信分享提示