There are two ways to read from HBase here: one is to extend RichSourceFunction and override its methods, the other is to override Flink's TableInputFormat (not OutputFormat, which is for writing). The code for both is shown below.
Method 1: extend RichSourceFunction
package com.my.flink.utils.streaming.hbase;

import com.my.flink.utils.config.ConfigKeys;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.source.RichSourceFunction;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.Iterator;

/**
 * @Description hbase reader
 * @Author jiangxiaozhi
 * @Date 2018/10/17 10:05
 **/
public class HBaseReader extends RichSourceFunction<Tuple2<String, String>> {
    private static final Logger logger = LoggerFactory.getLogger(HBaseReader.class);

    private Connection conn = null;
    private Table table = null;
    private Scan scan = null;

    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        conn = HBaseConnection.getHBaseConn();
        table = conn.getTable(TableName.valueOf(ConfigKeys.HBASE_SOURCE_TABLE()));
        // Scan the rowkey range [1001, 1004) of the configured column family
        scan = new Scan();
        scan.setStartRow(Bytes.toBytes("1001"));
        scan.setStopRow(Bytes.toBytes("1004"));
        scan.addFamily(Bytes.toBytes(ConfigKeys.HBASE_SOURCE_CF()));
    }

    @Override
    public void run(SourceContext<Tuple2<String, String>> ctx) throws Exception {
        ResultScanner rs = table.getScanner(scan);
        Iterator<Result> iterator = rs.iterator();
        while (iterator.hasNext()) {
            Result result = iterator.next();
            String rowkey = Bytes.toString(result.getRow());
            // Concatenate all cell values of the row into one comma-separated string
            StringBuffer sb = new StringBuffer();
            for (Cell cell : result.listCells()) {
                String value = Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength());
                sb.append(value).append(",");
            }
            String valueString = sb.replace(sb.length() - 1, sb.length(), "").toString();
            Tuple2<String, String> tuple2 = new Tuple2<>();
            tuple2.setFields(rowkey, valueString);
            ctx.collect(tuple2);
        }
        rs.close();
    }

    @Override
    public void cancel() {
        try {
            if (table != null) {
                table.close();
            }
            if (conn != null) {
                conn.close();
            }
        } catch (IOException e) {
            logger.error("Close HBase Exception:", e);
        }
    }
}
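Both the reader above and the OutputFormat further down obtain their connection from a HBaseConnection helper that the post does not show. A minimal sketch of what such a helper might look like, assuming ConfigKeys exposes ZOOKEEPER_QUORUM() and ZOOKEEPER_CLIENT_PORT() accessors (these names are assumptions based on the Scala snippet below):

package com.my.flink.utils.streaming.hbase;

import com.my.flink.utils.config.ConfigKeys;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

import java.io.IOException;

/**
 * Hypothetical sketch of the HBaseConnection helper used by HBaseReader and
 * HBaseOutputFormat; the original post does not show it, and the ConfigKeys
 * accessor names are assumptions.
 */
public class HBaseConnection {

    private static volatile Connection conn = null;

    public static Connection getHBaseConn() throws IOException {
        // Lazily create one shared connection per JVM
        if (conn == null || conn.isClosed()) {
            synchronized (HBaseConnection.class) {
                if (conn == null || conn.isClosed()) {
                    org.apache.hadoop.conf.Configuration conf = HBaseConfiguration.create();
                    // ZooKeeper address and port of the HBase cluster, taken from the project config
                    conf.set(HConstants.ZOOKEEPER_QUORUM, ConfigKeys.ZOOKEEPER_QUORUM());
                    conf.set(HConstants.ZOOKEEPER_CLIENT_PORT, ConfigKeys.ZOOKEEPER_CLIENT_PORT());
                    conn = ConnectionFactory.createConnection(conf);
                }
            }
        }
        return conn;
    }
}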
Method 2: override TableInputFormat
env.createInput(new TableInputFormat[org.apache.flink.api.java.tuple.Tuple2[String, String]] {

  // conn is not a field of TableInputFormat, so it has to be declared here;
  // table and scan are inherited protected fields
  private var conn: Connection = _

  override def mapResultToTuple(r: Result): org.apache.flink.api.java.tuple.Tuple2[String, String] = {
    val rowkey = Bytes.toString(r.getRow)
    // Concatenate all cell values of the row into one comma-separated string
    val sb = new StringBuffer()
    for (cell: Cell <- r.rawCells()) {
      val value = Bytes.toString(cell.getValueArray, cell.getValueOffset, cell.getValueLength)
      sb.append(value).append(",")
    }
    val valueString = sb.replace(sb.length() - 1, sb.length(), "").toString
    val tuple2 = new org.apache.flink.api.java.tuple.Tuple2[String, String]
    tuple2.setField(rowkey, 0)
    tuple2.setField(valueString, 1)
    tuple2
  }

  override def getTableName: String = HBASE_SOURCE_TABLE

  override def getScanner: Scan = scan

  override def configure(parameters: Configuration): Unit = {
    val conf = HBaseConfiguration.create()
    conf.set(HConstants.ZOOKEEPER_QUORUM, ZOOKEEPER_QUORUM)
    conf.set(HConstants.ZOOKEEPER_CLIENT_PORT, ZOOKEEPER_CLIENT_PORT)
    conn = ConnectionFactory.createConnection(conf)
    table = classOf[HTable].cast(conn.getTable(TableName.valueOf(HBASE_SOURCE_TABLE)))
    // Scan the rowkey range [1001, 1004) of the source column family
    scan = new Scan() {
      setStartRow(Bytes.toBytes("1001"))
      setStopRow(Bytes.toBytes("1004"))
      addFamily(Bytes.toBytes(HBASE_SOURCE_CF))
    }
  }

  override def close(): Unit = {
    if (table != null) {
      table.close()
    }
    if (conn != null) {
      conn.close()
    }
  }
})
The env above is a StreamExecutionEnvironment.
There are also two ways to write to HBase. One is similar to writing to MySQL (a RichSinkFunction-based writer; a sketch of it is included after the OutputFormat code). The focus here is on implementing the OutputFormat interface:
package com.my.flink.utils.streaming.hbase;

import com.my.flink.utils.config.ConfigKeys;
import org.apache.flink.api.common.io.OutputFormat;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;

/**
 * @Description HBaseOutputFormat
 * @Author jiangxiaozhi
 * @Date 2018/10/16 14:06
 **/
public class HBaseOutputFormat implements OutputFormat<Tuple2<String, String>> {
    private static final Logger logger = LoggerFactory.getLogger(HBaseOutputFormat.class);

    private Connection conn = null;
    private Table table = null;

    @Override
    public void configure(Configuration parameters) {
    }

    @Override
    public void open(int taskNumber, int numTasks) throws IOException {
        conn = HBaseConnection.getHBaseConn();
        table = conn.getTable(TableName.valueOf(ConfigKeys.HBASE_SINK_TABLE()));
    }

    @Override
    public void writeRecord(Tuple2<String, String> record) throws IOException {
        // f0 is the rowkey, f1 is written to qualifier "test1" of the sink column family
        Put put = new Put(Bytes.toBytes(record.f0));
        put.addColumn(Bytes.toBytes(ConfigKeys.HBASE_SINK_CF()), Bytes.toBytes("test1"), Bytes.toBytes(record.f1));
        table.put(put);
    }

    @Override
    public void close() throws IOException {
        if (table != null) {
            table.close();
        }
        if (conn != null) {
            conn.close();
        }
    }
}
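The other write path mentioned above, the one analogous to writing to MySQL, would be a RichSinkFunction. The post does not implement it (the usage notes below point to JdbcWriter instead), but a minimal sketch under the same assumptions (the HBaseConnection helper and the ConfigKeys accessors used elsewhere in this post) might look like this; it is not part of the original code:

package com.my.flink.utils.streaming.hbase;

import com.my.flink.utils.config.ConfigKeys;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

/**
 * Hypothetical HBaseWriter sketch: a RichSinkFunction-based sink, analogous to
 * a JDBC/MySQL writer. Not implemented in the original post.
 */
public class HBaseWriter extends RichSinkFunction<Tuple2<String, String>> {

    private Connection conn = null;
    private Table table = null;

    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        // Reuse the shared connection helper and the sink table from the config
        conn = HBaseConnection.getHBaseConn();
        table = conn.getTable(TableName.valueOf(ConfigKeys.HBASE_SINK_TABLE()));
    }

    @Override
    public void invoke(Tuple2<String, String> value, Context context) throws Exception {
        // f0 is the rowkey, f1 the value; qualifier "test1" mirrors HBaseOutputFormat
        Put put = new Put(Bytes.toBytes(value.f0));
        put.addColumn(Bytes.toBytes(ConfigKeys.HBASE_SINK_CF()), Bytes.toBytes("test1"), Bytes.toBytes(value.f1));
        table.put(put);
    }

    @Override
    public void close() throws Exception {
        if (table != null) {
            table.close();
        }
        if (conn != null) {
            conn.close();
        }
    }
}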
Usage:
Reading from HBase:
1. With HBaseReader: env.addSource(new HBaseReader()) produces a DataStream.
2. With TableInputFormat: the DataStream is produced as shown in Method 2, via env.createInput(...).

Writing to HBase:
1. With an HBaseWriter sink function: dataStream.addSink(new HBaseWriter()). HBaseWriter is not implemented in this post; see JdbcWriter for reference, or the sketch above.
2. With the OutputFormat: dataStream.writeUsingOutputFormat(new HBaseOutputFormat()).
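Putting the pieces together, a minimal end-to-end job wiring the reader to the OutputFormat sink could look like the following (the class name HBaseCopyJob and the job name string are illustrative, not from the original post):

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class HBaseCopyJob {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Read (rowkey, value) tuples from the source table via the RichSourceFunction reader
        DataStream<Tuple2<String, String>> stream = env.addSource(new HBaseReader());

        // Write each tuple to the sink table via the OutputFormat
        stream.writeUsingOutputFormat(new HBaseOutputFormat());

        env.execute("hbase read/write example");
    }
}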
This article is from 博客园 (cnblogs), author: 大码王. Please credit the original link when reposting: https://www.cnblogs.com/huanghanyu/