Flink TableApi读取Hbase

  由于在实时同步数据过程中,需要对数据进行一次初始化维表关联的操作,需要读取HBase里面的历史数据与维表进行关联,刚开始使用spark做离线处理,现在使用Flink做离线的ETL。

  • 环境:

    Flink:1.12.2

    HBase:2.1.0-cdh6.2.1

  • pom依赖
    <!--
    Licensed to the Apache Software Foundation (ASF) under one
    or more contributor license agreements.  See the NOTICE file
    distributed with this work for additional information
    regarding copyright ownership.  The ASF licenses this file
    to you under the Apache License, Version 2.0 (the
    "License"); you may not use this file except in compliance
    with the License.  You may obtain a copy of the License at
    
      http://www.apache.org/licenses/LICENSE-2.0
    
    Unless required by applicable law or agreed to in writing,
    software distributed under the License is distributed on an
    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    KIND, either express or implied.  See the License for the
    specific language governing permissions and limitations
    under the License.
    -->
    <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
             xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
        <modelVersion>4.0.0</modelVersion>
    
        <groupId>com.shydow</groupId>
        <artifactId>flink-tutorial</artifactId>
        <version>1.0-SNAPSHOT</version>
        <packaging>jar</packaging>
    
        <name>Flink Tutorial</name>
        <url>https://flink.apache.org</url>
    
        <properties>
            <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
            <flink.version>1.12.2</flink.version>
            <hbase.version>2.1.0</hbase.version>
            <target.java.version>1.8</target.java.version>
            <scala.binary.version>2.11</scala.binary.version>
            <maven.compiler.source>${target.java.version}</maven.compiler.source>
            <maven.compiler.target>${target.java.version}</maven.compiler.target>
            <log4j.version>2.12.1</log4j.version>
        </properties>
    
        <repositories>
            <repository>
                <id>apache.snapshots</id>
                <name>Apache Development Snapshot Repository</name>
                <url>https://repository.apache.org/content/repositories/snapshots/</url>
                <releases>
                    <enabled>false</enabled>
                </releases>
                <snapshots>
                    <enabled>true</enabled>
                </snapshots>
            </repository>
        </repositories>
    
        <dependencies>
            <dependency>
                <groupId>org.apache.flink</groupId>
                <artifactId>flink-walkthrough-common_${scala.binary.version}</artifactId>
                <version>${flink.version}</version>
            </dependency>
    
            <!-- This dependency is provided, because it should not be packaged into the JAR file. -->
            <dependency>
                <groupId>org.apache.flink</groupId>
                <artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
                <version>${flink.version}</version>
            </dependency>
            <dependency>
                <groupId>org.apache.flink</groupId>
                <artifactId>flink-clients_${scala.binary.version}</artifactId>
                <version>${flink.version}</version>
            </dependency>
    
            <!-- Scala Dependency -->
            <dependency>
                <groupId>org.apache.flink</groupId>
                <artifactId>flink-streaming-scala_${scala.binary.version}</artifactId>
                <version>${flink.version}</version>
            </dependency>
            <dependency>
                <groupId>org.apache.flink</groupId>
                <artifactId>flink-scala_${scala.binary.version}</artifactId>
                <version>${flink.version}</version>
            </dependency>
    
            <!-- Add connector dependencies here. They must be in the default scope (compile). -->
    
            <!-- Example:
    
            <dependency>
                <groupId>org.apache.flink</groupId>
                <artifactId>flink-connector-kafka_${scala.binary.version}</artifactId>
                <version>${flink.version}</version>
            </dependency>
            -->
            <dependency>
                <groupId>org.apache.flink</groupId>
                <artifactId>flink-connector-kafka_${scala.binary.version}</artifactId>
                <version>${flink.version}</version>
            </dependency>
            <dependency>
                <groupId>org.apache.flink</groupId>
                <artifactId>flink-connector-jdbc_2.11</artifactId>
                <version>1.12.2</version>
            </dependency>
            <dependency>
                <groupId>org.apache.flink</groupId>
                <artifactId>flink-csv</artifactId>
                <version>${flink.version}</version>
            </dependency>
            <dependency>
                <groupId>org.apache.flink</groupId>
                <artifactId>flink-json</artifactId>
                <version>${flink.version}</version>
            </dependency>
    
            <!-- table api -->
            <dependency>
                <groupId>org.apache.flink</groupId>
                <artifactId>flink-table-planner-blink_${scala.binary.version}</artifactId>
                <version>${flink.version}</version>
            </dependency>
            <dependency>
                <groupId>org.apache.flink</groupId>
                <artifactId>flink-table-planner_${scala.binary.version}</artifactId>
                <version>${flink.version}</version>
            </dependency>
    
            <!-- HBase -->
            <dependency>
                <groupId>org.apache.flink</groupId>
                <artifactId>flink-connector-hbase-2.2_${scala.binary.version}</artifactId>
                <version>${flink.version}</version>
            </dependency>
            <dependency>
                <groupId>org.apache.hbase</groupId>
                <artifactId>hbase-server</artifactId>
                <version>${hbase.version}</version>
            </dependency>
            <dependency>
                <groupId>org.apache.hbase</groupId>
                <artifactId>hbase-client</artifactId>
                <version>${hbase.version}</version>
            </dependency>
    
            <dependency>
                <groupId>mysql</groupId>
                <artifactId>mysql-connector-java</artifactId>
                <version>5.1.38</version>
                <!--<version>8.0.20</version>-->
            </dependency>
            <dependency>
                <groupId>com.alibaba</groupId>
                <artifactId>fastjson</artifactId>
                <version>1.2.76</version>
            </dependency>
            <dependency>
                <groupId>org.projectlombok</groupId>
                <artifactId>lombok</artifactId>
                <version>RELEASE</version>
                <scope>compile</scope>
            </dependency>
    
            <!-- Add logging framework, to produce console output when running in the IDE. -->
            <!-- These dependencies are excluded from the application JAR by default. -->
            <dependency>
                <groupId>org.apache.logging.log4j</groupId>
                <artifactId>log4j-slf4j-impl</artifactId>
                <version>${log4j.version}</version>
                <scope>runtime</scope>
            </dependency>
            <dependency>
                <groupId>org.apache.logging.log4j</groupId>
                <artifactId>log4j-api</artifactId>
                <version>${log4j.version}</version>
                <scope>runtime</scope>
            </dependency>
            <dependency>
                <groupId>org.apache.logging.log4j</groupId>
                <artifactId>log4j-core</artifactId>
                <version>${log4j.version}</version>
                <scope>runtime</scope>
            </dependency>
        </dependencies>
    
        <build>
            <plugins>
    
                <!-- Java Compiler -->
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-compiler-plugin</artifactId>
                    <version>3.1</version>
                    <configuration>
                        <source>${target.java.version}</source>
                        <target>${target.java.version}</target>
                    </configuration>
                </plugin>
    
                <!-- We use the maven-shade plugin to create a fat jar that contains all necessary dependencies. -->
                <!-- Change the value of <mainClass>...</mainClass> if your program entry point changes. -->
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-shade-plugin</artifactId>
                    <version>3.0.0</version>
                    <executions>
                        <!-- Run shade goal on package phase -->
                        <execution>
                            <phase>package</phase>
                            <goals>
                                <goal>shade</goal>
                            </goals>
                            <configuration>
                                <artifactSet>
                                    <excludes>
                                        <exclude>org.apache.flink:force-shading</exclude>
                                        <exclude>com.google.code.findbugs:jsr305</exclude>
                                        <exclude>org.slf4j:*</exclude>
                                        <exclude>org.apache.logging.log4j:*</exclude>
                                    </excludes>
                                </artifactSet>
                                <filters>
                                    <filter>
                                        <!-- Do not copy the signatures in the META-INF folder.
                                        Otherwise, this might cause SecurityExceptions when using the JAR. -->
                                        <artifact>*:*</artifact>
                                        <excludes>
                                            <exclude>META-INF/*.SF</exclude>
                                            <exclude>META-INF/*.DSA</exclude>
                                            <exclude>META-INF/*.RSA</exclude>
                                        </excludes>
                                    </filter>
                                </filters>
                                <transformers>
                                    <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                        <mainClass>com.shydow.FraudDetectionJob</mainClass>
                                    </transformer>
                                </transformers>
                            </configuration>
                        </execution>
                    </executions>
                </plugin>
            </plugins>
    
            <pluginManagement>
                <plugins>
    
                    <!-- This improves the out-of-the-box experience in Eclipse by resolving some warnings. -->
                    <plugin>
                        <groupId>org.eclipse.m2e</groupId>
                        <artifactId>lifecycle-mapping</artifactId>
                        <version>1.0.0</version>
                        <configuration>
                            <lifecycleMappingMetadata>
                                <pluginExecutions>
                                    <pluginExecution>
                                        <pluginExecutionFilter>
                                            <groupId>org.apache.maven.plugins</groupId>
                                            <artifactId>maven-shade-plugin</artifactId>
                                            <versionRange>[3.0.0,)</versionRange>
                                            <goals>
                                                <goal>shade</goal>
                                            </goals>
                                        </pluginExecutionFilter>
                                        <action>
                                            <ignore/>
                                        </action>
                                    </pluginExecution>
                                    <pluginExecution>
                                        <pluginExecutionFilter>
                                            <groupId>org.apache.maven.plugins</groupId>
                                            <artifactId>maven-compiler-plugin</artifactId>
                                            <versionRange>[3.1,)</versionRange>
                                            <goals>
                                                <goal>testCompile</goal>
                                                <goal>compile</goal>
                                            </goals>
                                        </pluginExecutionFilter>
                                        <action>
                                            <ignore/>
                                        </action>
                                    </pluginExecution>
                                </pluginExecutions>
                            </lifecycleMappingMetadata>
                        </configuration>
                    </plugin>
                </plugins>
            </pluginManagement>
        </build>
    </project>

     

  • 具体代码
    package com.shydow.source.custom;
    
    import com.alibaba.fastjson.JSONObject;
    import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
    import org.apache.flink.table.api.*;
    import org.apache.flink.types.Row;
    import java.util.function.Consumer;
    
    /**
     * @author Shydow
     * @date 2021-11-22
     */
    public class DefaultHBaseSource {
        public static void main(String[] args) {
    
            StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
            EnvironmentSettings settings = EnvironmentSettings.newInstance()
                    .useBlinkPlanner()
                    .inBatchMode()
                    .build();
            TableEnvironment tableEnv = TableEnvironment.create(settings);
    
            // 需要查询的SQL
            String sql = "CREATE TABLE t_cdc_test (" +
                    " rowkey STRING," +
                    " data ROW<id STRING, age STRING>," +
                    " PRIMARY KEY (rowkey) NOT ENFORCED" +
                    " ) WITH (" +
                    " 'connector' = 'hbase-2.2' ," +
                    " 'table-name' = 'default.fink_hbase_1632400908505' ," +
                    " 'zookeeper.quorum' = 'dss01:2181,dss02:2181,dss03:2181'" +
                    " )";
            tableEnv.executeSql(sql);
            Table table = tableEnv.sqlQuery("select data.id, data.age from t_cdc_test limit 10");
            TableResult execute = table.execute();
    
            int cnt = execute.getTableSchema().getFieldCount();
            String[] names = execute.getTableSchema().getFieldNames();
            execute.collect().forEachRemaining(new Consumer<Row>() {
                @Override
                public void accept(Row row) {
                    JSONObject object = new JSONObject();
                    for (int i = 0; i < cnt; i++) {
                        object.put(names[i], row.getField(i).toString());
                    }
                    System.out.println(object.toJSONString());
                }
            });
        }
    }

     

  • 多表关联写入ClickHouse

  

posted @ 2021-11-22 17:28  Shydow  阅读(342)  评论(0编辑  收藏  举报