The Full Workflow of Submitting a Flink Table & SQL Job to a Cluster

Environment: Flink 1.10.1, Scala 2.11 (the Kafka connector is version 0.10)

If you hit the error below, your pom.xml needs fixing; see the configuration in section 2. The NoMatchingTableFactoryException means no table factory matching the requested connector/format properties could be discovered on the classpath at runtime — typically because the Kafka connector or JSON format jar is missing from the job jar, or because the shade plugin merged the jars without combining their META-INF/services files (which is what the ServicesResourceTransformer in the pom below takes care of).

Exception in thread "main" org.apache.flink.table.api.TableException: findAndCreateTableSource failed.
    at org.apache.flink.table.factories.TableFactoryUtil.findAndCreateTableSource(TableFactoryUtil.java:67)
    at org.apache.flink.table.factories.TableFactoryUtil.findAndCreateTableSource(TableFactoryUtil.java:54)
    at org.apache.flink.table.descriptors.ConnectTableDescriptor.registerTableSource(ConnectTableDescriptor.java:69)
    at com.qianxin.luran.flinksql.Demo1.main(Demo1.java:42)
Caused by: org.apache.flink.table.api.NoMatchingTableFactoryException: Could not find a suitable table factory for 'org.apache.flink.table.factories.TableSourceFactory' in
the classpath.

Reason: No context matches.

The following properties are requested:
connector.properties.0.key=bootstrap.servers
connector.properties.0.value=localhost:9092
connector.property-version=1
connector.topic=amount
connector.type=kafka
connector.version=0.10
format.derive-schema=true
format.property-version=1
format.type=json
schema.0.name=amount
schema.0.type=VARCHAR
schema.1.name=product
schema.1.type=VARCHAR
update-mode=append

The following factories have been considered:
org.apache.flink.api.java.io.jdbc.JDBCTableSourceSinkFactory
org.apache.flink.formats.json.JsonRowFormatFactory
org.apache.flink.streaming.connectors.kafka.KafkaTableSourceSinkFactory
org.apache.flink.table.catalog.GenericInMemoryCatalogFactory
org.apache.flink.table.sources.CsvBatchTableSourceFactory
org.apache.flink.table.sources.CsvAppendTableSourceFactory
org.apache.flink.table.sinks.CsvBatchTableSinkFactory
org.apache.flink.table.sinks.CsvAppendTableSinkFactory
org.apache.flink.table.planner.delegation.BlinkPlannerFactory
org.apache.flink.table.planner.delegation.BlinkExecutorFactory
org.apache.flink.table.planner.StreamPlannerFactory
org.apache.flink.table.executor.StreamExecutorFactory
    at org.apache.flink.table.factories.TableFactoryService.filterByContext(TableFactoryService.java:283)
    at org.apache.flink.table.factories.TableFactoryService.filter(TableFactoryService.java:191)
    at org.apache.flink.table.factories.TableFactoryService.findSingleInternal(TableFactoryService.java:144)
    at org.apache.flink.table.factories.TableFactoryService.find(TableFactoryService.java:97)
    at org.apache.flink.table.factories.TableFactoryUtil.findAndCreateTableSource(TableFactoryUtil.java:64)
    ... 3 more

1. Sample Data

The two source tables arrive as CSV lines over Kafka; in the code below they are read from the topics Demo20 (user_info_table) and Demo21 (user_table).

 

user_info_table (id, ts, temperature):
1,1547718199,35.8
6,1547718201,15.4
7,1547718202,6.7
10,1547718205,38.1
14,1547718206,32
5,1547718208,36.2
9,1547718210,29.7
3,1547718213,30.9

user_table (id, name, age):
1,yang,11
6,wen,35
7,jie,6
10,min,38
14,zhou,32
5,qin,36
9,shu,29
3,qing,30

 

2. pom.xml Configuration

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.example</groupId>
    <artifactId>Flink1.11</artifactId>
    <version>1.0-SNAPSHOT</version>
    <!-- global version management -->
    <properties>
        <java.version>1.8</java.version>
        <flink.version>1.10.1</flink.version>
    </properties>
    <dependencies>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.75</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_2.11</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_2.11</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>8.0.16</version>
        </dependency>

        <dependency>
            <groupId>com.alibaba.ververica</groupId>
            <!-- add the dependency matching your database -->
            <artifactId>flink-connector-mysql-cdc</artifactId>
            <version>1.0.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka-0.10_2.11</artifactId>
            <version>${flink.version}</version>
            <scope>compile</scope>
        </dependency>
        <!-- CSV format dependency -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-csv</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-elasticsearch6_2.11</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-json</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <!-- old planner for the Table API; when packaging for cluster submission, be sure to add the provided scope below -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner_2.11</artifactId>
            <version>${flink.version}</version>
            <scope>provided</scope>
        </dependency>
        <!-- new (Blink) planner; when packaging for cluster submission, be sure to add the provided scope below -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner-blink_2.11</artifactId>
            <version>${flink.version}</version>
            <scope>provided</scope>
        </dependency>

        <!-- logging dependencies; Flink requires these, otherwise it fails at runtime -->
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
            <version>1.7.25</version>
        </dependency>

        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
            <version>1.7.25</version>
        </dependency>

<!--        <dependency>-->
<!--            <groupId>org.apache.flink</groupId>-->
<!--            <artifactId>flink-table-common</artifactId>-->
<!--            <version>1.10.1</version>-->
<!--        </dependency>-->
<!--        <dependency>-->
<!--            <groupId>org.apache.flink</groupId>-->
<!--            <artifactId>flink-table-api-java</artifactId>-->
<!--            <version>1.10.1</version>-->
<!--        </dependency>-->




    </dependencies>

    <build>
        <plugins>
            <!-- Java compiler plugin -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.6.0</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                    <encoding>UTF-8</encoding>
                </configuration>
            </plugin>
            <!-- Scala compiler plugin -->
            <plugin>
                <groupId>net.alchim31.maven</groupId>
                <artifactId>scala-maven-plugin</artifactId>
                <version>3.1.6</version>
                <configuration>
                    <scalaCompatVersion>2.11</scalaCompatVersion>
                    <scalaVersion>2.11.12</scalaVersion>
                    <encoding>UTF-8</encoding>
                </configuration>
                <executions>
                    <execution>
                        <id>compile-scala</id>
                        <phase>compile</phase>
                        <goals>
                            <goal>add-source</goal>
                            <goal>compile</goal>
                        </goals>
                    </execution>
                    <execution>
                        <id>test-compile-scala</id>
                        <phase>test-compile</phase>
                        <goals>
                            <goal>add-source</goal>
                            <goal>testCompile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <!-- shade plugin for building a fat jar (bundles all non-provided dependencies) -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>2.3</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <filters>
                                <filter>
                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                            <transformers>
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                </transformer>
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>


<!--            <plugin>-->
<!--                <groupId>org.apache.maven.plugins</groupId>-->
<!--                <artifactId>maven-assembly-plugin</artifactId>-->
<!--                <version>2.6</version>-->
<!--                <configuration>-->
<!--                    <descriptorRefs>-->
<!--                        <descriptorRef>jar-with-dependencies</descriptorRef>-->
<!--                    </descriptorRefs>-->
<!--                    <archive>-->
<!--                        <manifest>-->
<!--                            &lt;!&ndash; optionally set the jar's main class &ndash;&gt;-->
<!--                            <mainClass>test.SqlJoinKafka10Test</mainClass>-->
<!--                        </manifest>-->
<!--                    </archive>-->
<!--                </configuration>-->
<!--                <executions>-->
<!--                    <execution>-->
<!--                        <id>make-assembly</id>-->
<!--                        <phase>package</phase>-->
<!--                        <goals>-->
<!--                            <goal>single</goal>-->
<!--                        </goals>-->
<!--                    </execution>-->
<!--                </executions>-->
<!--            </plugin>-->
        </plugins>
    </build>

</project>
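
A note on the provided scope: both planner dependencies are marked provided because the cluster's lib directory already ships them, and bundling them into the fat jar can cause class conflicts. When running from the IDE, however, provided dependencies are not on the runtime classpath, so temporarily comment out the scope, or (in recent IntelliJ versions) tick "Include dependencies with 'Provided' scope" in the run configuration.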

3. Example Code

package test

import java.util.concurrent.TimeUnit

import org.apache.flink.api.common.restartstrategy.RestartStrategies
import org.apache.flink.api.common.time.Time
import org.apache.flink.runtime.state.filesystem.FsStateBackend
import org.apache.flink.streaming.api.CheckpointingMode
import org.apache.flink.streaming.api.environment.CheckpointConfig
import org.apache.flink.streaming.api.scala._
import org.apache.flink.table.api.scala._
import org.apache.flink.table.api.{DataTypes, EnvironmentSettings, Table}
import org.apache.flink.table.descriptors._
import org.apache.flink.types.Row

/**
 * @program: demo
 * @description: read data from Kafka 0.10 and sink it to ES or Kafka
 * @author: yang
 * @create: 2021-01-15 11:48
 */
object SqlJoinKafka10Test {

  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)
    // 1.1 checkpoint settings (left commented out for this demo)
//    env.setStateBackend(new FsStateBackend("hdfs://uat-datacenter1:8020/flink/kafka10/checkpoints"))
//    env.enableCheckpointing(5000)
//    env.getCheckpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE)
//    env.getCheckpointConfig.setMaxConcurrentCheckpoints(2)
//    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, Time.of(10, TimeUnit.SECONDS)))
//    env.getCheckpointConfig.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION)
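    // If enabled, the block above would checkpoint to HDFS every 5s in exactly-once mode,
    // allow 2 concurrent checkpoints, restart the job up to 3 times with a 10s delay,
    // and retain externalized checkpoints when the job is cancelled.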
    val settings: EnvironmentSettings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build()
    val tableEnv: StreamTableEnvironment = StreamTableEnvironment.create(env, settings)
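    // useBlinkPlanner() requires flink-table-planner-blink on the classpath (see the pom
    // in section 2); to run on the old planner instead, switch to useOldPlanner().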

    // source table 1: user_info_table
    tableEnv.connect(new Kafka()
        .version("0.10")
        .topic("Demo20")
        .property("bootstrap.servers","uat-datacenter1:9092")
        .property("zookeeper.connect","uat-datacenter1:2181")
//      .property("group.id","test")
    )
      .withFormat(new Csv())
      .withSchema(new Schema()
          .field("id",DataTypes.STRING())
          .field("ts",DataTypes.BIGINT())
          .field("temperature",DataTypes.DOUBLE())
      ).createTemporaryTable("user_info_table")
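    // The CSV format maps fields by position, so a line such as "1,1547718199,35.8"
    // from section 1 is parsed as (id, ts, temperature).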

    // source table 2: user_table
    tableEnv.connect(new Kafka()
      .version("0.10")
      .topic("Demo21")
      .property("bootstrap.servers","uat-datacenter1:9092")
      .property("zookeeper.connect","uat-datacenter1:2181")
//      .property("group.id","test")
    )
      .withFormat(new Csv())
      .withSchema(new Schema()
        .field("id",DataTypes.STRING())
        .field("name",DataTypes.STRING())
        .field("age",DataTypes.BIGINT())
      ).createTemporaryTable("user_table")

    // Flink 1.10 currently supports join / inner join / left join
    val sqlResult: Table = tableEnv.sqlQuery(
      """
        |select ut.id as utId ,ut.name,ut.age,uit.id as uitId ,uit.ts,uit.temperature
        |from user_table as ut inner join user_info_table as uit
        |on  ut.id = uit.id
      """.stripMargin)

    // sink table: Kafka
    tableEnv.connect(new Kafka()
      .version("0.10")
      .topic("Demo22")
      .property("bootstrap.servers","uat-datacenter1:9092")
      .property("zookeeper.connect","uat-datacenter1:2181")
//      .property("group.id","test")
    )
      .withFormat(new Csv())
      .withSchema(new Schema()
        .field("utId",DataTypes.STRING())
        .field("name",DataTypes.STRING())
        .field("age",DataTypes.BIGINT())
        .field("uitId",DataTypes.STRING())
        .field("ts",DataTypes.BIGINT())
        .field("temperature",DataTypes.DOUBLE())
      ).createTemporaryTable("user_information")
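    // The sink schema must match the query's output in field order and type, which is
    // why the SQL aliases utId and uitId reappear here.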

    // sink table: Elasticsearch; no need to create the index mapping in advance
    tableEnv.connect(new Elasticsearch()
        .version("6")
        .host("uat-datacenter1",9200,"http")
        .index("sql_sensor")
        .documentType("_doc")
        .keyNullLiteral("null")
        .failureHandlerIgnore()
        .disableFlushOnCheckpoint()
        .bulkFlushMaxActions(1)
    ).inUpsertMode()
      .withFormat(new Json())
      .withSchema(new Schema()
        .field("utId",DataTypes.STRING())
        .field("name",DataTypes.STRING())
        .field("age",DataTypes.BIGINT())
        .field("uitId",DataTypes.STRING())
        .field("ts",DataTypes.BIGINT())
        .field("temperature",DataTypes.DOUBLE())
      ).createTemporaryTable("user_es_information")
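    // inUpsertMode() writes updates as upserts; keyNullLiteral("null") is the literal
    // used for null key fields, and bulkFlushMaxActions(1) flushes every single record
    // (convenient for a demo, too chatty for production).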

    // print to the console
    sqlResult.toAppendStream[Row].print("sqlResult")

    // write to Kafka
    sqlResult.insertInto("user_information")

    // write to Elasticsearch
    sqlResult.insertInto("user_es_information")

    env.execute("kafka Source and Sink")

  }
}
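
4. Packaging and Submitting (a minimal sketch)

The commands below assume the default Maven artifact name derived from the coordinates in section 2 and a Flink CLI on the PATH; adjust the paths to your setup:

mvn clean package
flink run -c test.SqlJoinKafka10Test target/Flink1.11-1.0-SNAPSHOT.jar

mvn clean package produces the fat jar via the shade plugin; flink run submits it to the running cluster, with -c naming the entry class (the object above).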

 
