The complete workflow for submitting a Flink Table & SQL job to a cluster
Environment: Flink 1.10.1, Scala 2.11
If you run into the following error, adjust your pom.xml accordingly; the full pom.xml configuration is shown below. This NoMatchingTableFactoryException means that no table factory on the classpath matches the requested connector properties (here connector.type=kafka with connector.version=0.10). It usually happens when the Kafka connector dependency does not match the declared connector version, or when the META-INF/services entries are lost while building the fat jar; the pom.xml below pulls in flink-connector-kafka-0.10_2.11 and merges the service files via the shade plugin's ServicesResourceTransformer.
Exception in thread "main" org.apache.flink.table.api.TableException: findAndCreateTableSource failed.
    at org.apache.flink.table.factories.TableFactoryUtil.findAndCreateTableSource(TableFactoryUtil.java:67)
    at org.apache.flink.table.factories.TableFactoryUtil.findAndCreateTableSource(TableFactoryUtil.java:54)
    at org.apache.flink.table.descriptors.ConnectTableDescriptor.registerTableSource(ConnectTableDescriptor.java:69)
    at com.qianxin.luran.flinksql.Demo1.main(Demo1.java:42)
Caused by: org.apache.flink.table.api.NoMatchingTableFactoryException: Could not find a suitable table factory for 'org.apache.flink.table.factories.TableSourceFactory' in the classpath.

Reason: No context matches.

The following properties are requested:
connector.properties.0.key=bootstrap.servers
connector.properties.0.value=localhost:9092
connector.property-version=1
connector.topic=amount
connector.type=kafka
connector.version=0.10
format.derive-schema=true
format.property-version=1
format.type=json
schema.0.name=amount
schema.0.type=VARCHAR
schema.1.name=product
schema.1.type=VARCHAR
update-mode=append

The following factories have been considered:
org.apache.flink.api.java.io.jdbc.JDBCTableSourceSinkFactory
org.apache.flink.formats.json.JsonRowFormatFactory
org.apache.flink.streaming.connectors.kafka.KafkaTableSourceSinkFactory
org.apache.flink.table.catalog.GenericInMemoryCatalogFactory
org.apache.flink.table.sources.CsvBatchTableSourceFactory
org.apache.flink.table.sources.CsvAppendTableSourceFactory
org.apache.flink.table.sinks.CsvBatchTableSinkFactory
org.apache.flink.table.sinks.CsvAppendTableSinkFactory
org.apache.flink.table.planner.delegation.BlinkPlannerFactory
org.apache.flink.table.planner.delegation.BlinkExecutorFactory
org.apache.flink.table.planner.StreamPlannerFactory
org.apache.flink.table.executor.StreamExecutorFactory
    at org.apache.flink.table.factories.TableFactoryService.filterByContext(TableFactoryService.java:283)
    at org.apache.flink.table.factories.TableFactoryService.filter(TableFactoryService.java:191)
    at org.apache.flink.table.factories.TableFactoryService.findSingleInternal(TableFactoryService.java:144)
    at org.apache.flink.table.factories.TableFactoryService.find(TableFactoryService.java:97)
    at org.apache.flink.table.factories.TableFactoryUtil.findAndCreateTableSource(TableFactoryUtil.java:64)
    ... 3 more
1. Data format
user_info_table:
1,1547718199,35.8
6,1547718201,15.4
7,1547718202,6.7
10,1547718205,38.1
14,1547718206,32
5,1547718208,36.2
9,1547718210,29.7
3,1547718213,30.9

user_table:
1,yang,11
6,wen,35
7,jie,6
10,min,38
14,zhou,32
5,qin,36
9,shu,29
3,qing,30
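For local testing, the sample rows can be pushed into the two Kafka topics used by the example code below (Demo20 for user_info_table, Demo21 for user_table). The following is only a minimal sketch, assuming a broker at uat-datacenter1:9092 and the standard Kafka producer client on the classpath; adjust the address and topic names to your setup.

import java.util.Properties
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

// Minimal sketch: push the sample CSV rows into the two source topics.
object SampleDataProducer {
  def main(args: Array[String]): Unit = {
    val props = new Properties()
    props.put("bootstrap.servers", "uat-datacenter1:9092") // assumption: same broker as in the job
    props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    val producer = new KafkaProducer[String, String](props)

    val userInfoRows = Seq(
      "1,1547718199,35.8", "6,1547718201,15.4", "7,1547718202,6.7", "10,1547718205,38.1",
      "14,1547718206,32", "5,1547718208,36.2", "9,1547718210,29.7", "3,1547718213,30.9")
    val userRows = Seq(
      "1,yang,11", "6,wen,35", "7,jie,6", "10,min,38",
      "14,zhou,32", "5,qin,36", "9,shu,29", "3,qing,30")

    // One CSV line per Kafka record, matching the schemas declared in the job.
    userInfoRows.foreach(row => producer.send(new ProducerRecord[String, String]("Demo20", row)))
    userRows.foreach(row => producer.send(new ProducerRecord[String, String]("Demo21", row)))

    producer.flush()
    producer.close()
  }
}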
2. pom.xml configuration
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.example</groupId>
    <artifactId>Flink1.11</artifactId>
    <version>1.0-SNAPSHOT</version>

    <!-- Global version management -->
    <properties>
        <java.version>1.8</java.version>
        <flink.version>1.10.1</flink.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.75</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_2.11</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_2.11</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>8.0.16</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba.ververica</groupId>
            <!-- add the dependency matching your database -->
            <artifactId>flink-connector-mysql-cdc</artifactId>
            <version>1.0.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka-0.10_2.11</artifactId>
            <version>${flink.version}</version>
            <scope>compile</scope>
        </dependency>
        <!-- CSV format dependency -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-csv</artifactId>
            <version>1.10.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-elasticsearch6_2.11</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-json</artifactId>
            <version>1.10.1</version>
        </dependency>
        <!-- Old planner. When packaging for submission, the provided scope (highlighted in the original post) must be added. -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner_2.11</artifactId>
            <version>${flink.version}</version>
            <scope>provided</scope>
        </dependency>
        <!-- Blink (new) planner. When packaging for submission, the provided scope (highlighted in the original post) must be added. -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner-blink_2.11</artifactId>
            <version>${flink.version}</version>
            <scope>provided</scope>
        </dependency>
        <!-- Logging dependencies; required by Flink, otherwise it reports errors -->
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
            <version>1.7.25</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
            <version>1.7.25</version>
        </dependency>
<!--        <dependency>-->
<!--            <groupId>org.apache.flink</groupId>-->
<!--            <artifactId>flink-table-common</artifactId>-->
<!--            <version>1.10.1</version>-->
<!--        </dependency>-->
<!--        <dependency>-->
<!--            <groupId>org.apache.flink</groupId>-->
<!--            <artifactId>flink-table-api-java</artifactId>-->
<!--            <version>1.10.1</version>-->
<!--        </dependency>-->
    </dependencies>

    <build>
        <plugins>
            <!-- Java compiler plugin -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.6.0</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                    <encoding>UTF-8</encoding>
                </configuration>
            </plugin>
            <!-- Scala compiler plugin -->
            <plugin>
                <groupId>net.alchim31.maven</groupId>
                <artifactId>scala-maven-plugin</artifactId>
                <version>3.1.6</version>
                <configuration>
                    <scalaCompatVersion>2.11</scalaCompatVersion>
                    <scalaVersion>2.11.12</scalaVersion>
                    <encoding>UTF-8</encoding>
                </configuration>
                <executions>
                    <execution>
                        <id>compile-scala</id>
                        <phase>compile</phase>
                        <goals>
                            <goal>add-source</goal>
                            <goal>compile</goal>
                        </goals>
                    </execution>
                    <execution>
                        <id>test-compile-scala</id>
                        <phase>test-compile</phase>
                        <goals>
                            <goal>add-source</goal>
                            <goal>testCompile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <!-- Shade plugin for building the fat jar (bundles all non-provided dependencies) -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>2.3</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <filters>
                                <filter>
                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                            <transformers>
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                </transformer>
                                <!-- Merges META-INF/services files so the table factories stay discoverable -->
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
<!--            <plugin>-->
<!--                <groupId>org.apache.maven.plugins</groupId>-->
<!--                <artifactId>maven-assembly-plugin</artifactId>-->
<!--                <version>2.6</version>-->
<!--                <configuration>-->
<!--                    <descriptorRefs>-->
<!--                        <descriptorRef>jar-with-dependencies</descriptorRef>-->
<!--                    </descriptorRefs>-->
<!--                    <archive>-->
<!--                        <manifest>-->
<!--                            <!– Optionally set the jar's main class –>-->
<!--                            <mainClass>test.SqlJoinKafka10Test</mainClass>-->
<!--                        </manifest>-->
<!--                    </archive>-->
<!--                </configuration>-->
<!--                <executions>-->
<!--                    <execution>-->
<!--                        <id>make-assembly</id>-->
<!--                        <phase>package</phase>-->
<!--                        <goals>-->
<!--                            <goal>single</goal>-->
<!--                        </goals>-->
<!--                    </execution>-->
<!--                </executions>-->
<!--            </plugin>-->
        </plugins>
    </build>
</project>
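With the two planner dependencies marked provided (the planner classes are normally already shipped in the cluster's lib directory), the job can be packaged and submitted roughly as follows. The jar path assumes Maven's default artifactId-version naming and the main class from the example below, so adjust both to your project:

mvn clean package
flink run -c test.SqlJoinKafka10Test target/Flink1.11-1.0-SNAPSHOT.jar

Note that when running inside the IDE, the provided scope keeps the planners off the runtime classpath; either switch the scope back to compile locally or enable the IDE option to include provided-scope dependencies.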
3. Example code
package test

import java.util.concurrent.TimeUnit

import org.apache.flink.api.common.restartstrategy.RestartStrategies
import org.apache.flink.api.common.time.Time
import org.apache.flink.runtime.state.filesystem.FsStateBackend
import org.apache.flink.streaming.api.CheckpointingMode
import org.apache.flink.streaming.api.environment.CheckpointConfig
import org.apache.flink.streaming.api.scala._
import org.apache.flink.table.api.scala._
import org.apache.flink.table.api.{DataTypes, EnvironmentSettings, Table}
import org.apache.flink.table.descriptors._
import org.apache.flink.types.Row

/**
  * @program: demo
  * @description: Read from Kafka 0.10 and sink the join result to Elasticsearch and Kafka
  * @author: yang
  * @create: 2021-01-15 11:48
  */
object SqlJoinKafka10Test {
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)

    // 1.1 Checkpoint settings (optional)
    // env.setStateBackend(new FsStateBackend("hdfs://uat-datacenter1:8020/flink/kafka10/checkpoints"))
    // env.enableCheckpointing(5000)
    // env.getCheckpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE)
    // env.getCheckpointConfig.setMaxConcurrentCheckpoints(2)
    // env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, Time.of(10, TimeUnit.SECONDS)))
    // env.getCheckpointConfig.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION)

    val settings: EnvironmentSettings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build()
    val tableEnv: StreamTableEnvironment = StreamTableEnvironment.create(env, settings)

    // Source table 1: user_info_table
    tableEnv.connect(new Kafka()
      .version("0.10")
      .topic("Demo20")
      .property("bootstrap.servers", "uat-datacenter1:9092")
      .property("zookeeper.connect", "uat-datacenter1:2181")
      // .property("group.id", "test")
    )
      .withFormat(new Csv())
      .withSchema(new Schema()
        .field("id", DataTypes.STRING())
        .field("ts", DataTypes.BIGINT())
        .field("temperature", DataTypes.DOUBLE())
      ).createTemporaryTable("user_info_table")

    // Source table 2: user_table
    tableEnv.connect(new Kafka()
      .version("0.10")
      .topic("Demo21")
      .property("bootstrap.servers", "uat-datacenter1:9092")
      .property("zookeeper.connect", "uat-datacenter1:2181")
      // .property("group.id", "test")
    )
      .withFormat(new Csv())
      .withSchema(new Schema()
        .field("id", DataTypes.STRING())
        .field("name", DataTypes.STRING())
        .field("age", DataTypes.BIGINT())
      ).createTemporaryTable("user_table")

    // Flink 1.10 currently supports join / inner join / left join here
    val sqlResult: Table = tableEnv.sqlQuery(
      """
        |select ut.id as utId, ut.name, ut.age, uit.id as uitId, uit.ts, uit.temperature
        |from user_table as ut inner join user_info_table as uit
        |on ut.id = uit.id
      """.stripMargin)

    // Sink table: Kafka
    tableEnv.connect(new Kafka()
      .version("0.10")
      .topic("Demo22")
      .property("bootstrap.servers", "uat-datacenter1:9092")
      .property("zookeeper.connect", "uat-datacenter1:2181")
      // .property("group.id", "test")
    )
      .withFormat(new Csv())
      .withSchema(new Schema()
        .field("utId", DataTypes.STRING())
        .field("name", DataTypes.STRING())
        .field("age", DataTypes.BIGINT())
        .field("uitId", DataTypes.STRING())
        .field("ts", DataTypes.BIGINT())
        .field("temperature", DataTypes.DOUBLE())
      ).createTemporaryTable("user_information")

    // Sink table: Elasticsearch (the index mapping does not need to be created beforehand)
    tableEnv.connect(new Elasticsearch()
      .version("6")
      .host("uat-datacenter1", 9200, "http")
      .index("sql_sensor")
      .documentType("_doc")
      .keyNullLiteral("null")
      .failureHandlerIgnore()
      .disableFlushOnCheckpoint()
      .bulkFlushMaxActions(1)
    ).inUpsertMode()
      .withFormat(new Json())
      .withSchema(new Schema()
        .field("utId", DataTypes.STRING())
        .field("name", DataTypes.STRING())
        .field("age", DataTypes.BIGINT())
        .field("uitId", DataTypes.STRING())
        .field("ts", DataTypes.BIGINT())
        .field("temperature", DataTypes.DOUBLE())
      ).createTemporaryTable("user_es_information")

    // Print to the console
    sqlResult.toAppendStream[Row].print("sqlResult")

    // Write to Kafka
    sqlResult.insertInto("user_information")

    // Write to Elasticsearch
    sqlResult.insertInto("user_es_information")

    env.execute("kafka Source and Sink")
  }
}
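Once the job is running, the join output can be spot-checked on the sink side. The commands below are only illustrative and assume the Kafka broker and Elasticsearch host used in the example; the topic and index names come from the sink definitions above:

kafka-console-consumer.sh --bootstrap-server uat-datacenter1:9092 --topic Demo22 --from-beginning
curl "http://uat-datacenter1:9200/sql_sensor/_search?pretty"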
This post is from 博客园 (cnblogs), author: 小白啊小白,Fighting. Please cite the original link when reposting: https://www.cnblogs.com/ywjfx/p/14303834.html