Flink Table API JAVA_STREAM_DEMO
<dependencies>
<!-- Maven dependencies for the streaming Table API demo; all three are org.apache.flink artifacts at version 1.9.0. -->
<!-- Table API bridge for Java (flink-table-api-java-bridge). -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-api-java-bridge_2.11</artifactId>
<version>1.9.0</version>
<!-- NOTE(review): "provided" scope is commented out, so Maven's default scope applies; presumably done so the demo runs directly from an IDE. Confirm before packaging for a cluster. -->
<!--<scope>provided</scope>-->
</dependency>
<!-- Table planner (flink-table-planner). -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-planner_2.11</artifactId>
<version>1.9.0</version>
<!--<scope>provided</scope>-->
</dependency>
<!-- NOTE(review): Scala streaming artifact in a Java demo; presumably required by the planner at runtime. Confirm against the Flink 1.9 dependency documentation. -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-scala_2.11</artifactId>
<version>1.9.0</version>
<!--<scope>provided</scope>-->
</dependency>
</dependencies>
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.java.StreamTableEnvironment;
/**
 * Flink Java streaming Table API demo.
 *
 * <p>Registers an endless stream of {@code WC("Hello", 1)} records as the table
 * {@code WordCount}, aggregates the frequency per word with a SQL query, and
 * prints the result as a retract stream.
 *
 * <p>NOTE(review): the {@code WC} type is not declared in this file; it is
 * expected to be defined elsewhere with {@code word} and {@code frequency}
 * fields matching the columns used in the query.
 *
 * @author: create by maoxiangyi
 * @version: v1.0
 */
public class WordCountSql_Stream {

    /**
     * Builds and runs the streaming word-count job.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if job construction or execution fails
     */
    public static void main(String[] args) throws Exception {
        EnvironmentSettings fsSettings =
                EnvironmentSettings.newInstance().useOldPlanner().inStreamingMode().build();
        StreamExecutionEnvironment fsEnv = StreamExecutionEnvironment.getExecutionEnvironment();
        StreamTableEnvironment fsTableEnv = StreamTableEnvironment.create(fsEnv, fsSettings);

        DataStreamSource<WC> myDataSource = fsEnv.addSource(new SourceFunction<WC>() {
            // Cleared by cancel(); volatile because cancel() is invoked from a
            // different thread than the one executing run().
            private volatile boolean running = true;

            @Override
            public void run(SourceContext<WC> sourceContext) throws Exception {
                // Emit one record per second until the job is cancelled.
                while (running) {
                    Thread.sleep(1000);
                    sourceContext.collect(new WC("Hello", 1));
                }
            }

            @Override
            public void cancel() {
                // Signal the emit loop in run() to terminate.
                running = false;
            }
        });

        fsTableEnv.registerDataStream("WordCount", myDataSource);

        // Run a SQL query on the registered table and retrieve the result as a new Table.
        Table table = fsTableEnv.sqlQuery(
                "SELECT word, SUM(frequency) as frequency FROM WordCount GROUP BY word");

        // When processing real-time data with Flink SQL, converting a Table back into
        // a stream requires choosing between toAppendStream and toRetractStream; it is
        // easy to pick toAppendStream without thinking.
        //
        // Append mode: usable only when the dynamic Table is modified by INSERT changes
        // alone, i.e. it is append-only and previously emitted results are never
        // updated. Using append mode with update or delete changes fails with an error.
        //
        // Retract mode: always usable. Every record carries a boolean flag:
        // true marks an insertion, false marks the retraction of an earlier result.
        //
        // Per the official documentation: if rows are only ever added, append mode
        // works; in all other cases (updates, deletes) retract mode must be used.
        // The GROUP BY aggregation above updates earlier results, hence retract mode.
        // https://blog.csdn.net/aa518189/article/details/87816139
        DataStream<Tuple2<Boolean, WC>> stream = fsTableEnv.toRetractStream(table, WC.class);
        stream.print();

        fsTableEnv.execute("flink stream table job");
    }
}
10> (true,word='Hello', frequency=1)
10> (false,word='Hello', frequency=1)
10> (true,word='Hello', frequency=2)
10> (false,word='Hello', frequency=2)
10> (true,word='Hello', frequency=3)
10> (false,word='Hello', frequency=3)
10> (true,word='Hello', frequency=4)
10> (false,word='Hello', frequency=4)
10> (true,word='Hello', frequency=5)
10> (false,word='Hello', frequency=5)
10> (true,word='Hello', frequency=6)
10> (false,word='Hello', frequency=6)
10> (true,word='Hello', frequency=7)
个人网站:shuoyizui.com
公众号:写个框架玩
近期在公众号会发布一系列文章,主要是想完成一个简化的MapReduce框架的编写。实现MapReduce编程模型、任务远程提交、任务分配、任务执行等功能。涉及到了动态代理、反射、网络通信、序列化、消息队列、Netty、自定义类加载器、多线程、shell等技术点。