Flink入门WordCount1
1.1 pom文件
Scala 版本、Flink 版本等请结合实际情况调整。
<!-- Build-wide properties; the ${...} placeholders in the dependency section resolve against these. -->
<properties>
<!-- Java language level passed to the compiler: source and target are both 8. -->
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
<!-- Source files are read as UTF-8. -->
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<!-- Single Flink version shared by every org.apache.flink dependency below. -->
<flink.version>1.12.2</flink.version>
<!-- Scala binary version; appended as the "_2.11" suffix of the Flink artifactIds that use ${scala.binary.version}. -->
<scala.binary.version>2.11</scala.binary.version>
<!-- NOTE(review): scala.version and slf4j.version are not referenced in the visible snippet; presumably used by dependencies/plugins declared elsewhere - confirm. -->
<scala.version>2.11.12</scala.version>
<slf4j.version>1.7.30</slf4j.version>
</properties>
<!-- Flink dependencies; every entry uses ${flink.version} so the artifacts stay on one Flink release. -->
<dependencies>
<!-- Flink Java API artifact (its artifactId carries no Scala suffix). -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-java</artifactId>
<version>${flink.version}</version>
</dependency>
<!-- Streaming (DataStream) API for Java; artifactId is suffixed with the Scala binary version. -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<!-- Flink client artifact. NOTE(review): presumably required to launch the job from the IDE/local JVM - confirm. -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<!-- Scala counterparts of the two API artifacts above. NOTE(review): likely only needed when jobs are written in Scala - confirm. -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-scala_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-scala_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
</dependencies>
1.2 代码实现
/**
 * Word-count example built on the DataStream API and run in BATCH runtime mode.
 *
 * <p>Pipeline: three hard-coded sentences are split on whitespace into words, each word
 * is mapped to a {@code (word, 1)} tuple, the tuples are keyed by the word and the count
 * field is summed; the result is written through a {@code PrintSinkFunction}.
 *
 * @param args command-line arguments (not used)
 * @throws Exception if building or executing the job fails
 */
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
    environment.setRuntimeMode(RuntimeExecutionMode.BATCH);

    // Source: a fixed set of sentences.
    DataStream<String> sentences = environment.fromElements("test demo", "hello gc well", "gc test");

    // Transformation: the two variants below have the same effect.
    // Variant 1: anonymous FlatMapFunction.
    /*
    DataStream<String> words = sentences.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public void flatMap(String sentence, Collector<String> out) throws Exception {
            for (String word : sentence.split("\\s+")) {
                out.collect(word);
            }
        }
    });
    */
    // Variant 2: lambda expression. The generic type information is lost for the lambda,
    // so the result type has to be declared explicitly via returns(...).
    DataStream<String> words = sentences
            .flatMap((String sentence, Collector<String> out) -> {
                for (String word : sentence.split("\\s+")) {
                    out.collect(word);
                }
            })
            .returns(Types.STRING);

    // Pair every word with an initial count of 1 (result type declared explicitly again).
    DataStream<Tuple2<String, Integer>> pairs = words
            .map(word -> Tuple2.of(word, 1))
            .returns(Types.TUPLE(Types.STRING, Types.INT));

    // Group by the word (tuple field f0) and sum the count (tuple position 1).
    DataStream<Tuple2<String, Integer>> counts = pairs
            .keyBy(pair -> pair.f0)
            .sum(1);

    // Print to the console; either of the following works.
    counts.addSink(new PrintSinkFunction<>());
    // counts.print();

    // The job only actually runs once execute() is called.
    environment.execute();
}