Flink 流处理和批处理测试小代码
批处理:
import java.util.Locale;
import java.util.regex.Pattern;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.DataSource;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;
/**
 * Minimal Flink batch (DataSet API) word-count example.
 *
 * <p>Builds a small in-memory data set of text lines, tokenizes every line into
 * {@code (word, 1)} tuples, sums the counts per word and prints the result.
 */
public class BatchJob {

    /**
     * Runs the word count over two hard-coded sample lines and prints the totals.
     *
     * @param args command-line arguments; not used
     * @throws Exception if building or running the Flink job fails
     */
    public static void main(String[] args) throws Exception {
        // Set up the batch execution environment.
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSource<String> stringDataSource = env.fromElements("ssssss yi ss", "ss jiu sss");

        DataSet<Tuple2<String, Integer>> counts =
                // Split every line of text into (word, 1) tuples.
                stringDataSource.flatMap(new Tokenizer())
                        // Group by tuple field 0 (the word), then sum tuple field 1 (the count).
                        .groupBy(0)
                        .sum(1);

        // print() is the only sink of this job. In the DataSet API it runs the program
        // itself, so no separate env.execute(...) call follows it.
        counts.print();
    }

    /**
     * Splits a line of text into lower-cased words and emits one {@code (word, 1)}
     * tuple per non-empty word.
     */
    public static class Tokenizer implements FlatMapFunction<String, Tuple2<String, Integer>> {

        /** Separator: one or more non-word characters. Compiled once instead of per record. */
        private static final Pattern NON_WORD = Pattern.compile("\\W+");

        /**
         * Tokenizes one input line.
         *
         * @param value the line of text to split
         * @param out collector receiving a {@code (word, 1)} tuple for each word found
         */
        @Override
        public void flatMap(String value, Collector<Tuple2<String, Integer>> out) {
            // Locale.ROOT keeps the lower-casing independent of the JVM's default locale,
            // so the same input always yields the same tokens.
            String[] tokens = NON_WORD.split(value.toLowerCase(Locale.ROOT));

            for (String token : tokens) {
                // A line that starts with a separator produces a leading empty token; skip it.
                if (!token.isEmpty()) {
                    out.collect(new Tuple2<>(token, 1));
                }
            }
        }
    }
}
流处理:
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.Collector;
/**
 * Minimal Flink streaming word-count example.
 *
 * <p>Reads newline-delimited text from a socket, splits each line into words and
 * prints the number of occurrences of every word per 2-second processing-time window.
 */
public class StreamingJob {

    /**
     * Builds and runs the streaming word-count job.
     *
     * @param args command-line arguments; not used
     * @throws Exception if building or running the Flink job fails
     */
    public static void main(String[] args) throws Exception {
        // Create the execution environment and use processing time for windowing.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);

        // Socket source: host "master1", port 7777, records delimited by "\n".
        DataStreamSource<String> source = env.socketTextStream("master1", 7777, "\n");

        // Transform the lines into per-word counts.
        DataStream<WordWithCount> dataStream = source
                .flatMap(new LineSplitter())
                // Group by the "word" field.
                .keyBy("word")
                // Window size and slide are both 2 seconds.
                .timeWindow(Time.seconds(2), Time.seconds(2))
                // Sum the "count" field of the words inside each window.
                .sum("count");

        // Write the result to the sink (standard output).
        dataStream.print();

        // Start the job.
        env.execute("Flink Streaming Word Count By Java");
    }

    /**
     * Splits a line on single spaces and emits a {@link WordWithCount} with count 1
     * for every non-empty word.
     */
    public static class LineSplitter implements FlatMapFunction<String, WordWithCount> {

        /**
         * Tokenizes one input line.
         *
         * @param line the line of text to split
         * @param collector collector receiving one {@link WordWithCount} per word
         */
        @Override
        public void flatMap(String line, Collector<WordWithCount> collector) {
            for (String word : line.split(" ")) {
                // Blank lines and leading or repeated spaces produce empty tokens;
                // skip them so that "" is never counted as a word.
                if (!word.isEmpty()) {
                    collector.collect(new WordWithCount(word, 1));
                }
            }
        }
    }

    /**
     * A word together with its number of occurrences.
     *
     * <p>The job refers to the fields by name in {@code keyBy("word")} and
     * {@code sum("count")}.
     */
    public static class WordWithCount {

        /** The word. */
        public String word;

        /** How many times the word occurred. */
        public int count;

        /** Creates an instance with unset fields. */
        public WordWithCount() {
        }

        /**
         * Creates an instance for the given word and count.
         *
         * @param word the word
         * @param count the number of occurrences
         */
        public WordWithCount(String word, int count) {
            this.word = word;
            this.count = count;
        }

        @Override
        public String toString() {
            return "WordWithCount{" +
                    "word='" + word + '\'' +
                    ", count=" + count +
                    '}';
        }
    }
}
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· Manus重磅发布:全球首款通用AI代理技术深度解析与实战指南
· 被坑几百块钱后,我竟然真的恢复了删除的微信聊天记录!
· 没有Manus邀请码?试试免邀请码的MGX或者开源的OpenManus吧
· 【自荐】一款简洁、开源的在线白板工具 Drawnix
· 园子的第一款AI主题卫衣上架——"HELLO! HOW CAN I ASSIST YOU TODAY