Flink AggregatingState 实例
AggregatingState介绍
- AggregatingState需要和AggregateFunction配合使用
- add()方法添加一个元素,并调用AggregateFunction.add()更新累加器
- get()方法返回AggregateFunction.getResult()根据当前累加器计算出的聚合结果
需求:计算每个设备10秒内的平均温度
import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.AggregateFunction;
import org.apache.flink.api.common.state.AggregatingState;
import org.apache.flink.api.common.state.AggregatingStateDescriptor;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import org.apache.flink.util.Collector;
import java.time.Duration;
import java.util.Random;
public class AggregatingStateTest {
public static void main(String[] args) throws Exception {
// 计算每个设备10s内温度的平均值
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
env.getConfig().setAutoWatermarkInterval(100l);
DataStreamSource<Tuple3<String, Integer, Long>> tuple3DataStreamSource = env.addSource(new SourceFunction<Tuple3<String, Integer, Long>>() {
boolean flag = true;
@Override
public void run(SourceContext<Tuple3<String, Integer, Long>> ctx) throws Exception {
String[] str = {"水阀1", "水阀2", "水阀3"};
while (flag) {
int i = new Random().nextInt(3);
// 温度
int temperature = new Random().nextInt(100);
Thread.sleep(1000l);
// 设备号、温度、事件时间
ctx.collect(new Tuple3<String, Integer, Long>(str[i], temperature, System.currentTimeMillis()));
}
}
@Override
public void cancel() {
flag = false;
}
});
tuple3DataStreamSource.assignTimestampsAndWatermarks(WatermarkStrategy.<Tuple3<String, Integer, Long>>forBoundedOutOfOrderness(Duration.ofSeconds(2))
.withTimestampAssigner(new SerializableTimestampAssigner<Tuple3<String, Integer, Long>>() {
@Override
public long extractTimestamp(Tuple3<String, Integer, Long> stringIntegerLongTuple3, long l) {
return stringIntegerLongTuple3.f2;
}
})).keyBy(new KeySelector<Tuple3<String, Integer, Long>, String>() {
@Override
public String getKey(Tuple3<String, Integer, Long> stringIntegerLongTuple3) throws Exception {
return stringIntegerLongTuple3.f0;
}
}).process(new KeyedProcessFunction<String, Tuple3<String, Integer, Long>, String>() {
Long interval = 10 * 1000l;
// <Integer, Double>这个类型是aggregatingState中的输入和输出类型
AggregatingState<Integer, Double> aggregatingState = null;
@Override
public void open(Configuration parameters) throws Exception {
@Override
public void open(Configuration parameters) throws Exception {
super.open(parameters);
// <Integer, Tuple2<Integer,Integer>, Double>这是输入,中间状态,输出类型。TypeInformation.of(new TypeHint<Tuple2<Integer,Integer>>(){})这个是aggregatingState存储的数据的类型
AggregatingStateDescriptor<Integer, Tuple2<Integer,Integer>, Double> aggregatingStateDescriptor =
new AggregatingStateDescriptor<Integer, Tuple2<Integer,Integer>, Double>("aggregatingState", new MyAggregate(), TypeInformation.of(new TypeHint<Tuple2<Integer,Integer>>(){}));
aggregatingState = getRuntimeContext().getAggregatingState(aggregatingStateDescriptor);
}
@Override
public void processElement(Tuple3<String, Integer, Long> value, Context ctx, Collector<String> out) throws Exception {
// 10s的起始的时间
Long start = ctx.timestamp() - (ctx.timestamp() % interval);
Long timerTimestamp = start + interval;
ctx.timerService().registerEventTimeTimer(timerTimestamp);
aggregatingState.add(value.f1);
}
@Override
public void onTimer(long timestamp, OnTimerContext ctx, Collector<String> out) throws Exception {
super.onTimer(timestamp, ctx, out);
Double aDouble = aggregatingState.get();
String str = "[" + ctx.getCurrentKey() + "] " + "十秒内的平均温度为:" + aDouble;
out.collect(str);
}
}).print();
env.execute("aggregatingState");
}
private static class MyAggregate implements AggregateFunction<Integer, Tuple2<Integer,Integer>, Double> {
@Override
public Tuple2<Integer, Integer> createAccumulator() {
// 初始化温度和次数
return new Tuple2<Integer, Integer>(0,0);
}
@Override
public Tuple2<Integer, Integer> add(Integer integer, Tuple2<Integer, Integer> integerIntegerTuple2) {
// 历史温度加上本次温度,次数加1
return new Tuple2<Integer, Integer>(integerIntegerTuple2.f0 + integer, integerIntegerTuple2.f1 +1);
}
@Override
public Double getResult(Tuple2<Integer, Integer> integerIntegerTuple2) {
return Double.valueOf(integerIntegerTuple2.f0 / integerIntegerTuple2.f1);
}
@Override
public Tuple2<Integer, Integer> merge(Tuple2<Integer, Integer> integerIntegerTuple2, Tuple2<Integer, Integer> acc1) {
return new Tuple2<Integer, Integer>(integerIntegerTuple2.f0 + acc1.f0, integerIntegerTuple2.f1 + acc1.f1);
}
}
}
标签:
Java Flink
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· DeepSeek 开源周回顾「GitHub 热点速览」
· 物流快递公司核心技术能力-地址解析分单基础技术分享
· .NET 10首个预览版发布:重大改进与新特性概览!
· AI与.NET技术实操系列(二):开始使用ML.NET
· .NET10 - 预览版1新功能体验(一)