resetAccumulator in window functions

This post walks through two batch (DataSet) aggregate functions in Flink's table runtime that reset their shared accumulators row at the start of every reduce() call: DataSetAggFunction and DataSetTumbleTimeWindowAggReduceGroupFunction.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.table.runtime.aggregate
import java.lang.Iterable
import org.apache.flink.api.common.functions.{MapPartitionFunction, RichGroupReduceFunction}
import org.apache.flink.configuration.Configuration
import org.apache.flink.table.codegen.{Compiler, GeneratedAggregationsFunction}
import org.apache.flink.table.util.Logging
import org.apache.flink.types.Row
import org.apache.flink.util.Collector
/**
  * [[RichGroupReduceFunction]] and [[MapPartitionFunction]] to compute aggregates that do
  * not support pre-aggregation for batch (DataSet) queries.
  *
  * @param genAggregations Code-generated [[GeneratedAggregations]]
  */
class DataSetAggFunction(
    private val genAggregations: GeneratedAggregationsFunction)
  extends RichGroupReduceFunction[Row, Row]
  with MapPartitionFunction[Row, Row]
  with Compiler[GeneratedAggregations]
  with Logging {

  private var output: Row = _
  private var accumulators: Row = _
  private var function: GeneratedAggregations = _

  override def open(config: Configuration) {
    LOG.debug(s"Compiling AggregateHelper: ${genAggregations.name} \n\n " +
      s"Code:\n${genAggregations.code}")
    val clazz = compile(
      getRuntimeContext.getUserCodeClassLoader,
      genAggregations.name,
      genAggregations.code)
    LOG.debug("Instantiating AggregateHelper.")
    function = clazz.newInstance()
    output = function.createOutputRow()
    accumulators = function.createAccumulators()
  }

  /**
    * Computes a non-pre-aggregated aggregation.
    *
    * @param records An iterator over all records of the group.
    * @param out The collector to hand results to.
    */
  override def reduce(records: Iterable[Row], out: Collector[Row]): Unit = {
    // reset accumulators
    function.resetAccumulator(accumulators)

    val iterator = records.iterator()

    var record: Row = null
    while (iterator.hasNext) {
      record = iterator.next()
      // accumulate
      function.accumulate(accumulators, record)
    }

    // set group keys value to final output
    function.setForwardedFields(record, output)
    // set agg results to output
    function.setAggregationResults(accumulators, output)

    out.collect(output)
  }

  /**
    * Computes a non-pre-aggregated aggregation and returns a row even if the input is empty.
    *
    * @param records An iterator over all records of the partition.
    * @param out The collector to hand results to.
    */
  override def mapPartition(records: Iterable[Row], out: Collector[Row]): Unit = {
    reduce(records, out)
  }
}
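To make those calls concrete, here is a minimal hand-written sketch of the contract the code-generated class fulfills, assuming a query shaped like SELECT key, SUM(v) FROM t GROUP BY key (group key in field 0, a LONG value in field 1). The class name, field positions, and the [key, sum] output layout are assumptions for illustration; the sketch deliberately does not extend the real [[GeneratedAggregations]] abstract class, which declares further methods (e.g. retract, mergeAccumulatorsPair), and only mirrors the methods DataSetAggFunction uses above.

import org.apache.flink.types.Row

// Hypothetical stand-in for one concrete generated class (illustration only).
class SumAggregationsSketch {

  // one field per aggregate; here a single long-sum accumulator
  def createAccumulators(): Row = {
    val accs = new Row(1)
    accs.setField(0, 0L)
    accs
  }

  // zero the running sum; this is what lets reduce() reuse one accumulators row
  def resetAccumulator(accumulators: Row): Unit =
    accumulators.setField(0, 0L)

  // fold the current record's value into the running sum
  def accumulate(accumulators: Row, input: Row): Unit = {
    val sum = accumulators.getField(0).asInstanceOf[Long]
    accumulators.setField(0, sum + input.getField(1).asInstanceOf[Long])
  }

  // output layout assumed here: [key, sum]
  def createOutputRow(): Row = new Row(2)

  // copy the grouping key from an input record of the group into the output
  def setForwardedFields(input: Row, output: Row): Unit =
    output.setField(0, input.getField(0))

  // write the final aggregate value into the output
  def setAggregationResults(accumulators: Row, output: Row): Unit =
    output.setField(1, accumulators.getField(0))
}

Because a single accumulators row lives for the whole lifetime of the task and is shared across groups, skipping resetAccumulator at the top of reduce() would carry one group's partial sums into the next group, which is exactly why both functions in this post call it first.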
The second function follows the same reset-then-fold pattern. DataSetTumbleTimeWindowAggReduceGroupFunction handles tumbling time-windows on batch; instead of accumulating raw records it merges already-accumulated rows via mergeAccumulatorsPair, and afterwards stamps the window start and end onto the output:
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.table.runtime.aggregate
import java.lang.Iterable
import org.apache.flink.api.common.functions.RichGroupReduceFunction
import org.apache.flink.configuration.Configuration
import org.apache.flink.table.codegen.{Compiler, GeneratedAggregationsFunction}
import org.apache.flink.table.util.Logging
import org.apache.flink.types.Row
import org.apache.flink.util.Collector
/**
  * It wraps the aggregate logic inside of
  * [[org.apache.flink.api.java.operators.GroupReduceOperator]]. It is used for tumbling
  * time-windows on batch.
  *
  * @param genAggregations Code-generated [[GeneratedAggregations]]
  * @param windowSize Tumbling time window size
  * @param windowStartPos The relative window-start field position to the last field of output row
  * @param windowEndPos The relative window-end field position to the last field of output row
  * @param windowRowtimePos The relative window-rowtime field position to the last field of
  *                         output row
  * @param keysAndAggregatesArity The total arity of keys and aggregates
  */
class DataSetTumbleTimeWindowAggReduceGroupFunction(
    genAggregations: GeneratedAggregationsFunction,
    windowSize: Long,
    windowStartPos: Option[Int],
    windowEndPos: Option[Int],
    windowRowtimePos: Option[Int],
    keysAndAggregatesArity: Int)
  extends RichGroupReduceFunction[Row, Row]
  with Compiler[GeneratedAggregations]
  with Logging {

  private var collector: DataSetTimeWindowPropertyCollector = _
  protected var aggregateBuffer: Row = new Row(keysAndAggregatesArity + 1)

  private var output: Row = _
  protected var accumulators: Row = _
  protected var function: GeneratedAggregations = _

  override def open(config: Configuration) {
    LOG.debug(s"Compiling AggregateHelper: ${genAggregations.name} \n\n " +
      s"Code:\n${genAggregations.code}")
    val clazz = compile(
      getRuntimeContext.getUserCodeClassLoader,
      genAggregations.name,
      genAggregations.code)
    LOG.debug("Instantiating AggregateHelper.")
    function = clazz.newInstance()

    output = function.createOutputRow()
    accumulators = function.createAccumulators()
    collector = new DataSetTimeWindowPropertyCollector(
      windowStartPos,
      windowEndPos,
      windowRowtimePos)
  }

  override def reduce(records: Iterable[Row], out: Collector[Row]): Unit = {
    var last: Row = null
    val iterator = records.iterator()

    // reset accumulator
    function.resetAccumulator(accumulators)

    while (iterator.hasNext) {
      val record = iterator.next()
      function.mergeAccumulatorsPair(accumulators, record)
      last = record
    }

    // set group keys value to final output.
    function.setForwardedFields(last, output)

    // get final aggregate value and set to output.
    function.setAggregationResults(accumulators, output)

    // get window start timestamp
    val startTs: Long = last.getField(keysAndAggregatesArity).asInstanceOf[Long]

    // set collector and window
    collector.wrappedCollector = out
    collector.windowStart = startTs
    collector.windowEnd = startTs + windowSize

    collector.collect(output)
  }
}
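Note that reduce() never recomputes the window from record timestamps: it reads the window-start timestamp stored behind the keys and aggregates (field index keysAndAggregatesArity, presumably aligned by the upstream prepare phase) and derives the end by plain addition. A worked example with assumed values:

// Worked example of the window-bound arithmetic in reduce() above;
// all concrete values here are assumptions for illustration.
val windowSize = 60000L            // one-minute tumbling window, in milliseconds
val keysAndAggregatesArity = 3     // e.g. 1 key field + 2 aggregate fields
// the window-start field therefore sits at index 3 of each input record
val startTs = 1512345600000L       // aligned window start read from the last record
val endTs = startTs + windowSize   // 1512345660000L, the exclusive window end

The DataSetTimeWindowPropertyCollector then writes these bounds into the output row at the configured relative positions (windowStartPos / windowEndPos / windowRowtimePos) before forwarding it to the wrapped collector.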
Category: Flink source code analysis