自定义UDAF2(多进一出函数)

package SparkSQL.fun.registerfum
import org.apache.spark.SparkConf
import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction}
import org.apache.spark.sql.types.{DataType, DataTypes, StructField, StructType}
import org.apache.spark.sql.{Dataset, Row, SparkSession}
/**
 * Demo entry point: registers the [[MyMax]] aggregate under the SQL name
 * `clz_age_max` and uses it to compute the maximum `age` per `clz` over a
 * small in-memory dataset.
 *
 * NOTE(review): the `student` case class is not defined in this part of the
 * file. The query only relies on it exposing `clz` and `age` columns; the
 * meaning of the other two constructor arguments (presumably name and gender)
 * should be confirmed against its definition.
 */
object registerfun3 {

  /**
   * Builds a local SparkSession, runs the per-class max-age query and prints
   * the result.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("registfun2").setMaster("local[*]")
    val session = SparkSession.builder().config(conf).getOrCreate()

    // Stop the session even when building the dataset or running the query
    // throws; otherwise the local Spark context is left running.
    try {
      import session.implicits._

      val dataset: Dataset[student] = session.createDataset(Array(
        student("zs", "c001", 21, "男"),
        student("ls", "c001", 22, "女"),
        student("ww", "c001", 23, "男"),
        student("ml", "c002", 20, "女"),
        student("zb", "c002", 23, "男")
      ))

      // Expose the dataset to SQL and make the UDAF callable by name.
      dataset.createOrReplaceTempView("student")
      session.udf.register("clz_age_max", new MyMax())

      val frame = session.sql("select clz, clz_age_max(age) from student group by clz")
      frame.show()
    } finally {
      session.stop()
    }
  }
}
/**
 * User-defined aggregate function (many rows in, one value out) that returns
 * the maximum of a LONG column.
 *
 * The aggregation buffer holds a single nullable LONG. It starts as SQL NULL
 * ("no value seen yet") rather than 0, so that:
 *  - groups containing only negative values return their real maximum,
 *  - NULL inputs are skipped instead of being read as 0,
 *  - groups with no non-NULL input evaluate to NULL.
 *
 * `null` is used deliberately here: it is how Spark's `Row` API represents
 * SQL NULL in a buffer slot.
 *
 * NOTE(review): `UserDefinedAggregateFunction` is deprecated in Spark 3.x in
 * favour of `Aggregator` registered through `functions.udaf`; the base class
 * is kept so existing `session.udf.register(..., new MyMax())` callers keep
 * working.
 */
class MyMax extends UserDefinedAggregateFunction {

  /** Schema of the input arguments: a single LONG column. */
  override def inputSchema: StructType =
    StructType(Array(
      StructField("input", DataTypes.LongType)
    ))

  /** Schema of the intermediate buffer: the running maximum (nullable LONG). */
  override def bufferSchema: StructType =
    StructType(Array(
      StructField("max", DataTypes.LongType)
    ))

  /** Type of the final result. */
  override def dataType: DataType = DataTypes.LongType

  /** The same input always yields the same output. */
  override def deterministic: Boolean = true

  /**
   * Resets the buffer to "no value seen yet".
   *
   * @param buffer aggregation buffer to initialise
   */
  override def initialize(buffer: MutableAggregationBuffer): Unit =
    buffer(0) = null

  /**
   * Folds one input row into the buffer, ignoring NULL inputs.
   *
   * @param buffer running maximum for the current partition/group
   * @param input  row whose column 0 is the value to consider
   */
  override def update(buffer: MutableAggregationBuffer, input: Row): Unit =
    if (!input.isNullAt(0)) {
      val candidate = input.getLong(0)
      // The first non-NULL value always wins over an empty buffer.
      if (buffer.isNullAt(0) || candidate > buffer.getLong(0)) {
        buffer(0) = candidate
      }
    }

  /**
   * Combines two partial results, writing the larger maximum into `buffer1`.
   * An empty (NULL) `buffer2` leaves `buffer1` unchanged.
   *
   * @param buffer1 buffer that receives the merged result
   * @param buffer2 partial result from another partition
   */
  override def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit =
    if (!buffer2.isNullAt(0)) {
      val other = buffer2.getLong(0)
      if (buffer1.isNullAt(0) || other > buffer1.getLong(0)) {
        buffer1(0) = other
      }
    }

  /**
   * Produces the final value for a group.
   *
   * @param buffer fully merged buffer
   * @return the maximum as a `Long`, or `null` (SQL NULL) when the group had
   *         no non-NULL input
   */
  override def evaluate(buffer: Row): Any =
    if (buffer.isNullAt(0)) null else buffer.getLong(0)
}
posted @   jsqup  阅读(38)  评论(0编辑  收藏  举报
相关博文:
阅读排行:
· TypeScript + Deepseek 打造卜卦网站:技术与玄学的结合
· Manus的开源复刻OpenManus初探
· AI 智能体引爆开源社区「GitHub 热点速览」
· 三行代码完成国际化适配,妙~啊~
· .NET Core 中如何实现缓存的预热?
点击右上角即可分享
微信分享提示