|NO.Z.00050|——————————|BigDataEnd|——|Hadoop&实时数仓.V30|——|项目.v30|需求三:数据处理&增量统计广告.V4|——|编程实现|

一、编程实现:每隔5秒统计最近1小时内广告的点击量&增量统计
package dw.dws

import modes.{AdClick, CountByProductAd}
import myutils.SourceKafka
import java.sql.Date
import java.text.SimpleDateFormat
import java.util.concurrent.TimeUnit

import com.alibaba.fastjson.JSON.parseObject
import com.alibaba.fastjson.{JSON, JSONArray, JSONObject}
import org.apache.flink.api.common.functions.AggregateFunction
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.scala.function.WindowFunction
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.windows.TimeWindow
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
import org.apache.flink.util.Collector

/**
 * Flink streaming job: every 5 seconds, emit the number of ad clicks per product
 * over the most recent hour, computed incrementally.
 *
 * Pipeline: Kafka topic "eventlog" -> JSON parsing into [[AdClick]] records ->
 * event-time sliding window keyed by product id -> incremental count ->
 * [[CountByProductAd]] printed to stdout.
 */
object AdEventLog {

  import scala.util.control.NonFatal

  /** Length of each sliding window (the "last 1 hour" of the requirement). */
  private val WindowSize: Time = Time.hours(1)

  /** How often a window result is emitted (the "every 5 seconds" of the requirement). */
  private val WindowSlide: Time = Time.seconds(5)

  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    import org.apache.flink.api.scala._

    env.setParallelism(1)
    // Windows are driven by the timestamp carried in each event, not by wall-clock time.
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    // Raw event-log lines are read from Kafka.
    val kafkaSource: FlinkKafkaConsumer[String] = new SourceKafka().getKafkaSource("eventlog")
    val eventLogStream: DataStream[String] = env.addSource(kafkaSource)

    // One AdClick per "ad" event found in a log line; lines without usable ad events yield nothing.
    val adClicks: DataStream[AdClick] =
      eventLogStream.flatMap((line: String) => parseAdClicks(line))

    val result: DataStream[CountByProductAd] = adClicks
      // AdClick.timestamp is in epoch seconds, while Flink event timestamps must be epoch
      // milliseconds; without this conversion every window would be 1000x too long.
      .assignAscendingTimestamps(click => TimeUnit.SECONDS.toMillis(click.timestamp))
      .keyBy(_.productId)
      .timeWindow(WindowSize, WindowSlide)
      .aggregate(new AdAggFunc, new AdWindowFunc())

    result.print()

    env.execute()
  }

  /**
   * Extracts every ad click contained in one raw event-log line.
   *
   * The line is expected to be a JSON object with an "attr" object (holding "area" and
   * "uid") and a "yanqi_event" array whose elements carry "name", "json" and "time".
   * Records that are not valid JSON, or that lack "attr"/"area"/"uid"/"yanqi_event",
   * produce an empty result instead of failing the job. Nothing is logged for them.
   *
   * @param line raw JSON text of one log record
   * @return one [[AdClick]] per event whose name is "ad", in array order
   */
  private def parseAdClicks(line: String): Seq[AdClick] =
    try {
      val clicks = for {
        root   <- Option(parseObject(line))
        attr   <- nestedObject(root, "attr")
        area   <- stringField(attr, "area")
        uid    <- stringField(attr, "uid")
        events <- Option(root.get("yanqi_event")).flatMap(v => Option(JSON.parseArray(v.toString)))
      } yield (0 until events.size()).flatMap(i => toAdClick(area, uid, events.get(i))).toList
      clicks.getOrElse(Nil)
    } catch {
      case NonFatal(_) => Nil
    }

  /**
   * Converts a single element of the "yanqi_event" array into an [[AdClick]].
   *
   * @return the click when the element is an "ad" event with a "product_id" and a numeric
   *         "time"; `None` for any other event or for an unparsable one, so that one bad
   *         element does not discard its siblings
   */
  private def toAdClick(area: String, uid: String, rawEvent: AnyRef): Option[AdClick] =
    try {
      for {
        event     <- Option(rawEvent).flatMap(e => Option(parseObject(e.toString)))
        name      <- stringField(event, "name") if name == "ad"
        payload   <- nestedObject(event, "json")
        productId <- stringField(payload, "product_id")
        millis    <- stringField(event, "time")
      } yield AdClick(area, uid, productId, TimeUnit.MILLISECONDS.toSeconds(millis.toLong))
    } catch {
      case NonFatal(_) => None
    }

  /** Reads `key` from `parent` and parses its textual form as a JSON object, if present. */
  private def nestedObject(parent: JSONObject, key: String): Option[JSONObject] =
    Option(parent.get(key)).flatMap(v => Option(parseObject(v.toString)))

  /** Reads `key` from `parent` as a string, if present. */
  private def stringField(parent: JSONObject, key: String): Option[String] =
    Option(parent.get(key)).map(_.toString)

  /** Incremental counter: the accumulator is the number of clicks seen so far in the window. */
  class AdAggFunc() extends AggregateFunction[AdClick, Long, Long] {
    override def createAccumulator(): Long = 0L

    override def add(ad: AdClick, acc: Long): Long = acc + 1

    override def getResult(acc: Long): Long = acc

    override def merge(acc1: Long, acc2: Long): Long = acc1 + acc2
  }

  /** Wraps the pre-aggregated count of a window into a [[CountByProductAd]] record. */
  class AdWindowFunc() extends WindowFunction[Long, CountByProductAd, String, TimeWindow] {

    /** Formats an epoch-millisecond timestamp as "yyyy-MM-dd HH:mm:ss" in the JVM default zone. */
    private def formatTs(ts: Long): String = {
      val df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
      df.format(new Date(ts))
    }

    override def apply(key: String,
                       window: TimeWindow,
                       input: Iterable[Long],
                       out: Collector[CountByProductAd]): Unit = {
      // `input` holds exactly one element: the result of AdAggFunc for this window.
      // window.getEnd is already in epoch milliseconds, so it is formatted as-is.
      out.collect(CountByProductAd(formatTs(window.getEnd), key, input.iterator.next()))
    }

  }

}

 
 
 
 
 
 
 
 
 

Walter Savage Landor: I strove with none, for none was worth my strife. Nature I loved and, next to Nature, Art: I warm'd both hands before the fire of life. It sinks, and I am ready to depart.
                                                                                                                                                   ——W.S.Landor

 

 

posted on   yanqi_vip  阅读(20)  评论(0)  编辑  收藏  举报

相关博文:
阅读排行:
· 无需6万激活码!GitHub神秘组织3小时极速复刻Manus,手把手教你使用OpenManus搭建本
· Manus爆火,是硬核还是营销?
· 终于写完轮子一部分:tcp代理 了,记录一下
· 别再用vector<bool>了!Google高级工程师:这可能是STL最大的设计失误
· 单元测试从入门到精通
< 2025年3月 >
23 24 25 26 27 28 1
2 3 4 5 6 7 8
9 10 11 12 13 14 15
16 17 18 19 20 21 22
23 24 25 26 27 28 29
30 31 1 2 3 4 5

导航

统计

点击右上角即可分享
微信分享提示