window
- Diagram for intuition

- Notes
countByWindow counts the elements in each sliding window
reduceByWindow runs reduce over the elements in each sliding window
reduceByKeyAndWindow runs reduceByKey over the elements in each sliding window
countByValueAndWindow runs countByValue over the elements in each sliding window
All four take the same two core parameters (see the sketch below):
windowDuration: Duration, the window length, normally an integer multiple of the batch interval
slideDuration: Duration, the slide interval, normally an integer multiple of the batch interval
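A minimal runnable sketch of how the two durations interact, assuming the same node1:44444 socket source used in the examples below (the object name WindowSketch is made up for illustration):

```scala
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

object WindowSketch {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[2]").setAppName("windowSketch")
    // 5-second batch interval; every window/slide duration below is a multiple of it
    val ssc = new StreamingContext(conf, Seconds(5))
    val lines = ssc.socketTextStream("node1", 44444)

    // Sliding window: recomputed every 5 s over the most recent 15 s of data,
    // so consecutive windows overlap by 10 s
    lines.window(Seconds(15), Seconds(5)).count().print()

    // Tumbling window: windowDuration == slideDuration, so windows never overlap
    lines.window(Seconds(15), Seconds(15)).count().print()

    ssc.start()
    ssc.awaitTermination()
  }
}
```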
- Examples
```scala
package SparkStreaming.trans

import org.apache.spark.SparkConf
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.{Seconds, StreamingContext}

object ByWindow {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[3]").setAppName("transform3")
    // 10-second batch interval
    val ssc = new StreamingContext(conf, Seconds(10))

    val ds: DStream[String] = ssc.socketTextStream("node1", 44444, StorageLevel.MEMORY_ONLY)

    // Tumbling window: window length == slide interval == 10 s, i.e. one batch per window
    val ds1 = ds.window(Seconds(10), Seconds(10))
    ds1.print()

    ssc.start()
    ssc.awaitTermination()
  }
}
```
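To drive any of these examples, you can start a plain socket server on node1 first, for example with netcat (`nc -lk 44444`), then type lines into it; each print() then shows the contents of the current window on the driver console.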
```scala
package SparkStreaming.trans

import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.{Seconds, StreamingContext}

object ByWindow2 {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setAppName("state02").setMaster("local[2]")
    // 10-second batch interval
    val ssc = new StreamingContext(sparkConf, Seconds(10))
    // countByValueAndWindow (and the incremental form of reduceByKeyAndWindow)
    // keeps intermediate state, so a checkpoint directory is required
    ssc.checkpoint("hdfs://node1:9000/sparkstreaming")

    val ds: DStream[String] = ssc.socketTextStream("node1", 44444)
    val ds1 = ds.map((_, 1))

    // reduceByKey over each 10 s tumbling window: word counts per window
    val ds2 = ds1.reduceByKeyAndWindow((a: Int, b: Int) => a + b, Seconds(10), Seconds(10))
    ds2.print()

    // count over each window: total number of elements in the window
    val ds3: DStream[Long] = ds1.countByWindow(Seconds(10), Seconds(10))
    ds3.print()

    // reduce over each window: fold all (word, 1) pairs into a single element;
    // here the keys are concatenated and the counts are summed
    val ds4 = ds1.reduceByWindow((a, b) => (a._1 + b._1, a._2 + b._2), Seconds(10), Seconds(10))
    ds4.print()

    // countByValue over each window: occurrences of each distinct (word, 1) pair
    val ds5 = ds1.countByValueAndWindow(Seconds(10), Seconds(10))
    ds5.print()

    ssc.start()
    ssc.awaitTermination()
  }
}
```
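When the window is much longer than the slide interval, Spark Streaming also offers an incremental overload of reduceByKeyAndWindow that takes an inverse function: instead of re-reducing the whole window on every slide, it adds the batch entering the window and subtracts the batch leaving it. A sketch reusing ds1 from ByWindow2 above (the 60 s / 10 s values are illustrative); like countByValueAndWindow, it needs the checkpoint directory already set:

```scala
// Incremental window word count: add counts for the batch entering the window
// and subtract counts for the batch leaving it. Requires ssc.checkpoint(...).
val windowedCounts = ds1.reduceByKeyAndWindow(
  (a: Int, b: Int) => a + b, // applied to batches entering the window
  (a: Int, b: Int) => a - b, // inverse, applied to batches leaving the window
  Seconds(60),               // window length
  Seconds(10)                // slide interval
)
windowedCounts.print()
```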
Use case: blacklisting. Flag any user who appears at least 10 times within a one-minute window.
```scala
package SparkStreaming.trans

import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.{Minutes, Seconds, StreamingContext}

object BlackUser {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setAppName("state01").setMaster("local[2]")
    // 3-second batch interval; the 1-minute window below is a multiple of it
    val ssc = new StreamingContext(sparkConf, Seconds(3))
    val ds: DStream[String] = ssc.socketTextStream("node1", 44444)

    // Tumbling 1-minute window over the incoming user IDs
    val ds1: DStream[String] = ds.window(Minutes(1), Minutes(1))
    // Count occurrences of each user within the window
    val ds2: DStream[(String, Int)] = ds1.map((_, 1)).reduceByKey(_ + _)

    // Keep only users seen at least 10 times in the window
    val ds3: DStream[(String, Int)] = ds2.filter(_._2 >= 10)
    ds3.print()

    ssc.start()
    ssc.awaitTermination()
  }
}
```
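print() only shows the first few flagged users on the driver console; a real blacklist job would act on them. A minimal sketch of one way to do that with foreachRDD (logging on the driver here; persisting to HDFS or a database would be the more realistic choice):

```scala
// Act on the flagged users instead of just printing the DStream. collect()
// is acceptable here only because the flagged set per window is small.
ds3.foreachRDD { rdd =>
  rdd.collect().foreach { case (user, count) =>
    println(s"blacklist candidate: $user appeared $count times in the last minute")
  }
}
```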