- HighWordCountAccumulator.scala
| package accumulator |
| |
| import org.apache.spark.util.AccumulatorV2 |
| |
| import scala.collection.mutable |
| |
| |
| |
| |
| |
| |
/**
 * Accumulator that counts word occurrences.
 *
 * Each `add` call takes an array of words and increments the count of every
 * word in it; `value` exposes the running totals as a mutable map from word
 * to count.
 */
class HighWordCountAccumulator extends AccumulatorV2[Array[String], mutable.Map[String, Long]] {

  /** Running word counts. Kept as a public `var` because callers read it directly. */
  var wordCountMap: mutable.Map[String, Long] = mutable.Map.empty[String, Long]

  /** @return `true` when no word has been counted yet. */
  override def isZero: Boolean = wordCountMap.isEmpty

  /**
   * Creates an independent accumulator holding the same counts.
   *
   * The entries are copied into a fresh map so that later updates to either
   * accumulator are not visible through the other.
   */
  override def copy(): AccumulatorV2[Array[String], mutable.Map[String, Long]] = {
    val duplicate = new HighWordCountAccumulator()
    duplicate.wordCountMap = mutable.Map.empty[String, Long] ++= wordCountMap
    duplicate
  }

  /** Discards all counts by swapping in a new empty map. */
  override def reset(): Unit = {
    wordCountMap = mutable.Map.empty[String, Long]
  }

  /**
   * Counts every word in `v` once per occurrence.
   *
   * @param v words to count; repeated words are counted repeatedly
   */
  override def add(v: Array[String]): Unit =
    v.foreach(word => increment(word, 1L))

  /**
   * Adds the counts held by `other` into this accumulator.
   *
   * @param other accumulator whose per-word counts are summed into this one
   */
  override def merge(other: AccumulatorV2[Array[String], mutable.Map[String, Long]]): Unit =
    other.value.foreach { case (word, count) => increment(word, count) }

  /** @return the live map of word counts (not a snapshot). */
  override def value: mutable.Map[String, Long] = wordCountMap

  /** Adds `delta` to the count of `word`, treating an absent word as zero. */
  private def increment(word: String, delta: Long): Unit =
    wordCountMap.update(word, wordCountMap.getOrElse(word, 0L) + delta)
}
| package accumulator |
| |
| import org.apache.spark.rdd.RDD |
| import org.apache.spark.{SparkConf, SparkContext} |
| |
/**
 * Driver program: counts the words of a text file with a
 * [[HighWordCountAccumulator]] and prints the resulting map.
 */
object HighAccCode {

  /**
   * Runs the word count on a local two-thread Spark context.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setMaster("local[2]").setAppName("accumulator")
    val sc = new SparkContext(sparkConf)
    try {
      val rdd: RDD[String] = sc.textFile("hdfs://node1:9000/wc.txt")

      val hwca = new HighWordCountAccumulator()
      sc.register(hwca)

      // Update the accumulator inside an action rather than a transformation:
      // updates made in transformations can be applied more than once when a
      // task or stage is re-executed. Using foreach also avoids collecting
      // every word to the driver just to force evaluation.
      rdd.foreach(line => hwca.add(line.split(" ")))

      println(hwca.value)
    } finally {
      // Release the context even when the job fails.
      sc.stop()
    }
  }
}
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· TypeScript + Deepseek 打造卜卦网站:技术与玄学的结合
· Manus的开源复刻OpenManus初探
· AI 智能体引爆开源社区「GitHub 热点速览」
· 三行代码完成国际化适配,妙~啊~
· .NET Core 中如何实现缓存的预热?