注:
本内容依赖相关参考资料以及本人理解,不保证100%正确,仅供参考
欢迎大家指出错误之处,一同交流、学习。
----------------------------------------------------------------------------------------------------------------
建议大家先看一下:https://www.cnblogs.com/bigdatacaoyu/p/10925404.html
我的另一篇博文,说明如何在命令行下实现单行代码执行WordsCount
本篇文章就是把那篇文章中的原理封装成了Actor,并针对多线程对其中一两个函数做了轻度修改
代码如下:
package com.caoyu.actor.wordcount

import scala.actors.{Actor, Future}
import scala.collection.mutable
import scala.io.Source

/**
 * Map-side worker actor of the word-count example.
 *
 * It reacts to two messages:
 *  - `MapTaskSummit(filePath)`: reads the file, counts each space-separated token and
 *    replies to the sender with a `MapTaskResult`.
 *  - `MapTaskShutdown`: stops the actor.
 */
class Task extends Actor {
  override def act(): Unit = {
    loop { // Keep handling messages until exit() is called.
      react { // Partial function over the incoming messages.
        case MapTaskSummit(filePath) =>
          // A map task was submitted for this file.
          println(Thread.currentThread().getName + ", map actor has created......")
          val fileSource = Source.fromFile(filePath)
          // Read every line into a List before closing the source; the finally clause
          // releases the file handle even when reading throws.
          val lines =
            try fileSource.getLines().toList
            finally fileSource.close()
          // All words of the file in one flat list (tokens are split on a single space).
          val wordsList = lines.flatMap(_.split(" "))
          // map yields List((word, 1), (word, 1), ...);
          // groupBy yields Map(word -> List((word, 1), (word, 1), ...)),
          // so the size of each value is that word's number of occurrences.
          val wordWithOccurrencesNumberMap = wordsList.map((_, 1)).groupBy(_._1)
          // Collapse every group to its size: Map(word -> count).
          // The entry is used twice, so it needs a name: each '_' placeholder would
          // stand for a separate parameter.
          val mapResult = wordWithOccurrencesNumberMap.map(x => (x._1, x._2.size))
          // Send the result back to whoever submitted the task.
          sender ! MapTaskResult(mapResult)
        case MapTaskShutdown =>
          // Shutdown request: stop this map task actor.
          println(Thread.currentThread().getName + ", this actor will be shutdown in time......")
          exit()
      }
    }
  }
}

// Message that submits a map task; carries the path of the file to count.
case class MapTaskSummit(filePath: String)

// Message that asks a map task actor to shut down.
case object MapTaskShutdown

// Case class that wraps the result of a map task.
case class MapTaskResult(mapResult: Map[String, Int])

/**
 * Driver of the actor based word count.
 *
 * Starts one `Task` actor per input file, submits the file path to it with `!!`,
 * polls the returned futures until every one has delivered its `MapTaskResult`,
 * merges the per-file counts into a list sorted by descending count, prints it and
 * finally sends `MapTaskShutdown` to every actor it created.
 */
object ActorWordsCount extends App {
  val files = Array("D:\\tmp\\words.txt", "D:\\tmp\\words-2.txt")
  val futures = new mutable.HashSet[Future[Any]]
  // A buffer rather than a set: with a HashSet, flatMap would build a set of
  // (word, count) pairs, so equal pairs coming from different files (for example
  // (beautiful,1) from both) would collapse into one and be under-counted.
  val results = new mutable.ListBuffer[MapTaskResult]()
  val createdActors = new mutable.HashSet[Actor]()

  // Start one actor per file and send it the file path asynchronously.
  for (filePath <- files) {
    val actor = new Task
    createdActors += actor
    // '!!' returns a future that will hold the actor's reply.
    val future = actor.start() !! MapTaskSummit(filePath)
    futures += future
  }

  while (futures.nonEmpty) {
    // Only look at the futures whose result has already arrived; this may be empty.
    // filter builds a new set, so removing from `futures` inside the loop is safe.
    val completedFutures = futures.filter(_.isSet)
    for (r <- completedFutures) {
      // apply() returns the reply typed as Any; match instead of casting.
      r.apply() match {
        case mapResult: MapTaskResult => results += mapResult
        case other => throw new IllegalStateException("Unexpected map task reply: " + other)
      }
      // The loop condition is "futures is not empty", so drop every collected future
      // to let the while loop terminate.
      futures -= r
    }
    Thread.sleep(10)
  }

  // All map tasks have finished: merge the per-file results.
  // groupBy gives word -> List((word, n1), (word, n2), ...); foldLeft sums the counts.
  val finallyResult = results.flatMap(_.mapResult).groupBy(_._1).mapValues(_.foldLeft(0)(_+_._2))
    .toList.sortBy(_._2).reverse
  println(finallyResult)

  // Work is done: tell every actor that was created to shut down.
  for (actor <- createdActors) {
    actor ! MapTaskShutdown
  }
  println("All work has finished......")
}
讲解:
val finallyResult = results.flatMap(_.mapResult).groupBy(_._1).mapValues(_.foldLeft(0)(_+_._2)) .toList.sortBy(_._2).reverse
为什么在汇总的时候还用到了groupBy 以及最后mapValues的时候为什么是foldLeft 而不是原本单行代码wordscount中的size呢。
首先
因为我们是两个文件,最后汇总到results中。所以results经过flatMap(_.mapResult)后得到的是:
List((beautiful,1), (down,2), (byond,1), (stairs,1), (people,1), (choosed,1), (this,3), (in,2), (switch,1), (your,3), (heard,1), (daughter,1), (off,1), (worry,1), (are,6), (is,11), (oh,3), (excuses,1), (honey,1), (am,3), (ringing,1), (want,1), (sure,1), (what,2), (noise,1), (front,1), (someone's,1), (christmas,1), (happen,1), (big,1), (so,5), (keep,1), (minute,1), (do,3), (scored,1), (saw,1), (us,1), (it,5), (watch,2), (a,5), (movie,1), (office,1), (night,1), (inconsiderate,1), (thing,1), (she,2), (okay,1), (nice,1), (such,1), (trying,1), (finish,1), (way,2), (last,2), (don't,3), (i,12), (56th,1), (that,4), (hear,2), (to,6), (you,14), (did,2), (keeping,1), (goodbye,1), (here,2), (talking,1), (was,1), (there,1), (70th,1), (at,1), (sir,1), (can,2), (on,2), (mind,1), (how,1), (my,2), (party,1), (take,1), (embarrassing,1), (floor,2), (me,1), (rocks,1), (by,1), (think,1), (new,1), (will,2), (much,1), (scala,1), (not,2), (with,1), (from,3), (money,1), (forget,1), (beijing,1), (news,1), (football,2), (skills,1), (file,1), (which,1), (sorry,1), (street,1), (get,1), (fine,2), (more,1), (haha,1), (hello,1), (catch,1), (about,3), (wife,1), (game,1), (thank,1), (morning,2), (no,3), (very,1), (we,1), (some,2), (when,1), (deal,1), (learning,1), (college,1), (like,2), (of,1), (and,3), (right,1), (phone,2), (marking,1), (the,7), (those,1), (beautiful,1), (down,4), (byond,1), (stairs,1), (people,1), (choosed,1), (bye,1), (this,3), (in,3), (switch,1), (your,3), (heard,1), (daughter,1), (off,1), (point,1), (worry,1), (are,6), (is,11), (oh,3), (why,1), (wait,1), (excuses,1), (honey,1), (try,1), (got,1), (am,3), (ringing,1), (want,2), (but,1), (sure,1), (what,2), (noise,1), (front,1), (line,1), (if,1), (someone's,1), (christmas,1), (happen,1), (saying,1), (big,1), (so,6), (keep,1), (minute,1), (do,4), (all,5), (scored,1), (crash,1), (just,1), (saw,1), (us,1), (it,6), (watch,2), (a,5), (movie,1), (office,1), (night,1), (bad,1), (inconsiderate,1), (thing,2), (she,2), (okay,1), (nice,1), 
(such,1), (trying,1), (finish,1), (way,2), (last,2), (don't,4), (i,19), (56th,1), (that,4), (hear,2), (to,10), (you,22), (know,1), (did,2), (falls,2), (keeping,1), (goodbye,2), (here,2), (talking,1), (was,1), (there,1), (70th,1), (at,1), (been,1), (sir,1), (can,2), (on,2), (cloud,1), (mind,1), (how,3), (my,3), (party,1), (take,3), (embarrassing,1), (floor,2), (me,1), (rocks,1), (by,1), (think,1), (new,1), (will,3), (much,1), (cause,1), (scala,1), (not,3), (with,2), (from,3), (still,1), (means,1), (what's,1), (money,1), (forget,1), (beijing,1), (joke,1), (news,1), (football,2), (skills,1), (file,1), (which,1), (say,3), (sorry,1), (be,1), (street,1), (get,1), (fine,3), (more,1), (haha,1), (hello,1), (persional,1), (catch,1), (about,3), (wife,1), (game,1), (thank,1), (morning,3), (call,1), (no,3), (very,1), (we,2), (some,2), (when,3), (deal,1), (learning,1), (college,1), (like,2), (of,1), (and,6), (right,1), (phone,2), (marking,1), (the,12), (those,1))
其中如图:
因为两个文件都有 you这个单词,一个文件14次 一个文件22次
所以需要groupBy 把内容变成:
scala.collection.immutable.Map[String,List[(String, Int)]] = Map(you -> List((you,14), (you,22))..........
然后基于上面的结果进行汇总
对单个文件的时候我们是size汇总因为内容是比如:
Map(you -> List( (you,1), (you,1), (you,1), (you,1), (you,1) ....... )
所以得到value的size就得到you的次数
但是现在得到的是:
Map(you -> List((you,14), (you,22))
用size得到的就是2,所以要用foldLeft进行累加
如: mapValues(_.foldLeft(0)(_ + _._2))
详解:
这里出现了4个 _
首先,调用mapValues的数据是
Map(you -> List((you,14), (you,22))
那么 mapValues取的就是
List((you,14), (you,22))
对应第一个 _
后面的 _ + _._2 的意思是
对 _._2 进行累加 初始值是0
也就是第一次执行是 0 + _._2
就是说 第二个_ 第一次 执行的时候 的意义就是0,我们设置的初始值0
但是后面每次的意义就是上一次的结果
第3和第4个_是指:
第三个_ 指的是 List((you,14), (you,22)) 中当前正在处理的那个元素,
也就是一个元组,例如 (you,14)(第二次执行时是 (you,22))
第四个_ 其实不是占位符,而是元组方法 _2 名字的一部分: _._2 表示取当前元组的第二个元素,
所以得到的就是 14(或者22,看执行的第几次)
所以执行完的结果就是:
Map(you -> 14 + 22=36)
可能有点晕。大家可以去看看foldLeft 方法的说明就能明白了 主要是foldLeft这里比较晕人。
参考,在命令行上的模拟:
scala> import scala.io.Source import scala.io.Source scala> val lines1 = Source.fromFile("D:\\tmp\\words.txt").getLines.toList lines1: List[String] = List(hello scala, and i want to get some more money, and i do like to learning some new skills, haha so what are you from, i am from beijing and what you from, so which way you will choosed when you are finish your college, how are you on this fine morning, i am fine thank you, did you catch the news this morning, i heard there was a file a byond street, no i don't hear about it, do you happen to watch the football game last night, the rocks scored in last minute, no i don't like football, oh by the way i saw you with your daughter at office christmas party she is very beautiful, she is my wife oh here is my floor nice talking to you goodbye, sir this is the 56th floor we are on the 70th, that is okay i will take the stairs, those people in front of us are marking... scala> val lines2 = Source.fromFile("D:\\tmp\\words-2.txt").getLines.toList lines2: List[String] = List(hello scala, and i want to get some more money, and i do like to learning some new skills, haha so what are you from, i am from beijing and what you from, so which way you will choosed when you are finish your college, how are you on this fine morning, i am fine thank you, did you catch the news this morning, i heard there was a file a byond street, no i don't hear about it, do you happen to watch the football game last night, the rocks scored in last minute, no i don't like football, oh by the way i saw you with your daughter at office christmas party she is very beautiful, she is my wife oh here is my floor nice talking to you goodbye, sir this is the 56th floor we are on the 70th, that is okay i will take the stairs, those people in front of us are marking... 
scala> lines1.map(_.split(" ")).flatten.map((_, 1)).groupBy(_._1).mapValues(_.size) res0: scala.collection.immutable.Map[String,Int] = Map(beautiful -> 1, down -> 2, byond -> 1, stairs -> 1, people -> 1, choosed -> 1, this -> 3, in -> 2, switch -> 1, your -> 3, heard -> 1, daughter -> 1, off -> 1, worry -> 1, are -> 6, is -> 11, oh -> 3, excuses -> 1, honey -> 1, am -> 3, ringing -> 1, want -> 1, sure -> 1, what -> 2, noise -> 1, front -> 1, someone's -> 1, christmas -> 1, happen -> 1, big -> 1, so -> 5, keep -> 1, minute -> 1, do -> 3, scored -> 1, saw -> 1, us -> 1, it -> 5, watch -> 2, a -> 5, movie -> 1, office -> 1, night -> 1, inconsiderate -> 1, thing -> 1, she -> 2, okay -> 1, nice -> 1, such -> 1, trying -> 1, finish -> 1, way -> 2, last -> 2, don't -> 3, i -> 12, 56th -> 1, that -> 4, hear -> 2, to -> 6, you -> 14, did -> 2, keeping -> 1, goodbye -> 1, here -... scala> val mapResult1 = lines1.map(_.split(" ")).flatten.map((_, 1)).groupBy(_._1).mapValues(_.size) mapResult1: scala.collection.immutable.Map[String,Int] = Map(beautiful -> 1, down -> 2, byond -> 1, stairs -> 1, people -> 1, choosed -> 1, this -> 3, in -> 2, switch -> 1, your -> 3, heard -> 1, daughter -> 1, off -> 1, worry -> 1, are -> 6, is -> 11, oh -> 3, excuses -> 1, honey -> 1, am -> 3, ringing -> 1, want -> 1, sure -> 1, what -> 2, noise -> 1, front -> 1, someone's -> 1, christmas -> 1, happen -> 1, big -> 1, so -> 5, keep -> 1, minute -> 1, do -> 3, scored -> 1, saw -> 1, us -> 1, it -> 5, watch -> 2, a -> 5, movie -> 1, office -> 1, night -> 1, inconsiderate -> 1, thing -> 1, she -> 2, okay -> 1, nice -> 1, such -> 1, trying -> 1, finish -> 1, way -> 2, last -> 2, don't -> 3, i -> 12, 56th -> 1, that -> 4, hear -> 2, to -> 6, you -> 14, did -> 2, keeping -> 1, goodbye -> 1, ... 
scala> val mapResult2 = lines2.map(_.split(" ")).flatten.map((_, 1)).groupBy(_._1).mapValues(_.size) mapResult2: scala.collection.immutable.Map[String,Int] = Map(beautiful -> 1, down -> 4, byond -> 1, stairs -> 1, people -> 1, choosed -> 1, bye -> 1, this -> 3, in -> 3, switch -> 1, your -> 3, heard -> 1, daughter -> 1, off -> 1, point -> 1, worry -> 1, are -> 6, is -> 11, oh -> 3, why -> 1, wait -> 1, excuses -> 1, honey -> 1, try -> 1, got -> 1, am -> 3, ringing -> 1, want -> 2, but -> 1, sure -> 1, what -> 2, noise -> 1, front -> 1, line -> 1, if -> 1, someone's -> 1, christmas -> 1, happen -> 1, saying -> 1, big -> 1, so -> 6, keep -> 1, minute -> 1, do -> 4, all -> 5, scored -> 1, crash -> 1, just -> 1, saw -> 1, us -> 1, it -> 6, watch -> 2, a -> 5, movie -> 1, office -> 1, night -> 1, bad -> 1, inconsiderate -> 1, thing -> 2, she -> 2, okay -> 1, nice -> 1, such -> 1, trying -> ... scala> val results = List(mapResult1, mapResult2) results: List[scala.collection.immutable.Map[String,Int]] = List(Map(beautiful -> 1, down -> 2, byond -> 1, stairs -> 1, people -> 1, choosed -> 1, this -> 3, in -> 2, switch -> 1, your -> 3, heard -> 1, daughter -> 1, off -> 1, worry -> 1, are -> 6, is -> 11, oh -> 3, excuses -> 1, honey -> 1, am -> 3, ringing -> 1, want -> 1, sure -> 1, what -> 2, noise -> 1, front -> 1, someone's -> 1, christmas -> 1, happen -> 1, big -> 1, so -> 5, keep -> 1, minute -> 1, do -> 3, scored -> 1, saw -> 1, us -> 1, it -> 5, watch -> 2, a -> 5, movie -> 1, office -> 1, night -> 1, inconsiderate -> 1, thing -> 1, she -> 2, okay -> 1, nice -> 1, such -> 1, trying -> 1, finish -> 1, way -> 2, last -> 2, don't -> 3, i -> 12, 56th -> 1, that -> 4, hear -> 2, to -> 6, you -> 14, did -> 2, keeping -> 1, goodby... 
scala> results.flatten res1: List[(String, Int)] = List((beautiful,1), (down,2), (byond,1), (stairs,1), (people,1), (choosed,1), (this,3), (in,2), (switch,1), (your,3), (heard,1), (daughter,1), (off,1), (worry,1), (are,6), (is,11), (oh,3), (excuses,1), (honey,1), (am,3), (ringing,1), (want,1), (sure,1), (what,2), (noise,1), (front,1), (someone's,1), (christmas,1), (happen,1), (big,1), (so,5), (keep,1), (minute,1), (do,3), (scored,1), (saw,1), (us,1), (it,5), (watch,2), (a,5), (movie,1), (office,1), (night,1), (inconsiderate,1), (thing,1), (she,2), (okay,1), (nice,1), (such,1), (trying,1), (finish,1), (way,2), (last,2), (don't,3), (i,12), (56th,1), (that,4), (hear,2), (to,6), (you,14), (did,2), (keeping,1), (goodbye,1), (here,2), (talking,1), (was,1), (there,1), (70th,1), (at,1), (sir,1), (can,2), (on,2), (min... scala> println(results.flatten) List((beautiful,1), (down,2), (byond,1), (stairs,1), (people,1), (choosed,1), (this,3), (in,2), (switch,1), (your,3), (heard,1), (daughter,1), (off,1), (worry,1), (are,6), (is,11), (oh,3), (excuses,1), (honey,1), (am,3), (ringing,1), (want,1), (sure,1), (what,2), (noise,1), (front,1), (someone's,1), (christmas,1), (happen,1), (big,1), (so,5), (keep,1), (minute,1), (do,3), (scored,1), (saw,1), (us,1), (it,5), (watch,2), (a,5), (movie,1), (office,1), (night,1), (inconsiderate,1), (thing,1), (she,2), (okay,1), (nice,1), (such,1), (trying,1), (finish,1), (way,2), (last,2), (don't,3), (i,12), (56th,1), (that,4), (hear,2), (to,6), (you,14), (did,2), (keeping,1), (goodbye,1), (here,2), (talking,1), (was,1), (there,1), (70th,1), (at,1), (sir,1), (can,2), (on,2), (mind,1), (how,1), (my,2), (party,1), (take,1), (embarrassing,1), (floor,2), (me,1), (rocks,1), (by,1), (think,1), (new,1), (will,2), (much,1), (scala,1), (not,2), (with,1), (from,3), (money,1), (forget,1), (beijing,1), (news,1), (football,2), (skills,1), (file,1), (which,1), (sorry,1), (street,1), (get,1), (fine,2), (more,1), (haha,1), (hello,1), (catch,1), (about,3), 
(wife,1), (game,1), (thank,1), (morning,2), (no,3), (very,1), (we,1), (some,2), (when,1), (deal,1), (learning,1), (college,1), (like,2), (of,1), (and,3), (right,1), (phone,2), (marking,1), (the,7), (those,1), (beautiful,1), (down,4), (byond,1), (stairs,1), (people,1), (choosed,1), (bye,1), (this,3), (in,3), (switch,1), (your,3), (heard,1), (daughter,1), (off,1), (point,1), (worry,1), (are,6), (is,11), (oh,3), (why,1), (wait,1), (excuses,1), (honey,1), (try,1), (got,1), (am,3), (ringing,1), (want,2), (but,1), (sure,1), (what,2), (noise,1), (front,1), (line,1), (if,1), (someone's,1), (christmas,1), (happen,1), (saying,1), (big,1), (so,6), (keep,1), (minute,1), (do,4), (all,5), (scored,1), (crash,1), (just,1), (saw,1), (us,1), (it,6), (watch,2), (a,5), (movie,1), (office,1), (night,1), (bad,1), (inconsiderate,1), (thing,2), (she,2), (okay,1), (nice,1), (such,1), (trying,1), (finish,1), (way,2), (last,2), (don't,4), (i,19), (56th,1), (that,4), (hear,2), (to,10), (you,22), (know,1), (did,2), (falls,2), (keeping,1), (goodbye,2), (here,2), (talking,1), (was,1), (there,1), (70th,1), (at,1), (been,1), (sir,1), (can,2), (on,2), (cloud,1), (mind,1), (how,3), (my,3), (party,1), (take,3), (embarrassing,1), (floor,2), (me,1), (rocks,1), (by,1), (think,1), (new,1), (will,3), (much,1), (cause,1), (scala,1), (not,3), (with,2), (from,3), (still,1), (means,1), (what's,1), (money,1), (forget,1), (beijing,1), (joke,1), (news,1), (football,2), (skills,1), (file,1), (which,1), (say,3), (sorry,1), (be,1), (street,1), (get,1), (fine,3), (more,1), (haha,1), (hello,1), (persional,1), (catch,1), (about,3), (wife,1), (game,1), (thank,1), (morning,3), (call,1), (no,3), (very,1), (we,2), (some,2), (when,3), (deal,1), (learning,1), (college,1), (like,2), (of,1), (and,6), (right,1), (phone,2), (marking,1), (the,12), (those,1)) scala> results.flatten.foldLeft(0)(_+_._2) res3: Int = 552 scala> results.flatten.groupBy(_._1) res4: scala.collection.immutable.Map[String,List[(String, Int)]] = 
Map(beautiful -> List((beautiful,1), (beautiful,1)), down -> List((down,2), (down,4)), byond -> List((byond,1), (byond,1)), stairs -> List((stairs,1), (stairs,1)), people -> List((people,1), (people,1)), choosed -> List((choosed,1), (choosed,1)), bye -> List((bye,1)), this -> List((this,3), (this,3)), in -> List((in,2), (in,3)), switch -> List((switch,1), (switch,1)), your -> List((your,3), (your,3)), heard -> List((heard,1), (heard,1)), daughter -> List((daughter,1), (daughter,1)), off -> List((off,1), (off,1)), point -> List((point,1)), worry -> List((worry,1), (worry,1)), are -> List((are,6), (are,6)), is -> List((is,11), (is,11)), oh -> List((oh,3), (oh,3)), why -> List((why,1)), wait -> List((wait,1)), excuses -> Li... scala> results.flatten.groupBy(_._1).mapValues(_.foldLeft(0)(_+_._2)) res5: scala.collection.immutable.Map[String,Int] = Map(beautiful -> 2, down -> 6, byond -> 2, stairs -> 2, people -> 2, choosed -> 2, bye -> 1, this -> 6, in -> 5, switch -> 2, your -> 6, heard -> 2, daughter -> 2, off -> 2, point -> 1, worry -> 2, are -> 12, is -> 22, oh -> 6, why -> 1, wait -> 1, excuses -> 2, honey -> 2, try -> 1, got -> 1, am -> 6, ringing -> 2, want -> 3, but -> 1, sure -> 2, what -> 4, noise -> 2, front -> 2, line -> 1, if -> 1, someone's -> 2, christmas -> 2, happen -> 2, saying -> 1, big -> 2, so -> 11, keep -> 2, minute -> 2, do -> 7, all -> 5, scored -> 2, crash -> 1, just -> 1, saw -> 2, us -> 2, it -> 11, watch -> 4, a -> 10, movie -> 2, office -> 2, night -> 2, bad -> 1, inconsiderate -> 2, thing -> 3, she -> 4, okay -> 2, nice -> 2, such -> 2, trying -> 2,... 
scala> results.flatten.groupBy(_._1).mapValues(_.foldLeft(0)(_+_._2)).toList.sortBy(_._2).reverse res6: List[(String, Int)] = List((you,36), (i,31), (is,22), (the,19), (to,16), (are,12), (it,11), (so,11), (a,10), (and,9), (that,8), (don't,7), (do,7), (no,6), (about,6), (from,6), (am,6), (oh,6), (your,6), (this,6), (down,6), (morning,5), (fine,5), (not,5), (will,5), (my,5), (all,5), (in,5), (phone,4), (like,4), (when,4), (some,4), (football,4), (floor,4), (take,4), (how,4), (on,4), (can,4), (here,4), (did,4), (hear,4), (last,4), (way,4), (she,4), (watch,4), (what,4), (we,3), (say,3), (with,3), (goodbye,3), (thing,3), (want,3), (those,2), (marking,2), (right,2), (of,2), (college,2), (learning,2), (deal,2), (very,2), (thank,2), (game,2), (wife,2), (catch,2), (hello,2), (haha,2), (more,2), (get,2), (street,2), (sorry,2), (which,2), (file,2), (skills,2), (news,2), (beijing,2), (forget,2)... scala>