Scala_自己封装方法完成词频统计(WordCount)

需求：自己封装 map、groupBy、sortBy 方法，完成词频统计。

package WordCount

import scala.collection.mutable
import scala.collection.mutable.ListBuffer
import scala.io.StdIn

//需求:map sortby groupby...

/**
 * Word-frequency count (WordCount) built on hand-written equivalents of
 * `map`, `groupBy` and `sortBy` rather than the standard collection methods.
 */
object Demo_1 {

  /** Prompts for one line on standard input and prints its word-frequency table. */
  def main(args: Array[String]): Unit = {
    println("请输入要分割的单词字符串，单词之间用空格分开")
    // StdIn.readLine() yields null at end of input; Option(...) turns that into an empty list.
    val string = Option(StdIn.readLine()).toList
    split(string)
  }

  /**
   * Runs the whole pipeline on the given lines and prints every intermediate
   * stage: the word list, the (word, 1) pairs, the groups, the per-word totals
   * and finally the totals sorted by descending count.
   *
   * @param string the input lines; words are separated by whitespace
   */
  def split(string: List[String]): Unit = {
    // Split on runs of whitespace and drop empty tokens, so leading or repeated
    // separators do not get counted as an empty "word".
    val words = string.flatMap(_.split("\\s+")).filter(_.nonEmpty)
    println(words)

    // Pair every word with an initial count of 1.
    val wordToOne = map1(words, x => (x, 1))
    println(wordToOne)

    // Collect the pairs that share the same word.
    val group = groupby(wordToOne)
    println(group)

    // Add up the counts inside each group.
    val groupInSum = map2(group)
    println(groupInSum)

    // Most frequent word first.
    val result = sortby(groupInSum)
    println(result)
  }

  /**
   * Hand-written `map`: applies `f` to every element of `lst`, keeping order.
   *
   * @param lst the words to transform
   * @param f   the function producing a (word, count) pair for each word
   * @return a new buffer holding `f(x)` for each `x` in `lst`
   */
  def map1(lst: List[String], f: String => (String, Int)): ListBuffer[(String, Int)] = {
    val res = new ListBuffer[(String, Int)]
    for (x <- lst) {
      res += f(x)
    }
    res
  }

  /**
   * Reduces every group to a single (word, total) pair, where the total is the
   * sum of the counts stored in that group's pairs.
   *
   * @param lst the groups produced by [[groupby]]
   * @return one (word, total) pair per key, in the map's iteration order
   */
  def map2(lst: mutable.HashMap[String, ListBuffer[(String, Int)]]): ListBuffer[(String, Int)] = {
    val res = new ListBuffer[(String, Int)]
    for ((k, v) <- lst) {
      // Sum the stored counts instead of relying on every count being 1.
      val total = v.foldLeft(0)((acc, pair) => acc + pair._2)
      res += ((k, total))
    }
    res
  }

  /**
   * Hand-written `groupBy`: buckets the pairs by their word (first component).
   * Every input pair is kept unchanged, in input order, inside its bucket.
   *
   * @param lst the (word, count) pairs to group
   * @return a map from each distinct word to the pairs carrying that word
   */
  def groupby(lst: ListBuffer[(String, Int)]): mutable.HashMap[String, ListBuffer[(String, Int)]] = {
    val res = new mutable.HashMap[String, ListBuffer[(String, Int)]]
    for (pair <- lst) {
      // One hash lookup per element: create the bucket on first sight, then append.
      res.getOrElseUpdate(pair._1, new ListBuffer[(String, Int)]) += pair
    }
    res
  }

  /**
   * Hand-written `sortBy`: sorts the pairs by count, largest first, using a
   * bubble sort. Pairs with equal counts keep their relative order. The sort is
   * in place: `lst` itself is reordered and returned.
   *
   * @param lst the (word, count) pairs to sort
   * @return the same buffer instance, now in descending count order
   */
  def sortby(lst: ListBuffer[(String, Int)]): ListBuffer[(String, Int)] = {
    // Indexing a ListBuffer is linear, so sort an array snapshot instead.
    val items = lst.toArray
    var unsortedEnd = items.length - 1
    var swapped = true
    // Stop early once a full pass makes no swap.
    while (swapped && unsortedEnd > 0) {
      swapped = false
      for (i <- 0 until unsortedEnd) {
        // Strict comparison keeps equal counts in their original order.
        if (items(i)._2 < items(i + 1)._2) {
          val temp = items(i)
          items(i) = items(i + 1)
          items(i + 1) = temp
          swapped = true
        }
      }
      unsortedEnd -= 1
    }
    // Refill the caller's buffer so the in-place contract still holds.
    lst.clear()
    lst ++= items
    lst
  }

}

posted @ 2020-06-16 14:43 li-shan 阅读(470) 评论(0) 收藏举报

刷新页面返回顶部

木子山13

Scala_自己封装方法完成词频统计(WordCount)

公告