scala03

scala03课件

元组Tuple

Array ArrayBuffer

List ListBuffer

Map HashMap

Set

scala.collection.mutable._

sorted sortBy sortWith

scala> import scala.collection._

import scala.collection._

 

scala> var map = mutable.Map(""->1)

map: scala.collection.mutable.Map[String,Int] = Map("" -> 1)

 

scala> var map =  immutable.Map(1->1)

map: scala.collection.immutable.Map[Int,Int] = Map(1 -> 1)

 

集合上的方法

map方法

scala> val change=(x:Int)=>x*10

change: Int => Int = <function1>

 

scala> arr.map(change)

res1: Array[Int] = Array(10, 20, 30, 40, 50, 60, 70)

 

scala> arr.map(change)

res2: Array[Int] = Array(10, 20, 30, 40, 50, 60, 70)

 

scala> arr.map((x:Int)=>x*10)

res3: Array[Int] = Array(10, 20, 30, 40, 50, 60, 70)

 

scala> arr.map(x=>x*10)

res4: Array[Int] = Array(10, 20, 30, 40, 50, 60, 70)

 

scala> arr.map(_*10)

res5: Array[Int] = Array(10, 20, 30, 40, 50, 60, 70)

scala> arr.map(x=>if(x%2==1)x*10 else x)

res6: Array[Int] = Array(10, 2, 30, 4, 50, 6, 70)

 

scala> arr.map(if(_%2==1)_*10 else _)

<console>:16: error: missing parameter type for expanded function ((x$1) => x$1.$percent(2).$eq$eq(1))

       arr.map(if(_%2==1)_*10 else _)

                  ^

<console>:16: error: missing parameter type for expanded function ((x$2) => x$2.$times(10))

       arr.map(if(_%2==1)_*10 else _)

                         ^

 

scala> var arr = Array(1,3,2,4,13,11,9)

arr: Array[Int] = Array(1, 3, 2, 4, 13, 11, 9)

 

scala> arr.sortBy(x=>x)

res8: Array[Int] = Array(1, 2, 3, 4, 9, 11, 13)

 

scala> arr.sortBy(_)

<console>:16: error: missing parameter type for expanded function ((x$1) => arr.sortBy(x$1))

       arr.sortBy(_)

 

scala> arr.sortBy(-_)

res10: Array[Int] = Array(13, 11, 9, 4, 3, 2, 1)

 

scala> arr.sortBy(3*_)

res11: Array[Int] = Array(1, 2, 3, 4, 9, 11, 13)

下划线的使用

  1. 导包的时候通配包中的所有的内容
  2. 每个下划线依次代表一个不同的参数,所以同一个参数不能用下划线引用多次(例如 x=>x*x 不能写成 _*_,后者表示两个参数相乘)
  3. 不能单独使用,必须和别人关联在一起使用

map方法将一个集合中的每个元素都进行遍历处理,具体怎么处理由传入map方法的函数决定;返回的集合类型不变,元素个数不变,但是元素的值甚至元素的类型都可能发生改变(例如Array[Int]经过map(_>3)之后变成Array[Boolean])

 

scala> arr

res12: Array[Int] = Array(1, 3, 2, 4, 13, 11, 9)

 

scala> arr.map(_>3)

res13: Array[Boolean] = Array(false, false, false, true, true, true, true)

 

scala> var arr = Array(("zhangsan",2000),("lisi",3000),("zhaosi",3500))

arr: Array[(String, Int)] = Array((zhangsan,2000), (lisi,3000), (zhaosi,3500))

 

scala> arr.map(t=>t._2*10)

res14: Array[Int] = Array(20000, 30000, 35000)

 

scala> arr.map(t=>(t._1,t._2*10))

res15: Array[(String, Int)] = Array((zhangsan,20000), (lisi,30000), (zhaosi,35000))

list set map

 

filter进行条件过滤

scala> arr.filter(t=>t._2>2500)

res17: Array[(String, Int)] = Array((lisi,3000), (zhaosi,3500))

 

scala> var arr = Array(1,2,3,4,5,6,7,8)

arr: Array[Int] = Array(1, 2, 3, 4, 5, 6, 7, 8)

 

scala> arr.filter(_%2==0)

res18: Array[Int] = Array(2, 4, 6, 8)

 

scala> arr.filterNot(_%2==0)

res19: Array[Int] = Array(1, 3, 5, 7)

filterNot与filter相反,符合条件的元素都不要

 

压平flatten

scala> var arr = Array(1,2,3,4)

arr: Array[Int] = Array(1, 2, 3, 4)

 

scala> var arr1 = Array(arr,arr,arr)

arr1: Array[Array[Int]] = Array(Array(1, 2, 3, 4), Array(1, 2, 3, 4), Array(1, 2, 3, 4))

 

scala> arr1.map(t=>t.map(_*10))

res20: Array[Array[Int]] = Array(Array(10, 20, 30, 40), Array(10, 20, 30, 40), Array(10, 20, 30, 40))

 

scala> arr1.flatten

res21: Array[Int] = Array(1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4)

 

scala> var arr2 = Array(arr1,arr1,arr1)

arr2: Array[Array[Array[Int]]] = Array(Array(Array(1, 2, 3, 4), Array(1, 2, 3, 4), Array(1, 2, 3, 4)), Array(Array(1, 2, 3, 4), Array(1, 2, 3, 4), Array(1, 2, 3, 4)), Array(Array(1, 2, 3, 4), Array(1, 2, 3, 4), Array(1, 2, 3, 4)))

 

scala> arr2.flatten

res22: Array[Array[Int]] = Array(Array(1, 2, 3, 4), Array(1, 2, 3, 4), Array(1, 2, 3, 4), Array(1, 2, 3, 4), Array(1, 2, 3, 4), Array(1, 2, 3, 4), Array(1, 2, 3, 4), Array(1, 2, 3, 4), Array(1, 2, 3, 4))

flatten是压平的意思

scala> var arr = Array("hello world hello jack","hello tom tom spark","jerry lucy")

arr: Array[String] = Array(hello world hello jack, hello tom tom spark, jerry lucy)

 

scala> arr.map(_.split(" "))

res23: Array[Array[String]] = Array(Array(hello, world, hello, jack), Array(hello, tom, tom, spark), Array(jerry, lucy))

 

scala> res23.flatten

res24: Array[String] = Array(hello, world, hello, jack, hello, tom, tom, spark, jerry, lucy)

 

练习题:

scala> var arr = Array("a","b","c","d")

arr: Array[String] = Array(a, b, c, d)

 

scala> var arr = Array("e","f","c","d")

arr: Array[String] = Array(e, f, c, d)

 

scala> var arr1 = Array("a","b","c","d")

arr1: Array[String] = Array(a, b, c, d)

 

scala> arr zip arr1

res25: Array[(String, String)] = Array((e,a), (f,b), (c,c), (d,d))

变成 Array(a,b,c,d,e,f,....)

 

scala> res25.map(t=>Array(t._1,t._2))

res27: Array[Array[String]] = Array(Array(e, a), Array(f, b), Array(c, c), Array(d, d))

 

scala> res27.flatten

res28: Array[String] = Array(e, a, f, b, c, c, d, d)

 

flatten操作一般都携带map

flatMap = map+flatten

scala> res25.flatMap(t=>Array(t._1,t._2))

res31: Array[String] = Array(e, a, f, b, c, c, d, d)

 

groupby按照条件进行分组

scala> var arr = Array(("hello",1),("hello",1),("word",1))

arr: Array[(String, Int)] = Array((hello,1), (hello,1), (word,1))

 

scala> arr.groupBy(t=>t._1)

res32: scala.collection.immutable.Map[String,Array[(String, Int)]] = Map(word -> Array((word,1)), hello -> Array((hello,1), (hello,1)))

 

scala> var arr = Array(1,2,3,4,5,5,6,7,8)

arr: Array[Int] = Array(1, 2, 3, 4, 5, 5, 6, 7, 8)

 

scala> arr.groupBy(_>5)

res33: scala.collection.immutable.Map[Boolean,Array[Int]] = Map(false -> Array(1, 2, 3, 4, 5, 5), true -> Array(6, 7, 8))

 

scala> arr.groupBy(x=>if(x>5)"gt 5" else "lte 5")

res34: scala.collection.immutable.Map[String,Array[Int]] = Map(gt 5 -> Array(6, 7, 8), lte 5 -> Array(1, 2, 3, 4, 5, 5))

 

scala> var arr = Array("hello","hey","hi","spark","smart","sun")

scala> arr.groupBy(_.substring(0,1))

res36: scala.collection.immutable.Map[String,Array[String]] = Map(h -> Array(hello, hey, hi), s -> Array(spark, smart, sun))

 

scala> var arr = Array(("hello",5),("tom",3),("spark",32))

arr: Array[(String, Int)] = Array((hello,5), (tom,3), (spark,32))

 

scala> arr.groupBy(t=>t._2>10)

res37: scala.collection.immutable.Map[Boolean,Array[(String, Int)]] = Map(false -> Array((hello,5), (tom,3)), true -> Array((spark,32)))

groupBy的返回值是一个Map集合,key是分组函数的返回值,value是原集合中属于该分组的元素组成的集合(原来数据的一个片段)

 

grouped分组(片段分组)

scala> arr

res46: Array[Int] = Array(1, 2, 3, 4, 5, 6, 7, 8)

 

scala> arr.grouped(2)

res47: Iterator[Array[Int]] = non-empty iterator

 

scala> res47.foreach(x=>println(x.toList))

List(1, 2)

List(3, 4)

List(5, 6)

List(7, 8)

 

scala> arr.grouped(2)

res49: Iterator[Array[Int]] = non-empty iterator

 

scala> res49.foreach(println(_.toList))

<console>:17: error: missing parameter type for expanded function ((x$1) => x$1.toList)

       res49.foreach(println(_.toList))

scala> arr.grouped(2)

res49: Iterator[Array[Int]] = non-empty iterator

 

scala> res49.foreach(println(_.toList))

<console>:17: error: missing parameter type for expanded function ((x$1) => x$1.toList)

       res49.foreach(println(_.toList))

                             ^

 

scala> val func = (x:Int)=>println(x)

func: Int => Unit = <function1>

 

scala> arr.foreach(println)

1

2

3

4

5

6

7

8

 

scala> arr.foreach(func)

注意函数和方法的调用规范,注意函数和方法的使用情况

val func = (x:Int)=>println(x) 这种情况下println是func函数体中的一个内容,是在函数体中被调用的方法

arr.foreach(println) println方法作为一个函数在函数中作为参数了

arr.foreach(println(_)) 这个println作为方法被调用了,相当于在函数中调用方法

arr.foreach(func)

 

scala中的wordcount

/**
 * Word-count example: reads `aa.txt`, counts how often each word occurs and
 * prints the `(word, count)` pairs, most frequent first.
 */
object WordCount {
  /**
   * Program entry point.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Local import keeps the example self-contained.
    import scala.io.Source

    val source = Source.fromFile("aa.txt")
    try {
      // Split on runs of whitespace and drop empty tokens, so blank lines,
      // leading spaces and repeated spaces are not counted as words.
      val words: Iterator[String] =
        source.getLines().flatMap(_.split("\\s+")).filter(_.nonEmpty)

      // Fold straight into a word -> count map instead of materialising a
      // List[(String, Int)] per word only to take its length.
      val counts: Map[String, Int] =
        words.foldLeft(Map.empty[String, Int]) { (acc, word) =>
          acc.updated(word, acc.getOrElse(word, 0) + 1)
        }

      // Highest count first; ties are broken alphabetically so the output
      // does not depend on hash-map iteration order.
      val ranked: List[(String, Int)] =
        counts.toList.sortBy { case (word, count) => (-count, word) }

      ranked.foreach(println)
    } finally {
      // The iterator is fully consumed above, so the file can be released
      // here even if reading or counting failed.
      source.close()
    }
  }
}

 

集合的并集 交集 差集

union intersect diff

scala> var arr = Array(1,2,3,4,5)

arr: Array[Int] = Array(1, 2, 3, 4, 5)

 

scala> var arr1 = Array(3,4,5,6,7)

arr1: Array[Int] = Array(3, 4, 5, 6, 7)

 

scala> arr union arr1

res53: Array[Int] = Array(1, 2, 3, 4, 5, 3, 4, 5, 6, 7)

 

scala> arr intersect arr1

res54: Array[Int] = Array(3, 4, 5)

 

scala> arr diff arr1

res55: Array[Int] = Array(1, 2)

 

scala> arr1 diff arr

res56: Array[Int] = Array(6, 7)

 

count方法

scala> arr.count

   def count(p: Int => Boolean): Int

 

scala> arr.count(_>3)

res59: Int = 2

 

scala> arr.count(_%2==0)

res60: Int = 2

符合某个条件的元素有几个

 

find找寻,查找

scala> arr

res61: Array[Int] = Array(1, 2, 3, 4, 5)

 

scala> arr.find

   override def find(p: Int => Boolean): Option[Int]

 

scala> arr.find(_>3)

res62: Option[Int] = Some(4)

 

scala> arr.find(_>10)

res63: Option[Int] = None

 

scala> res62.get

res64: Int = 4

符合某个条件的元素,但是只能查询出来第一个

 

to  until 步长

scala> 0 to (10,2)

res71: scala.collection.immutable.Range.Inclusive = Range(0, 2, 4, 6, 8, 10)

 

scala> var arr = Array(1,2,3,4,5,6,7,8,9,10)

arr: Array[Int] = Array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)

 

scala> for(e<- 1 to (arr.length-1,2))yield arr(e)

res72: scala.collection.immutable.IndexedSeq[Int] = Vector(2, 4, 6, 8, 10)

 

随堂练习:

1.写一个函数或者方法 getValue(x:Array[Int],v:Int),求出这个数组中大于v的值的个数、小于v的值的个数、等于v的值的个数,要求一次性返回

 

  2. 存在这样一个数组Array(1,2,3,5,6,7,8),要求把相邻的两个元素两两交换位置,返回字符串"2 1 5 3 7 6 8"

/**
 * Swaps every adjacent pair of elements and joins the result with single spaces.
 *
 * The array is cut into consecutive chunks of two, each chunk is reversed, and
 * the chunks are concatenated in their original order. When the length is odd
 * the last element has no partner and stays where it is, e.g.
 * `Array(1,2,3,5,6,7,8)` becomes `"2 1 5 3 7 6 8"`.
 *
 * @param arr the numbers to rearrange; may be empty
 * @return the rearranged numbers separated by one space, with no leading or
 *         trailing separator; `""` for an empty array
 */
def getReverse2(arr:Array[Int]):String={
  // grouped(2) handles even and odd lengths alike, so a single code path is
  // enough. The previous even-length branch concatenated `t + " "` for every
  // element and therefore returned a trailing space that the odd-length
  // branch (mkString) did not.
  arr.grouped(2).flatMap(_.reverseIterator).mkString(" ")
}

mkString()可以指定集合中元素的分隔符,把集合拼接成字符串

 

作业题:

val d1 = Array(("bj", 28.1), ("sh", 28.7), ("gz", 32.0), ("sz", 33.1))

    val d2 = Array(("bj", 27.3), ("sh", 30.1), ("gz", 33.3))

val d3 = Array(("bj", 28.2), ("sh", 29.1), ("gz", 32.0), ("sz", 30.5))

以上是每个城市的温度,求出每个城市温度的平均值

 

第二题:

val lst = List("Id1-The Spark", "Id2-The Hadoop", "Id3-The Spark")

 

 

 

mapValues方法处理的是Map集合中的value数据

scala> arr.map(t=>(t._1,t._2+10))

res74: scala.collection.Map[String,Int] = Map(zhangsan -> 30, lisi -> 40, wangwu -> 35)

 

scala> arr.mapValues(_+10)

res75: scala.collection.Map[String,Int] = Map(zhangsan -> 30, lisi -> 40, wangwu -> 35)

 

scala> val arr = Array(("zhangsan",20),("lisi",30),("wangwu",25))

arr: Array[(String, Int)] = Array((zhangsan,20), (lisi,30), (wangwu,25))

 

scala> arr.mapValues(_+10)

<console>:16: error: value mapValues is not a member of Array[(String, Int)]

       arr.mapValues(_+10)

mapValues是遍历Map集合中每一个元素的value值,key不变;只能用于Map,不能用于元素是元组的数组(Array[(String,Int)]没有mapValues方法)

 

reduce  reduceLeft  reduceRight

scala> arr.reduce

   def reduce[A1 >: Int](op: (A1, A1) => A1): A1

 

scala> arr.reduce((a,b)=>a+b)

res78: Int = 28

 

scala> arr.reduce(_+_)

res79: Int = 28

 

scala> var arr =Array(("zhangsan",30000),("reba",800000),("nazha",750000))

arr: Array[(String, Int)] = Array((zhangsan,30000), (reba,800000), (nazha,750000))

 

scala> arr.map(_._2.sum)

<console>:16: error: value sum is not a member of Int

       arr.map(_._2.sum)

                    ^

 

scala> arr.map(_._2).sum

res81: Int = 1580000

 

scala> arr.reduce((a,b)=>a._2+b._2)

<console>:16: error: type mismatch;

 found   : Int

 required: (String, Int)

       arr.reduce((a,b)=>a._2+b._2)

 

def reduce[A1 >: A](op: (A1, A1) => A1): A1

源码中规定传入reduce的函数,其两个参数的类型和返回值的类型必须相同,并且必须是调用reduce的集合中元素类型本身或者它的父类型

scala> arr.reduce((a,b)=>a)

res84: (String, Int) = (zhangsan,30000)

 

scala> arr.reduce((a,b)=>("",a._2+b._2))

res85: (String, Int) = ("",1580000)

 

scala> arr.reduce((a:Any,b:Any)=>a.asInstanceOf[(String,Int)]._2+b.asInstanceOf[(String,Int)]._2)

java.lang.ClassCastException: java.lang.Integer cannot be cast to scala.Tuple2

  at $anonfun$1.apply(<console>:16)

  at $anonfun$1.apply(<console>:16)

  at scala.collection.IndexedSeqOptimized$class.foldl(IndexedSeqOptimized.scala:57)

  at scala.collection.IndexedSeqOptimized$class.reduceLeft(IndexedSeqOptimized.scala:74)

  at scala.collection.mutable.ArrayOps$ofRef.reduceLeft(ArrayOps.scala:186)

  at scala.collection.TraversableOnce$class.reduce(TraversableOnce.scala:208)

  at scala.collection.mutable.ArrayOps$ofRef.reduce(ArrayOps.scala:186)

  ... 32 elided

 

scala> arr.reduce((a:Any,b:Any)=>("",a.asInstanceOf[(String,Int)]._2+b.asInstanceOf[(String,Int)]._2))

res88: (String, Int) = ("",1580000)

注意reduce接收的数据和返回值的数据必须是一个种类的

 

def reduce[A1 >: A](op: (A1, A1) => A1): A1 = reduceLeft(op)

reduce的底层使用的是reduceLeft

 

reduceLeft和reduce底层是同一个方法(reduce直接调用reduceLeft),但是reduceLeft允许第二个参数的类型和返回值的类型不一样

 

scala> arr.reduce((a:Any,b:(String,Int))=>("",a.asInstanceOf[(String,Int)]._2+b._2).asInstanceOf[Any])

<console>:16: error: type mismatch;

 found   : (Any, (String, Int)) => Any

 required: (Any, Any) => Any

       arr.reduce((a:Any,b:(String,Int))=>("",a.asInstanceOf[(String,Int)]._2+b._2).asInstanceOf[Any])

                                        ^

 

scala> arr.reduceLeft((a:Any,b:(String,Int))=>("",a.asInstanceOf[(String,Int)]._2+b._2).asInstanceOf[Any])

res92: Any = ("",1580000)

 

 

posted @ 2019-09-16 14:30  lilixia  阅读(250)  评论(0编辑  收藏  举报