scala03
scala03课件
元组Tuple
Array ArrayBuffer
List ListBuffer
Map HashMap
Set
scala.collection.mutable._
sorted sortBy sortWith
scala> import scala.collection._ import scala.collection._
scala> var map = mutable.Map(""->1) map: scala.collection.mutable.Map[String,Int] = Map("" -> 1)
scala> var map = immutable.Map(1->1) map: scala.collection.immutable.Map[Int,Int] = Map(1 -> 1) |
集合上的方法
map方法
scala> val change=(x:Int)=>x*10 change: Int => Int = <function1>
scala> arr.map(change) res1: Array[Int] = Array(10, 20, 30, 40, 50, 60, 70)
scala> arr.map(change) res2: Array[Int] = Array(10, 20, 30, 40, 50, 60, 70)
scala> arr.map((x:Int)=>x*10) res3: Array[Int] = Array(10, 20, 30, 40, 50, 60, 70)
scala> arr.map(x=>x*10) res4: Array[Int] = Array(10, 20, 30, 40, 50, 60, 70)
scala> arr.map(_*10) res5: Array[Int] = Array(10, 20, 30, 40, 50, 60, 70) |
scala> arr.map(x=>if(x%2==1)x*10 else x) res6: Array[Int] = Array(10, 2, 30, 4, 50, 6, 70)
scala> arr.map(if(_%2==1)_*10 else _) <console>:16: error: missing parameter type for expanded function ((x$1) => x$1.$percent(2).$eq$eq(1)) arr.map(if(_%2==1)_*10 else _) ^ <console>:16: error: missing parameter type for expanded function ((x$2) => x$2.$times(10)) arr.map(if(_%2==1)_*10 else _) ^
scala> var arr = Array(1,3,2,4,13,11,9) arr: Array[Int] = Array(1, 3, 2, 4, 13, 11, 9)
scala> arr.sortBy(x=>x) res8: Array[Int] = Array(1, 2, 3, 4, 9, 11, 13)
scala> arr.sortBy(_) <console>:16: error: missing parameter type for expanded function ((x$1) => arr.sortBy(x$1)) arr.sortBy(_)
scala> arr.sortBy(-_) res10: Array[Int] = Array(13, 11, 9, 4, 3, 2, 1)
scala> arr.sortBy(3*_) res11: Array[Int] = Array(1, 2, 3, 4, 9, 11, 13) |
下划线的使用
- 导包的时候通配包中的所有的内容
- 同一个参数不能用下划线引用多次(每出现一个下划线就代表一个新的参数,例如 _+_ 表示两个不同的参数)
- 不能单独使用,必须和别人关联在一起使用
map方法会遍历集合中的每个元素,并用传入的函数逐个进行处理;返回的集合种类不变(例如Array仍然是Array),元素个数不变,但元素的值和类型可能会发生改变
scala> arr res12: Array[Int] = Array(1, 3, 2, 4, 13, 11, 9)
scala> arr.map(_>3) res13: Array[Boolean] = Array(false, false, false, true, true, true, true)
scala> var arr = Array(("zhangsan",2000),("lisi",3000),("zhaosi",3500)) arr: Array[(String, Int)] = Array((zhangsan,2000), (lisi,3000), (zhaosi,3500))
scala> arr.map(t=>t._2*10) res14: Array[Int] = Array(20000, 30000, 35000)
scala> arr.map(t=>(t._1,t._2*10)) res15: Array[(String, Int)] = Array((zhangsan,20000), (lisi,30000), (zhaosi,35000)) |
list set map
filter进行条件过滤
scala> arr.filter(t=>t._2>2500) res17: Array[(String, Int)] = Array((lisi,3000), (zhaosi,3500))
scala> var arr = Array(1,2,3,4,5,6,7,8) arr: Array[Int] = Array(1, 2, 3, 4, 5, 6, 7, 8)
scala> arr.filter(_%2==0) res18: Array[Int] = Array(2, 4, 6, 8)
scala> arr.filterNot(_%2==0) res19: Array[Int] = Array(1, 3, 5, 7) |
filterNot与filter相反,符合条件的元素都不要
压平flatten
scala> var arr = Array(1,2,3,4) arr: Array[Int] = Array(1, 2, 3, 4)
scala> var arr1 = Array(arr,arr,arr) arr1: Array[Array[Int]] = Array(Array(1, 2, 3, 4), Array(1, 2, 3, 4), Array(1, 2, 3, 4))
scala> arr1.map(t=>t.map(_*10)) res20: Array[Array[Int]] = Array(Array(10, 20, 30, 40), Array(10, 20, 30, 40), Array(10, 20, 30, 40))
scala> arr1.flatten res21: Array[Int] = Array(1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4)
scala> var arr2 = Array(arr1,arr1,arr1) arr2: Array[Array[Array[Int]]] = Array(Array(Array(1, 2, 3, 4), Array(1, 2, 3, 4), Array(1, 2, 3, 4)), Array(Array(1, 2, 3, 4), Array(1, 2, 3, 4), Array(1, 2, 3, 4)), Array(Array(1, 2, 3, 4), Array(1, 2, 3, 4), Array(1, 2, 3, 4)))
scala> arr2.flatten res22: Array[Array[Int]] = Array(Array(1, 2, 3, 4), Array(1, 2, 3, 4), Array(1, 2, 3, 4), Array(1, 2, 3, 4), Array(1, 2, 3, 4), Array(1, 2, 3, 4), Array(1, 2, 3, 4), Array(1, 2, 3, 4), Array(1, 2, 3, 4)) |
flatten是压平的意思
scala> var arr = Array("hello world hello jack","hello tom tom spark","jerry lucy") arr: Array[String] = Array(hello world hello jack, hello tom tom spark, jerry lucy)
scala> arr.map(_.split(" ")) res23: Array[Array[String]] = Array(Array(hello, world, hello, jack), Array(hello, tom, tom, spark), Array(jerry, lucy))
scala> res23.flatten res24: Array[String] = Array(hello, world, hello, jack, hello, tom, tom, spark, jerry, lucy) |
练习题:
scala> var arr = Array("a","b","c","d") arr: Array[String] = Array(a, b, c, d)
scala> var arr = Array("e","f","c","d") arr: Array[String] = Array(e, f, c, d)
scala> var arr1 = Array("a","b","c","d") arr1: Array[String] = Array(a, b, c, d)
scala> arr zip arr1 res25: Array[(String, String)] = Array((e,a), (f,b), (c,c), (d,d)) 变成 Array(a,b,c,d,e,f,....)
scala> res25.map(t=>Array(t._1,t._2)) res27: Array[Array[String]] = Array(Array(e, a), Array(f, b), Array(c, c), Array(d, d))
scala> res27.flatten res28: Array[String] = Array(e, a, f, b, c, c, d, d) |
flatten操作一般都携带map
flatMap = map+flatten
scala> res25.flatMap(t=>Array(t._1,t._2)) res31: Array[String] = Array(e, a, f, b, c, c, d, d) |
groupBy按照条件进行分组
scala> var arr = Array(("hello",1),("hello",1),("word",1)) arr: Array[(String, Int)] = Array((hello,1), (hello,1), (word,1))
scala> arr.groupBy(t=>t._1) res32: scala.collection.immutable.Map[String,Array[(String, Int)]] = Map(word -> Array((word,1)), hello -> Array((hello,1), (hello,1)))
scala> var arr = Array(1,2,3,4,5,5,6,7,8) arr: Array[Int] = Array(1, 2, 3, 4, 5, 5, 6, 7, 8)
scala> arr.groupBy(_>5) res33: scala.collection.immutable.Map[Boolean,Array[Int]] = Map(false -> Array(1, 2, 3, 4, 5, 5), true -> Array(6, 7, 8))
scala> arr.groupBy(x=>if(x>5)"gt 5" else "lte 5") res34: scala.collection.immutable.Map[String,Array[Int]] = Map(gt 5 -> Array(6, 7, 8), lte 5 -> Array(1, 2, 3, 4, 5, 5))
scala> var arr = Array("hello","hey","hi","spark","smart","sun") scala> arr.groupBy(_.substring(0,1)) res36: scala.collection.immutable.Map[String,Array[String]] = Map(h -> Array(hello, hey, hi), s -> Array(spark, smart, sun))
scala> var arr = Array(("hello",5),("tom",3),("spark",32)) arr: Array[(String, Int)] = Array((hello,5), (tom,3), (spark,32))
scala> arr.groupBy(t=>t._2>10) res37: scala.collection.immutable.Map[Boolean,Array[(String, Int)]] = Map(false -> Array((hello,5), (tom,3)), true -> Array((spark,32))) |
groupBy的返回值是一个Map集合,key是分组函数的返回值,value是原集合中属于该分组的元素组成的子集合
grouped分组(片段分组)
scala> arr res46: Array[Int] = Array(1, 2, 3, 4, 5, 6, 7, 8)
scala> arr.grouped(2) res47: Iterator[Array[Int]] = non-empty iterator
scala> res47.foreach(x=>println(x.toList)) List(1, 2) List(3, 4) List(5, 6) List(7, 8)
scala> arr.grouped(2) res49: Iterator[Array[Int]] = non-empty iterator
scala> res49.foreach(println(_.toList)) <console>:17: error: missing parameter type for expanded function ((x$1) => x$1.toList) res49.foreach(println(_.toList)) |
scala> arr.grouped(2) res49: Iterator[Array[Int]] = non-empty iterator
scala> res49.foreach(println(_.toList)) <console>:17: error: missing parameter type for expanded function ((x$1) => x$1.toList) res49.foreach(println(_.toList)) ^
scala> val func = (x:Int)=>println(x) func: Int => Unit = <function1>
scala> arr.foreach(println) 1 2 3 4 5 6 7 8
scala> arr.foreach(func) 注意函数和方法的调用规范,注意函数和方法的使用情况 var func = (x:Int)=>println(x)这种情况下println是func函数中的一个内容,是其中调用的方法 arr.foreach(println) println方法作为一个函数在函数中作为参数了 arr.foreach(println(_)) 这个println作为方法被调用了,相当于在函数中调用方法 arr.foreach(func) |
scala中的wordcount
object WordCount { |
集合的并集 交集 差集
union intersect diff
scala> var arr = Array(1,2,3,4,5) arr: Array[Int] = Array(1, 2, 3, 4, 5)
scala> var arr1 = Array(3,4,5,6,7) arr1: Array[Int] = Array(3, 4, 5, 6, 7)
scala> arr union arr1 res53: Array[Int] = Array(1, 2, 3, 4, 5, 3, 4, 5, 6, 7)
scala> arr intersect arr1 res54: Array[Int] = Array(3, 4, 5)
scala> arr diff arr1 res55: Array[Int] = Array(1, 2)
scala> arr1 diff arr res56: Array[Int] = Array(6, 7) |
count方法
scala> arr.count def count(p: Int => Boolean): Int
scala> arr.count(_>3) res59: Int = 2
scala> arr.count(_%2==0) res60: Int = 2 |
符合某个条件的元素有几个
find找寻,查找
scala> arr res61: Array[Int] = Array(1, 2, 3, 4, 5)
scala> arr.find override def find(p: Int => Boolean): Option[Int]
scala> arr.find(_>3) res62: Option[Int] = Some(4)
scala> arr.find(_>10) res63: Option[Int] = None
scala> res62.get res64: Int = 4 |
符合某个条件的元素,但是只能查询出来第一个
to until 步长
scala> 0 to (10,2) res71: scala.collection.immutable.Range.Inclusive = Range(0, 2, 4, 6, 8, 10)
scala> var arr = Array(1,2,3,4,5,6,7,8,9,10) arr: Array[Int] = Array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
scala> for(e<- 1 to (arr.length-1,2))yield arr(e) res72: scala.collection.immutable.IndexedSeq[Int] = Vector(2, 4, 6, 8, 10) |
随堂练习:
1.写一个函数或者方法 getValue(x:Array[Int],v:Int),求出这个数组中大于v的值的个数、小于v的值的个数、等于v的值的个数,要求一次性返回
- 存在这样一个数组Array(1,2,3,5,6,7,8) 返回2 1 5 3 7 6 8字符串
def getReverse2(arr:Array[Int]):String={ |
mkString()可以指定集合中元素的分隔符,将元素拼接成字符串
作业题:
val d1 = Array(("bj", 28.1), ("sh", 28.7), ("gz", 32.0), ("sz", 33.1))
val d2 = Array(("bj", 27.3), ("sh", 30.1), ("gz", 33.3))
val d3 = Array(("bj", 28.2), ("sh", 29.1), ("gz", 32.0), ("sz", 30.5))
以上是每个城市的温度,求出每个城市温度的平均值
第二题:
val lst = List("Id1-The Spark", "Id2-The Hadoop", "Id3-The Spark")
mapValues方法处理的是Map集合中的数据
scala> arr.map(t=>(t._1,t._2+10)) res74: scala.collection.Map[String,Int] = Map(zhangsan -> 30, lisi -> 40, wangwu -> 35)
scala> arr.mapValues(_+10) res75: scala.collection.Map[String,Int] = Map(zhangsan -> 30, lisi -> 40, wangwu -> 35)
scala> val arr = Array(("zhangsan",20),("lisi",30),("wangwu",25)) arr: Array[(String, Int)] = Array((zhangsan,20), (lisi,30), (wangwu,25))
scala> arr.mapValues(_+10) <console>:16: error: value mapValues is not a member of Array[(String, Int)] arr.mapValues(_+10) |
mapValues会遍历Map集合中每一个元素的value值并进行处理,key保持不变;它只能用于Map,不能用于由元组组成的数组
reduce reduceLeft reduceRight
scala> arr.reduce def reduce[A1 >: Int](op: (A1, A1) => A1): A1
scala> arr.reduce((a,b)=>a+b) res78: Int = 28
scala> arr.reduce(_+_) res79: Int = 28
scala> var arr =Array(("zhangsan",30000),("reba",800000),("nazha",750000)) arr: Array[(String, Int)] = Array((zhangsan,30000), (reba,800000), (nazha,750000))
scala> arr.map(_._2.sum) <console>:16: error: value sum is not a member of Int arr.map(_._2.sum) ^
scala> arr.map(_._2).sum res81: Int = 1580000
scala> arr.reduce((a,b)=>a._2+b._2) <console>:16: error: type mismatch; found : Int required: (String, Int) arr.reduce((a,b)=>a._2+b._2) |
def reduce[A1 >: A](op: (A1, A1) => A1): A1
源码中规定:传给reduce的函数,其两个参数的类型和返回值的类型必须是同一个类型A1,并且A1必须是调用reduce的集合的元素类型本身或其父类型
scala> arr.reduce((a,b)=>a) res84: (String, Int) = (zhangsan,30000)
scala> arr.reduce((a,b)=>("",a._2+b._2)) res85: (String, Int) = ("",1580000) |
scala> arr.reduce((a:Any,b:Any)=>a.asInstanceOf[(String,Int)]._2+b.asInstanceOf[(String,Int)]._2) java.lang.ClassCastException: java.lang.Integer cannot be cast to scala.Tuple2 at $anonfun$1.apply(<console>:16) at $anonfun$1.apply(<console>:16) at scala.collection.IndexedSeqOptimized$class.foldl(IndexedSeqOptimized.scala:57) at scala.collection.IndexedSeqOptimized$class.reduceLeft(IndexedSeqOptimized.scala:74) at scala.collection.mutable.ArrayOps$ofRef.reduceLeft(ArrayOps.scala:186) at scala.collection.TraversableOnce$class.reduce(TraversableOnce.scala:208) at scala.collection.mutable.ArrayOps$ofRef.reduce(ArrayOps.scala:186) ... 32 elided
scala> arr.reduce((a:Any,b:Any)=>("",a.asInstanceOf[(String,Int)]._2+b.asInstanceOf[(String,Int)]._2)) res88: (String, Int) = ("",1580000) |
注意:reduce接收的两个参数和返回值必须是同一种类型
def reduce[A1 >: A](op: (A1, A1) => A1): A1 = reduceLeft(op)
reduce的底层使用的是reduceLeft
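reduce只是直接调用了reduceLeft,但两者的签名不同:reduceLeft的签名是 def reduceLeft[B >: A](op: (B, A) => B): B,只要求第一个参数(累计值)和返回值的类型B一致,第二个参数是集合的元素类型A,因此允许参数和返回值的类型不一样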
scala> arr.reduce((a:Any,b:(String,Int))=>("",a.asInstanceOf[(String,Int)]._2+b._2).asInstanceOf[Any]) <console>:16: error: type mismatch; found : (Any, (String, Int)) => Any required: (Any, Any) => Any arr.reduce((a:Any,b:(String,Int))=>("",a.asInstanceOf[(String,Int)]._2+b._2).asInstanceOf[Any]) ^
scala> arr.reduceLeft((a:Any,b:(String,Int))=>("",a.asInstanceOf[(String,Int)]._2+b._2).asInstanceOf[Any]) res92: Any = ("",1580000) |