第1集:大数据最火爆语言Scala光速入门

定义变量
scala> val age : Int = 0
age: Int = 0

scala> var age1, age2, age3 = 0
age1: Int = 0
age2: Int = 0
age3: Int = 0

基本类型的方法
scala> 10.to
toByte toChar toDouble toFloat toInt toLong toShort toString

scala> 10.to(20)
res0: scala.collection.immutable.Range.Inclusive = Range(10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20)

scala> import scala.math._
import scala.math._

scala> min(20,4)
res1: Int = 4

if表达式
scala> val result = if(age >= 18) "adult" else "child"
result: String = adult


块表达式
最后一行的值就是整个代码块的返回值

scala> var buffered = 0
buffered: Int = 0

scala> val result = if(age >= 18) {
| "adult"
| buffered = 10
| buffered
| }
<console>:13: warning: a pure expression does nothing in statement position; you may be omitting necessary parentheses
"adult"
^
result: AnyVal = 10

控制台输出函数
scala> println("Spark")
Spark

scala> print("\nSpark")

Spark
scala> printf(" %s is the future of Big Data Computation Framework.", "Spark")
Spark is the future of Big Data Computation Framework.

读取内容
scala> readLine("Please enter your password : ")
Please enter your password : res5: String = 123456

scala> readInt
res6: Int = 9

循环语句
scala> val element = 100
element: Int = 100

scala> for(i <- 0 to element) println(i)

scala> for(i <- 0 to element if i % 2 == 0) println(i)

终止循环
scala> import scala.util.control.Breaks._
import scala.util.control.Breaks._

scala> for(i <- 1 to 10) {
| if (i == 4) break
| println(i)
| }
1
2
3
scala.util.control.BreakControl

函数
val n = 5
// Demonstrates a non-local return: `return` inside the for-loop body aborts
// the enclosing def (implemented via a control-flow exception), so f1 stops
// as soon as i == n and evaluates to that i. If the loop finishes without
// hitting n, the result is the Unit value of the for expression — hence
// the declared result type Any.
// (The original snippet was missing the closing brace of the def body.)
def f1: Any = {
  for (i <- 1 to 10) {
    if (i == n) return i
    println(i)
  }
}

带有默认参数的函数
scala> def f3(param1:String, param2:Int = 30) = param1 + param2
f3: (param1: String, param2: Int)String

scala> f3("Spark")
res13: String = Spark30

带名参数,可以调换参数顺序
scala> f3(param2 = 100, param1 = "Scala")
res14: String = Scala100

函数的可变参数
scala> def sum(numbers : Int*) = {var result = 0; for(element <- numbers) result += element;result}
sum: (numbers: Int*)Int

scala> sum(1,2,3,4,5,6,7,8,9,10)
res9: Int = 55

_*是把里面每个元素提取出来
scala> sum(1 to 100: _*)
res10: Int = 5050


过程的定义:方法体只用{}(省略=),或者显式声明返回类型 :Unit =;过程不产生结果,返回值为Unit
scala> def morning(content: String){println("Good " + content)}
morning: (content: String)Unit

scala> def morning(content: String): Unit = "Good " + content
morning: (content: String)Unit

而函数为
scala> def moring(content: String) = "Good " + content
moring: (content: String)String


lazy类型,只有变量第一次使用时才发生计算,例如打开文件,连接数据库,操作网络等耗时操作,是在使用时发生计算

scala> import scala.io.Source._
import scala.io.Source._

scala> lazy val content = fromFile("/tmp/1.txt").mkString
content: String = <lazy>

lazy类型变量在使用时才发生计算
scala> content
res15: String =
"Hello,World!
"

scala> val content = fromFile("/tmp/1.txt").mkString
content: String =
"Hello,World!
"

异常

import scala.io.Source._
import java.io.FileNotFoundException
// Read a whole file into a String, handling a missing file gracefully.
try{
// fromFile opens the file; mkString eagerly reads its entire contents.
val content = fromFile("/root/txt").mkString
}catch{
// Only a missing file is handled here; other IO errors still propagate.
case _: FileNotFoundException => println("Oops!!! File not found")
}finally{
// Runs whether or not the read succeeded.
// NOTE(review): the Source returned by fromFile is never closed — in real
// code close it here (or use scala.util.Using in Scala 2.13+).
println("This finally code")
}


scala> import scala.io.Source._
import scala.io.Source._

scala> import java.io.FileNotFoundException
import java.io.FileNotFoundException

scala> try{
| val content = fromFile("/root/txt").mkString
| }catch{
| case _: FileNotFoundException => println("Oops!!! File not found")
| }finally{
| println("This finally code")
| }
Oops!!! File not found
This finally code

 

数组
scala> val arr = new Array[Int](5)
arr: Array[Int] = Array(0, 0, 0, 0, 0)

访问第4个元素
scala> arr(3)
res3: Int = 0

scala> val arr1 = Array("Scala", "Spark")
arr1: Array[String] = Array(Scala, Spark)

scala> val arr1 = Array.apply("Scala", "Spark")
arr1: Array[String] = Array(Scala, Spark)

可变数组
scala> import scala.collection.mutable._
import scala.collection.mutable._

scala> val arrBuffer = ArrayBuffer[Int]()
arrBuffer: scala.collection.mutable.ArrayBuffer[Int] = ArrayBuffer()

添加元素
scala> arrBuffer += 10
res6: arrBuffer.type = ArrayBuffer(10)

scala> arrBuffer += (11,12,13,4)
res7: arrBuffer.type = ArrayBuffer(10, 11, 12, 13, 4)

scala> arrBuffer ++= Array(1,2,3,4)
res8: arrBuffer.type = ArrayBuffer(10, 11, 12, 13, 4, 1, 2, 3, 4)

去掉后面3个元素
scala> arrBuffer.trimEnd(3)

在索引5处(第6个位置)插入元素
scala> arrBuffer.insert(5,100)
scala> arrBuffer.insert(5,1,2,3,4)

移除索引1处(第2个)的元素
scala> arrBuffer.remove(1)
res12: Int = 11

从索引2开始移除2个元素
scala> arrBuffer.remove(2,2)

转换为数组
scala> arrBuffer.toArray
res16: Array[Int] = Array(10, 13, 1)

数组转换为可变数组
scala> res16.toBuffer
res17: scala.collection.mutable.Buffer[Int] = ArrayBuffer(10, 13, 1)

遍历输出
scala> for (elem <- res16) println(elem)

倒序输出
scala> val arr2 = Array(10,11,1,3,4,100,1)
arr2: Array[Int] = Array(10, 11, 1, 3, 4, 100, 1)

scala> for(i <- (0 until arr2.length).reverse) println(arr2(i))
1
100
4
3
1
11
10

scala> for(i <- 0 until (arr2.length, 1)) println(arr2(i))
10
11
1
3
4
100
1

scala> for(i <- 0 until (arr2.length, 2)) println(arr2(i))
10
1
4
1

scala> for(i <- 0 until (arr2.length, 3)) println(arr2(i))
10
3
1

求和
scala> arr2.sum
res22: Int = 130

求最大值
scala> arr2.max
res23: Int = 100

排序
scala> scala.util.Sorting.quickSort(arr2)

scala> arr2
res25: Array[Int] = Array(1, 1, 3, 4, 10, 11, 100)

scala> arr2.mkString
res26: String = 11341011100

scala> arr2.mkString(", ")
res27: String = 1, 1, 3, 4, 10, 11, 100

scala> val arr3 = for(i <- arr2) yield i*i
arr3: Array[Int] = Array(1, 1, 9, 16, 100, 121, 10000)

scala> val arr3 = for(i <- arr2 if i % 3 == 0) yield i*i
arr3: Array[Int] = Array(9)

scala> arr2.filter(_%3 == 0).map(i => i*i)
res28: Array[Int] = Array(9)

Map的定义申明和使用
scala> val persons = Map("Spark" -> 6, "Hadoop" -> 11)
persons: scala.collection.immutable.Map[String,Int] = Map(Spark -> 6, Hadoop -> 11)

scala> persons("Hadoop")
res29: Int = 11

scala> val persons = scala.collection.mutable.Map("Spark" -> 6, "Hadoop" -> 11)
persons: scala.collection.mutable.Map[String,Int] = Map(Hadoop -> 11, Spark -> 6)

新增
scala> persons += ("Flink" -> 5)
res30: persons.type = Map(Hadoop -> 11, Spark -> 6, Flink -> 5)

移除
scala> persons -= "Flink"
res31: persons.type = Map(Hadoop -> 11, Spark -> 6)


取数据
scala> val sparkValue = if(persons.contains("Spark"))persons("Spark") else 1000
sparkValue: Int = 6

scala> val sparkValue = persons.getOrElse("Spark",1000)
sparkValue: Int = 6

遍历Map
scala> for((key,value) <- persons) println(key + " : " + value)
Hadoop : 11
Spark : 6

scala> for(key <- persons.keySet) println (key + " : ")
Hadoop :
Spark :

scala> val persons = scala.collection.immutable.SortedMap("Spark" -> 6, "Hadoop" -> 11)
persons: scala.collection.immutable.SortedMap[String,Int] = Map(Hadoop -> 11, Spark -> 6)

tuple类型
scala> val tuple = ("Spark",6,99.0)
tuple: (String, Int, Double) = (Spark,6,99.0)
索引从1开始的
scala> tuple._1
res35: String = Spark

Spark源码讲解
SparkContext


作业1:移除一个数组中第一个负数后的所有负数,不包括第一个负数
import scala.collection.mutable._
// Homework: drop every negative number that appears AFTER the first negative,
// keeping the first negative itself. Single in-place scan over the buffer.
val array = ArrayBuffer[Int]()
array += (8, 3, 4, 5, -15, 8, -2, -100)
var seenNegative = false
var remaining = array.length
var cursor = 0
while (cursor < remaining) {
  val current = array(cursor)
  // Keep non-negatives, and keep the very first negative we encounter.
  val keep = current >= 0 || !seenNegative
  if (current < 0 && !seenNegative) seenNegative = true
  if (keep) {
    cursor += 1
  } else {
    // In-place remove shifts the tail left, so the cursor stays put.
    array.remove(cursor)
    remaining -= 1
  }
}
发现第一个负数之后的每一个负数会立即进行移除,性能较差,多次移动数组

import scala.collection.mutable._
// Homework, faster variant: first record the index of every element to keep
// (everything up to and including the first negative, plus later
// non-negatives), then compact the buffer in a single pass and cut the tail.
val array = ArrayBuffer[Int]()
array += (8, 3, 4, 5, -15, 8, -2, -100)
var sawNegative = false
val keptIndexes = for (i <- array.indices if !sawNegative || array(i) >= 0) yield {
  if (array(i) < 0) sawNegative = true
  i
}
// Move every kept element to the front, preserving order.
for (slot <- keptIndexes.indices) {
  array(slot) = array(keptIndexes(slot))
}
// Remove the leftover tail in one shot instead of element-by-element.
array.trimEnd(array.length - keptIndexes.length)
记录所有不需要移除的元素的索引,最后一次性移除所有需要移除的元素,性能相对较高


// Returns Array(index, value) of the first negative element of `array`,
// or the sentinel Array(0, 0) when the input contains no negatives
// (callers must check for that case before using the result).
def getFirstNegative(array: Array[Int]): Array[Int] = {
  // indexWhere avoids the non-local `return` of the original loop.
  val i = array.indexWhere(_ < 0)
  if (i >= 0) Array(i, array(i)) else Array(0, 0)
}
val array = Array(8, 3, 4, 5, -15, 8, -2, -100)
val firstNegative = getFirstNegative(array)
// Drop every negative, then re-insert the first one at its original slot:
// everything before the first negative is non-negative, so that index
// survives the filter unchanged.
val result: scala.collection.mutable.Buffer[Int] = array.filter(_ >= 0).toBuffer
// BUG FIX: without this guard, an all-non-negative input would insert a
// spurious 0 at position 0 (the sentinel returned by getFirstNegative).
if (array.exists(_ < 0)) result.insert(firstNegative(0), firstNegative(1))
先找到第1个负数,移除所有负数后,再添加该负数

posted @ 2016-01-10 16:56  Jack葛  阅读(508)  评论(0编辑  收藏  举报