package com.summer.test
import org.apache.spark.sql.SparkSession
object test01 {
  def main(args: Array[String]): Unit = {
    val ss = SparkSession.builder().master("local[2]").appName("summer").getOrCreate()
    val sc = ss.sparkContext
    // input format: fruit name # weight # unit price
    val fruitListRdd = sc.parallelize(List(
      "apple#1#1.0", "banana#2#2.0", "orange#3#3.0", "apple#4#4.0",
      "banana#5#5.0", "apple#6#6.0", "strawberry", "pear#7#7.0"))
    println("==fruitListRdd: " + fruitListRdd.collect.toBuffer)
    // Result: ==fruitListRdd: ArrayBuffer(apple#1#1.0, banana#2#2.0, orange#3#3.0, apple#4#4.0, banana#5#5.0, apple#6#6.0, strawberry, pear#7#7.0)
    val fruitListRdd2 = fruitListRdd.map { x =>
      try {
        val arr = x.split("#")
        val fName = arr(0)
        val fWeight = arr(1).toDouble
        val fPrice = arr(2).toDouble
        (fName, fWeight + "#" + fPrice)
      } catch {
        // malformed records such as "strawberry" become an empty sentinel pair
        case _: Exception => ("", "")
      }
    }.filter(_._2.nonEmpty) // drop records that don't match: name#weight#price
println("==fruitListRdd2: "+fruitListRdd2.collect.toBuffer)
//结果 ==fruitListRdd2: ArrayBuffer((apple,1.0#1.0), (banana,2.0#2.0), (orange,3.0#3.0), (apple,4.0#4.0), (banana,5.0#5.0), (apple,6.0#6.0))
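    // A minimal alternative sketch for the same cleanup step: flatMap with
    // scala.util.Try drops malformed rows in one pass, so no ("", "") sentinel
    // or follow-up filter is needed. The name fruitListRdd2b is illustrative,
    // not part of the original example.
    val fruitListRdd2b = fruitListRdd.flatMap { x =>
      scala.util.Try {
        val Array(name, weight, price) = x.split("#")
        (name, s"${weight.toDouble}#${price.toDouble}")
      }.toOption // Failure (wrong arity or non-numeric field) becomes None and is skipped
    }
    println("==fruitListRdd2b: " + fruitListRdd2b.collect.toBuffer)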
    // reduceByKey merges the values of pairs sharing the same key: weights are
    // summed and the price is recomputed as a weighted average
    val totalFruitRdd = fruitListRdd2.reduceByKey { (x, y) =>
      val arr1 = x.split("#")
      val xWeight = arr1(0).toDouble
      val xPrice = arr1(1).toDouble
      val arr2 = y.split("#")
      val yWeight = arr2(0).toDouble
      val yPrice = arr2(1).toDouble
      val totalWeight = xWeight + yWeight
      val avgPrice = (xWeight * xPrice + yWeight * yPrice) / totalWeight
      totalWeight + "#" + f"$avgPrice%.2f"
    }
println("===totalFruitRdd: "+totalFruitRdd.collect.toBuffer)
//===totalFruitRdd: ArrayBuffer((orange,3.0#3.0), (apple,11.0#4.82), (banana,7.0#4.14))
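    // A hedged alternative sketch for the aggregation: carry (weight, cost) as a
    // numeric tuple instead of re-encoding "weight#price" strings, so reduceByKey
    // only sums, and the weighted average is derived once at the end. The names
    // totalFruitRdd2 / w / c are illustrative, not from the original.
    val totalFruitRdd2 = fruitListRdd2
      .mapValues { v =>
        val Array(w, p) = v.split("#").map(_.toDouble)
        (w, w * p) // (weight, total cost of this record)
      }
      .reduceByKey { case ((w1, c1), (w2, c2)) => (w1 + w2, c1 + c2) }
      .mapValues { case (w, c) => f"$w%.1f#${c / w}%.2f" }
    // unlike the reduce-side formatting above, this also formats single-record
    // keys such as pear, since mapValues runs for every key
    println("===totalFruitRdd2: " + totalFruitRdd2.collect.toBuffer)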
    ss.stop()
  }
}