Spark笔记(二):RDD操作02

package com.summer.test
import org.apache.spark.sql.SparkSession

object test01 {
  /**
   * Demo of basic RDD transformations: parse "name#weight#unitPrice" records,
   * drop malformed ones, and compute per-fruit total weight and weighted
   * average price with reduceByKey.
   */
  def main(args: Array[String]): Unit = {
    import scala.util.Try

    // Local 2-core session for this demo.
    val ss = SparkSession.builder().master("local[2]").appName("summer").getOrCreate()
    val sc = ss.sparkContext
    try {
      // Each record is "name#weight#unitPrice"; "strawberry" is a deliberately malformed entry.
      val fruitListRdd = sc.parallelize(List(
        "apple#1#1.0", "banana#2#2.0", "orange#3#3.0", "apple#4#4.0",
        "banana#5#5.0", "apple#6#6.0", "strawberry", "pear#7#7.0"))
      println("==fruitListRdd: " + fruitListRdd.collect.toBuffer)
      // e.g. ArrayBuffer(apple#1#1.0, ..., strawberry, pear#7#7.0)

      // Parse each record to (name, "weight#price"), dropping records that do not
      // have exactly three '#'-separated fields or whose numeric fields fail to
      // parse. flatMap + Option replaces the old map-to-sentinel ("","") + filter
      // (whose null check was dead: it ran AFTER isEmpty, which would already have
      // thrown on null) and the broad `catch { case e: Exception => }`.
      val fruitListRdd2 = fruitListRdd.flatMap { line =>
        line.split("#") match {
          case Array(name, weight, price) =>
            // Try guards the toDouble conversions (NumberFormatException -> None).
            Try((name, s"${weight.toDouble}#${price.toDouble}")).toOption
          case _ => None
        }
      }
      println("==fruitListRdd2:  " + fruitListRdd2.collect.toBuffer)
      // e.g. ArrayBuffer((apple,1.0#1.0), ..., (apple,6.0#6.0), (pear,7.0#7.0))

      // Merge same-key records into (totalWeight, weightedAvgPrice).
      // weight * avgPrice recovers the total value, so each pairwise merge stays a
      // true weighted average and the function is associative/commutative as
      // reduceByKey requires. Full precision is kept during the reduce — the old
      // code rounded to 2 decimals at EVERY merge step, accumulating error.
      val totalFruitRdd = fruitListRdd2
        .reduceByKey { (x, y) =>
          val Array(xWeight, xPrice) = x.split("#").map(_.toDouble)
          val Array(yWeight, yPrice) = y.split("#").map(_.toDouble)
          val totalWeight = xWeight + yWeight
          val avgPrice = (xWeight * xPrice + yWeight * yPrice) / totalWeight
          s"$totalWeight#$avgPrice"
        }
        // Format once, at the end. This also fixes an inconsistency: keys seen only
        // once (e.g. orange) never went through the reduce function and so were
        // never formatted at all. f-interpolator replaces the deprecated .formatted.
        .mapValues { v =>
          val Array(w, p) = v.split("#").map(_.toDouble)
          f"$w#$p%.2f"
        }
      println("===totalFruitRdd:  " + totalFruitRdd.collect.toBuffer)
      // e.g. ArrayBuffer((orange,3.0#3.00), (apple,11.0#4.82), (banana,7.0#4.14), (pear,7.0#7.00))
    } finally {
      // Release Spark resources even if a job fails.
      ss.stop()
    }
  }
}

 

posted @ 2019-02-21 19:42  等木鱼的猫  阅读(157)  评论(0)    收藏  举报