第五章_Spark核心编程_Rdd_转换算子_keyValue型_join&leftOuterJoin&rightOuterJoin&fullOuterJoin

 


1. join

复制代码
  /*
  * 1.定义
  *     def join[W](other: RDD[(K, W)]): RDD[(K, (V, W))]
  *     def join[W](other: RDD[(K, W)], numPartitions: Int): RDD[(K, (V, W))]
  * 2.功能
  *     将两个类型为 (K,V) 和 (K,W) 的 RDD 进行 join(内连接):只保留两个 RDD 中都存在的 key,返回相同 key 对应的所有元素连接在一起的 (K,(V,W)) 的 RDD
  * */
  /** Runnable example of the pair-RDD `join` operator on a local SparkContext.
    *
    * Uses an explicit `main` method instead of `scala.App`: the Spark documentation
    * notes that subclasses of `scala.App` may not work correctly.
    */
  object joinTest {

    def main(args: Array[String]): Unit = {
      // App name matches this example (the original reused "distinctTest" from another snippet).
      val sparkconf: SparkConf = new SparkConf().setMaster("local").setAppName("joinTest")
      val sc: SparkContext = new SparkContext(sparkconf)

      try {
        // Left side: key -> person name. Key 7 has no counterpart on the right side.
        val rdd1: RDD[(Int, String)] = sc.makeRDD(List((1, "刘备"), (2, "张飞"), (3, "关羽"), (4, "曹操"), (5, "赵云"), (7, "孙权")), 2)
        // Right side: key -> kingdom. Key 6 has no counterpart on the left side.
        val rdd2: RDD[(Int, String)] = sc.makeRDD(List((1, "蜀国"), (2, "蜀国"), (3, "蜀国"), (4, "魏国"), (5, "蜀国"), (6, "吴国")), 3)

        val joinRdd: RDD[(Int, (String, String))] = rdd1.join(rdd2)

        joinRdd.collect().foreach(println(_))

        /* Output recorded in the original notes (keys 6 and 7 are absent; rows are not sorted by key):
          (3,(关羽,蜀国))
          (4,(曹操,魏国))
          (1,(刘备,蜀国))
          (5,(赵云,蜀国))
          (2,(张飞,蜀国))
        */
      } finally {
        // Stop the context even when the job above throws.
        sc.stop()
      }
    }
  }
复制代码

2.leftOuterJoin

复制代码
  /*
  * 1.定义
  *     def leftOuterJoin[W](other: RDD[(K, W)]): RDD[(K, (V, Option[W]))]
  *     def leftOuterJoin[W](other: RDD[(K, W)],numPartitions: Int): RDD[(K, (V, Option[W]))]
  * 2.功能
  *     将两个类型为 (K,V) 和 (K,W) 的 RDD 进行 leftOuterJoin(左外连接):保留左侧 RDD 的所有 key,返回 (K,(V,Option[W])) 的 RDD;右侧 RDD 中没有对应 key 时,对应的值为 None
  * */
  /** Runnable example of the pair-RDD `leftOuterJoin` operator on a local SparkContext.
    *
    * Uses an explicit `main` method instead of `scala.App`: the Spark documentation
    * notes that subclasses of `scala.App` may not work correctly.
    */
  object leftOuterJoinTest {

    def main(args: Array[String]): Unit = {
      // App name matches this example (the original reused "distinctTest" from another snippet).
      val sparkconf: SparkConf = new SparkConf().setMaster("local").setAppName("leftOuterJoinTest")
      val sc: SparkContext = new SparkContext(sparkconf)

      try {
        // Left side: key -> person name. Key 7 has no counterpart on the right side.
        val rdd1: RDD[(Int, String)] = sc.makeRDD(List((1, "刘备"), (2, "张飞"), (3, "关羽"), (4, "曹操"), (5, "赵云"), (7, "孙权")), 2)
        // Right side: key -> kingdom. Key 6 has no counterpart on the left side.
        val rdd2: RDD[(Int, String)] = sc.makeRDD(List((1, "蜀国"), (2, "蜀国"), (3, "蜀国"), (4, "魏国"), (5, "蜀国"), (6, "吴国")), 3)

        // The right-hand value is wrapped in Option because a left key may have no match.
        val joinRdd: RDD[(Int, (String, Option[String]))] = rdd1.leftOuterJoin(rdd2)

        joinRdd.collect().foreach(println(_))

        /* Output recorded in the original notes (key 7 is kept with None; key 6 is absent):
          (3,(关羽,Some(蜀国)))
          (4,(曹操,Some(魏国)))
          (1,(刘备,Some(蜀国)))
          (7,(孙权,None))
          (5,(赵云,Some(蜀国)))
          (2,(张飞,Some(蜀国)))
        */
      } finally {
        // Stop the context even when the job above throws.
        sc.stop()
      }
    }
  }
复制代码

3.rightOuterJoin

复制代码
  /*
  * 1.定义
  *     def rightOuterJoin[W](other: RDD[(K, W)]): RDD[(K, (Option[V], W))]
  *     def rightOuterJoin[W](other: RDD[(K, W)],numPartitions: Int): RDD[(K, (Option[V], W))]
  * 2.功能
  *     将两个类型为 (K,V) 和 (K,W) 的 RDD 进行 rightOuterJoin(右外连接):保留右侧 RDD 的所有 key,返回 (K,(Option[V],W)) 的 RDD;左侧 RDD 中没有对应 key 时,对应的值为 None
  * */
  /** Runnable example of the pair-RDD `rightOuterJoin` operator on a local SparkContext.
    *
    * Uses an explicit `main` method instead of `scala.App`: the Spark documentation
    * notes that subclasses of `scala.App` may not work correctly.
    */
  object rightOuterJoinTest {

    def main(args: Array[String]): Unit = {
      // App name matches this example (the original reused "distinctTest" from another snippet).
      val sparkconf: SparkConf = new SparkConf().setMaster("local").setAppName("rightOuterJoinTest")
      val sc: SparkContext = new SparkContext(sparkconf)

      try {
        // Left side: key -> person name. Key 7 has no counterpart on the right side.
        val rdd1: RDD[(Int, String)] = sc.makeRDD(List((1, "刘备"), (2, "张飞"), (3, "关羽"), (4, "曹操"), (5, "赵云"), (7, "孙权")), 2)
        // Right side: key -> kingdom. Key 6 has no counterpart on the left side.
        val rdd2: RDD[(Int, String)] = sc.makeRDD(List((1, "蜀国"), (2, "蜀国"), (3, "蜀国"), (4, "魏国"), (5, "蜀国"), (6, "吴国")), 3)

        // The left-hand value is wrapped in Option because a right key may have no match.
        val joinRdd: RDD[(Int, (Option[String], String))] = rdd1.rightOuterJoin(rdd2)

        joinRdd.collect().foreach(println(_))

        /* Output recorded in the original notes (key 6 is kept with None; key 7 is absent):
          (6,(None,吴国))
          (3,(Some(关羽),蜀国))
          (4,(Some(曹操),魏国))
          (1,(Some(刘备),蜀国))
          (5,(Some(赵云),蜀国))
          (2,(Some(张飞),蜀国))
        */
      } finally {
        // Stop the context even when the job above throws.
        sc.stop()
      }
    }
  }
复制代码

4.fullOuterJoin

复制代码
  /*
    * 1.定义
    *     def fullOuterJoin[W](other: RDD[(K, W)]): RDD[(K, (Option[V], Option[W]))]
    *     def fullOuterJoin[W](other: RDD[(K, W)],numPartitions: Int): RDD[(K, (Option[V], Option[W]))]
    * 2.功能
    *     将两个类型为 (K,V) 和 (K,W) 的 RDD 进行 fullOuterJoin(全外连接):保留两侧 RDD 的所有 key,返回 (K,(Option[V],Option[W])) 的 RDD;某一侧没有对应 key 时,该侧的值为 None
    * */
  /** Runnable example of the pair-RDD `fullOuterJoin` operator on a local SparkContext.
    *
    * Uses an explicit `main` method instead of `scala.App`: the Spark documentation
    * notes that subclasses of `scala.App` may not work correctly.
    */
  object fullOuterJoinTest {

    def main(args: Array[String]): Unit = {
      // App name matches this example (the original reused "distinctTest" from another snippet).
      val sparkconf: SparkConf = new SparkConf().setMaster("local").setAppName("fullOuterJoinTest")
      val sc: SparkContext = new SparkContext(sparkconf)

      try {
        // Left side: key -> person name. Key 7 has no counterpart on the right side.
        val rdd1: RDD[(Int, String)] = sc.makeRDD(List((1, "刘备"), (2, "张飞"), (3, "关羽"), (4, "曹操"), (5, "赵云"), (7, "孙权")), 2)
        // Right side: key -> kingdom. Key 6 has no counterpart on the left side.
        val rdd2: RDD[(Int, String)] = sc.makeRDD(List((1, "蜀国"), (2, "蜀国"), (3, "蜀国"), (4, "魏国"), (5, "蜀国"), (6, "吴国")), 3)

        // Both values are wrapped in Option because a key may be missing on either side.
        // Explicit result type added for consistency with the other join examples.
        val joinRdd: RDD[(Int, (Option[String], Option[String]))] = rdd1.fullOuterJoin(rdd2)

        joinRdd.collect().foreach(println(_))

        /* Output recorded in the original notes (keys 6 and 7 are both kept, each with one None):
          (6,(None,Some(吴国)))
          (3,(Some(关羽),Some(蜀国)))
          (4,(Some(曹操),Some(魏国)))
          (1,(Some(刘备),Some(蜀国)))
          (7,(Some(孙权),None))
          (5,(Some(赵云),Some(蜀国)))
          (2,(Some(张飞),Some(蜀国)))
        */
      } finally {
        // Stop the context even when the job above throws.
        sc.stop()
      }
    }
  }
复制代码

 

posted @   学而不思则罔!  阅读(47)  评论(0)  编辑  收藏  举报
相关博文:
阅读排行:
· 10年+ .NET Coder 心语 ── 封装的思维:从隐藏、稳定开始理解其本质意义
· 地球OL攻略 —— 某应届生求职总结
· 提示词工程——AI应用必不可少的技术
· 字符编码:从基础到乱码解决
· SpringCloud带你走进微服务的世界
点击右上角即可分享
微信分享提示