RDD的依赖关系

Posted on 2018-08-09 16:49  打杂滴  阅读(144)  评论(0)  编辑  收藏  举报


scala> val personRDD=sc.textFile("/tmp/person.txt")
personRDD: org.apache.spark.rdd.RDD[String] = /tmp/person.txt MapPartitionsRDD[39] at textFile at <console>:25

scala> val ageRDD=personRDD.map(x=>{val arr=x.split(",");(arr(2),1)})
ageRDD: org.apache.spark.rdd.RDD[(String, Int)] = MapPartitionsRDD[41] at map at <console>:27

scala> val grouprdd=ageRDD.groupByKey()
grouprdd: org.apache.spark.rdd.RDD[(String, Iterable[Int])] = ShuffledRDD[40] at groupByKey at <console>:29

scala> grouprdd.dependencies.foreach(dep=>{println(dep.getClass);println(dep.rdd);println(dep.rdd.partitions);println(dep.rdd.partitions.size)})
class org.apache.spark.ShuffleDependency
MapPartitionsRDD[34] at map at <console>:27
[Lorg.apache.spark.Partition;@2e33dd0d
2

scala> personRDD.dependencies.foreach(dep=>{println(dep.getClass);println(dep.rdd);println(dep.rdd.partitions);println(dep.rdd.partitions.size)})
class org.apache.spark.OneToOneDependency
/tmp/person.txt HadoopRDD[38] at textFile at <console>:25
[Lorg.apache.spark.Partition;@5b0f052f
2

scala> ageRDD.dependencies.foreach(dep=>{println(dep.getClass);println(dep.rdd);println(dep.rdd.partitions);println(dep.rdd.partitions.size)})
class org.apache.spark.OneToOneDependency
/tmp/person.txt MapPartitionsRDD[39] at textFile at <console>:25
[Lorg.apache.spark.Partition;@5b0f052f
2

 

Copyright © 2024 打杂滴
Powered by .NET 8.0 on Kubernetes