The join operation in Spark Streaming
/**
 * Return a new DStream by applying 'join' between RDDs of `this` DStream and `other` DStream.
 * The supplied org.apache.spark.Partitioner is used to control the partitioning of each RDD.
 */
def join[W: ClassTag](
    other: DStream[(K, W)],
    partitioner: Partitioner
  ): DStream[(K, (V, W))] = ssc.withScope {
  self.transformWith(
    other,
    // under the hood this still calls the RDD join operation
    (rdd1: RDD[(K, V)], rdd2: RDD[(K, W)]) => rdd1.join(rdd2, partitioner)
  )
}
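Because join is only a thin wrapper over transformWith, you can reproduce it (or build variants such as a left outer join) by calling transformWith yourself. A minimal sketch, assuming two pair DStreams named ds1 and ds2 (hypothetical names, same shape as in the example below):

import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.dstream.DStream

// Equivalent to ds1.join(ds2): join the two RDDs produced in each batch interval
val joined: DStream[(String, (Int, Int))] =
  ds1.transformWith(ds2, (rdd1: RDD[(String, Int)], rdd2: RDD[(String, Int)]) => rdd1.join(rdd2))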
When applied to two DStreams, one of (K, V) pairs and one of (K, W) pairs, join returns a new DStream of (K, (V, W)) pairs.
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Duration, StreamingContext}
import org.apache.spark.streaming.dstream.{DStream, ReceiverInputDStream}

val conf: SparkConf = new SparkConf().setMaster("local[*]").setAppName("wordcount")
val ssc = new StreamingContext(conf, Duration(10000))

// Two socket sources, each turned into a (word, 1) pair DStream
val lineStream: ReceiverInputDStream[String] = ssc.socketTextStream("localhost", 9999)
val DS1: DStream[(String, Int)] = lineStream.flatMap(_.split(" ")).map((_, 1))

val lineStream2: ReceiverInputDStream[String] = ssc.socketTextStream("localhost", 8888)
val DS2: DStream[(String, Int)] = lineStream2.flatMap(_.split(" ")).map((_, 1))

// Pair up values that share the same key within each batch
val DsSum: DStream[(String, (Int, Int))] = DS1.join(DS2)
DsSum.print()

ssc.start()
ssc.awaitTermination()
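One caveat: join is evaluated batch by batch, so only records that arrive on both sockets within the same batch interval are paired up. If the two streams need a wider matching horizon, join the windowed streams instead. A minimal sketch, reusing DS1 and DS2 from the example above with an assumed 30-second window (a multiple of the 10-second batch interval):

import org.apache.spark.streaming.Seconds

// Join over the last 30 seconds of data from both streams, recomputed each batch
val windowedJoin: DStream[(String, (Int, Int))] =
  DS1.window(Seconds(30)).join(DS2.window(Seconds(30)))
windowedJoin.print()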