The join operation in Spark Streaming

  /**
   * Return a new DStream by applying 'join' between RDDs of `this` DStream and `other` DStream.
   * The supplied org.apache.spark.Partitioner is used to control the partitioning of each RDD.
   */
  def join[W: ClassTag](
      other: DStream[(K, W)],
      partitioner: Partitioner
    ): DStream[(K, (V, W))] = ssc.withScope {
    self.transformWith(
      other,
      // Under the hood this still invokes the RDD join operation
      (rdd1: RDD[(K, V)], rdd2: RDD[(K, W)]) => rdd1.join(rdd2, partitioner)
    )
  }
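Since the implementation above simply delegates to transformWith, the same result can be reproduced by calling transformWith directly. Here is a minimal sketch, assuming two DStream[(String, Int)] streams named DS1 and DS2 like the ones built in the example further below:

    import org.apache.spark.rdd.RDD
    import org.apache.spark.streaming.dstream.DStream

    // Equivalent to DS1.join(DS2): for every batch interval, transformWith
    // hands the two RDDs generated in that interval to the supplied function,
    // which performs an ordinary RDD join by key.
    val manualJoin: DStream[(String, (Int, Int))] =
      DS1.transformWith(DS2,
        (rdd1: RDD[(String, Int)], rdd2: RDD[(String, Int)]) => rdd1.join(rdd2))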

When applied to two DStreams, one of (K, V) pairs and one of (K, W) pairs, join returns a new DStream of (K, (V, W)) pairs: within each batch interval, the RDD produced by one stream is joined by key with the RDD produced by the other. The following example joins word streams read from two socket sources:

    import org.apache.spark.SparkConf
    import org.apache.spark.streaming.{Duration, StreamingContext}
    import org.apache.spark.streaming.dstream.{DStream, ReceiverInputDStream}

    val conf: SparkConf = new SparkConf().setMaster("local[*]").setAppName("wordcount")
    val ssc = new StreamingContext(conf, Duration(10000)) // 10-second batches
    val lineStream: ReceiverInputDStream[String] = ssc.socketTextStream("localhost", 9999)
    val DS1: DStream[(String, Int)] = lineStream.flatMap(_.split(" ")).map((_, 1))
    val lineStream2: ReceiverInputDStream[String] = ssc.socketTextStream("localhost", 8888)
    val DS2: DStream[(String, Int)] = lineStream2.flatMap(_.split(" ")).map((_, 1))
    // Join the two word streams by key within each batch
    val joinedDS: DStream[(String, (Int, Int))] = DS1.join(DS2)
    joinedDS.print()
    ssc.start()
    ssc.awaitTermination()
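To try it, start two netcat servers (nc -lk 9999 and nc -lk 8888) and type words into each. join is an inner join, so only keys that appear in both streams within the same 10-second batch are emitted. For example, if port 9999 receives "hello spark" and port 8888 receives "hello flink" in the same batch, print() shows output along these lines (the timestamp is illustrative):

    -------------------------------------------
    Time: 1706001610000 ms
    -------------------------------------------
    (hello,(1,1))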
