Transform
The DStream source defines two overloads of transform:

/**
 * Return a new DStream in which each RDD is generated by applying a function
 * on each RDD of 'this' DStream.
 * (Apply the given function to each RDD of this DStream and return a new DStream.)
 */
def transform[U: ClassTag](transformFunc: RDD[T] => RDD[U]): DStream[U] = ssc.withScope {
  // because the DStream is reachable from the outer object here, and because
  // DStreams can't be serialized with closures, we can't proactively check
  // it for serializability and so we pass the optional false to SparkContext.clean
  val cleanedF = context.sparkContext.clean(transformFunc, false)
  transform((r: RDD[T], _: Time) => cleanedF(r))
}

/**
 * Return a new DStream in which each RDD is generated by applying a function
 * on each RDD of 'this' DStream.
 */
def transform[U: ClassTag](transformFunc: (RDD[T], Time) => RDD[U]): DStream[U] = ssc.withScope {
  // because the DStream is reachable from the outer object here, and because
  // DStreams can't be serialized with closures, we can't proactively check
  // it for serializability and so we pass the optional false to SparkContext.clean
  val cleanedF = context.sparkContext.clean(transformFunc, false)
  val realTransformFunc = (rdds: Seq[RDD[_]], time: Time) => {
    assert(rdds.length == 1)
    cleanedF(rdds.head.asInstanceOf[RDD[T]], time)
  }
  new TransformedDStream[U](Seq(this), realTransformFunc)
}
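The second overload also hands the batch Time to the user function. A minimal sketch of how that variant could be used, assuming a hypothetical DStream[String] (the name tagWithBatchTime is illustrative, not from the original):

import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.Time
import org.apache.spark.streaming.dstream.DStream

// Hypothetical helper: tag each record with the timestamp of the batch
// it arrived in, using the (RDD[T], Time) => RDD[U] overload of transform.
def tagWithBatchTime(lines: DStream[String]): DStream[(Long, String)] =
  lines.transform { (rdd: RDD[String], batchTime: Time) =>
    // batchTime identifies the current batch interval
    rdd.map(record => (batchTime.milliseconds, record))
  }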
Transform allows arbitrary RDD-to-RDD functions to be executed on a DStream. Even for operations not exposed in the DStream API, it makes extending the Spark API straightforward. The function is invoked once per batch interval; in effect, it applies a transformation to the RDD wrapped inside the DStream. Put plainly, you can operate on the RDD encapsulated by each batch of the DStream and use RDD operators on it directly.
import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.{Duration, StreamingContext}
import org.apache.spark.streaming.dstream.{DStream, ReceiverInputDStream}

val conf: SparkConf = new SparkConf().setMaster("local[*]").setAppName("wordcount")
val ssc = new StreamingContext(conf, Duration(3000))
val lineStream: ReceiverInputDStream[String] = ssc.socketTextStream("localhost", 9999)

// Word count implemented with plain RDD operators inside transform:
val wordAndCountDs: DStream[(String, Int)] = lineStream.transform(rdd => {
  val words: RDD[String] = rdd.flatMap(_.split(" "))
  val wordMap: RDD[(String, Int)] = words.map((_, 1))
  val reduceSum: RDD[(String, Int)] = wordMap.reduceByKey(_ + _)
  reduceSum
})

wordAndCountDs.print()
ssc.start()
ssc.awaitTermination()
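A common reason to reach for transform is to combine each batch with a static RDD using operations the DStream API does not expose directly, such as a join. A minimal sketch building on the example above; the blacklist contents and names here are made up for illustration:

// Hypothetical: drop words that appear in a static blacklist by joining
// each batch's RDD against it -- leftOuterJoin is an RDD operation that
// is not available on DStream itself.
val blacklist: RDD[(String, Boolean)] =
  ssc.sparkContext.parallelize(Seq(("spam", true)))

val filtered: DStream[(String, Int)] = wordAndCountDs.transform { rdd =>
  rdd.leftOuterJoin(blacklist)
    .filter { case (_, (_, flagged)) => flagged.isEmpty } // keep unflagged words
    .map { case (word, (count, _)) => (word, count) }
}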