转换算子1---map,flatMap
- RDD支持的转换算子DStream大部分都是支持的
map、flatMap、filter、distinct、union、join、reduceByKey......
RDD中部分行动算子DStream会当作转换算子使用,算子的执行逻辑是一样的
package SparkStreaming.trans

import org.apache.spark.SparkConf
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.{Milliseconds, StreamingContext}

/**
 * Demonstrates basic DStream transformation operators: map and filter.
 *
 * Reads text lines from a socket, masks banned words with map, and drops
 * lines containing banned words with filter. Results are printed per batch.
 */
object Transform1 {
  def main(args: Array[String]): Unit = {
    // local[3]: the socket receiver occupies one thread; the rest process batches.
    val conf = new SparkConf().setAppName("trans1").setMaster("local[3]")
    // Batch interval: 5000 ms.
    val ssc = new StreamingContext(conf, Milliseconds(5000))
    // Receive lines from node1:44444; cache serialized in memory only.
    val ds = ssc.socketTextStream("node1", 44444, StorageLevel.MEMORY_ONLY_SER)

    // map: mask each banned word with "*".
    // Fix: the intermediate `val str` was redundant — return the expression directly.
    val ds1 = ds.map((line: String) =>
      line.replaceAll("操", "*").replaceAll("草", "*")
    )

    // filter: keep only lines that contain no banned word.
    // Fix: `if (cond) false else true` collapsed to the idiomatic `!cond`.
    val ds2 = ds.filter((line: String) =>
      !(line.contains("屮") || line.contains("尼玛"))
    )

    // NOTE(review): these println calls run exactly once on the driver while the
    // streaming graph is being built — NOT once per batch. Only print() below
    // executes every batch interval.
    println("-------")
    ds1.print()
    println("-------")
    ds2.print()
    println("-------")

    ssc.start()
    ssc.awaitTermination()
  }
}
转换算子2---count,reduce
package SparkStreaming.trans

import org.apache.spark.SparkConf
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.dstream.DStream
import org.apacheache.spark.streaming.{Milliseconds, StreamingContext}

/**
 * Shows RDD-style action operators that DStream treats as transformations:
 * count and reduce each produce a new DStream (one value per batch) instead
 * of returning a value to the driver.
 */
object Transform2 {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setAppName("trans2").setMaster("local[2]")
    // Batch interval: 3000 ms.
    val streamingContext = new StreamingContext(sparkConf, Milliseconds(3000))

    // Text lines arriving on node1:44444, cached deserialized in memory.
    val lines = streamingContext.socketTextStream("node1", 44444, StorageLevel.MEMORY_ONLY)

    // Per-batch record count, emitted as a DStream of one Long per batch.
    val countsPerBatch: DStream[Long] = lines.count()
    // Per-batch reduction: concatenates all lines of the batch.
    val reducedPerBatch = lines.reduce(_ + _)

    countsPerBatch.print()
    reducedPerBatch.print()

    streamingContext.start()
    streamingContext.awaitTermination()
  }
}
- DStream中一个特殊转换算子transform
package SparkStreaming.trans

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.{Seconds, StreamingContext}

/**
 * Demonstrates the special transform operator: it exposes each batch's
 * underlying RDD so arbitrary RDD-level code can be applied per batch.
 * Both pipelines below compute the same word count.
 */
object Transform3 {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setMaster("local[3]").setAppName("transform3")
    // Batch interval: 3 seconds.
    val streamingContext = new StreamingContext(sparkConf, Seconds(3))
    val source = streamingContext.socketTextStream("node1", 44444, StorageLevel.MEMORY_ONLY)

    // Word count expressed directly with DStream operators.
    val wordCountViaDStream: DStream[(String, Int)] =
      source.flatMap(_.split(" ")).map((_, 1)).reduceByKey(_ + _)

    // The same word count expressed at the RDD level inside transform.
    val wordCountViaTransform = source.transform { (batchRdd: RDD[String]) =>
      val counted: RDD[(String, Int)] =
        batchRdd.flatMap(_.split(" ")).map((_, 1)).reduceByKey(_ + _)
      counted
    }

    wordCountViaDStream.print()
    wordCountViaTransform.print()

    streamingContext.start()
    streamingContext.awaitTermination()
  }
}
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· TypeScript + Deepseek 打造卜卦网站:技术与玄学的结合
· Manus的开源复刻OpenManus初探
· AI 智能体引爆开源社区「GitHub 热点速览」
· 三行代码完成国际化适配,妙~啊~
· .NET Core 中如何实现缓存的预热?