map与flatMap的区别

Spark 版本:Spark 2.0.2

Scala 版本:2.11.8

服务器版本:CentOS 6.7

对比 map 和 flatMap 在 RDD 中的使用(以下代码在 spark-shell 中执行):
// Sample input: an RDD holding three copies of the same space-separated sentence.
// `sc` is the SparkContext that spark-shell provides.
val rdd1 = sc.parallelize(
  Seq.fill(3)("one two three four five six seven")
)
 
 
// map: each input line produces exactly one output element, so splitting every
// line yields one Array[String] per line (a nested result).
rdd1.map(line => line.split(" ")).collect()
/*
res6: Array[Array[String]] = Array(Array(one, two, three, four, five, six, seven),
                                  Array(one, two, three, four, five, six, seven),
                                  Array(one, two, three, four, five, six, seven))
*/
 
// flatMap: the per-line arrays are flattened, so the result is a single
// Array[String] containing every word of every line.
rdd1.flatMap(line => line.split(" ")).collect()
/*
res7: Array[String] = Array(one, two, three, four, five, six, seven,
                            one, two, three, four, five, six, seven,
                            one, two, three, four, five, six, seven)
*/
 
 
// Sample input: a pair RDD of (id, sentence) for ids 1, 2 and 3, all sharing
// the same space-separated sentence.
val rdd2 = sc.parallelize(
  Seq(1, 2, 3).map(id => id -> "one two three four five six seven")
)
 
// map over pairs: the key is kept and the sentence is replaced by its array of
// words, giving one (id, Array[String]) element per input pair.
rdd2.map { case (id, sentence) => (id, sentence.split(" ")) }.collect()
/*
res14: Array[(Int, Array[String])] = Array((1,Array(one, two, three, four, five, six, seven)),
                                           (2,Array(one, two, three, four, five, six, seven)),
                                           (3,Array(one, two, three, four, five, six, seven)))
*/
 
 
// Same as the previous step, but the word array is exposed as an Iterable[String]
// (shown as WrappedArray in the output below) instead of a raw Array.
rdd2.map { case (id, sentence) =>
  (id, sentence.split(" ").toIterable)
}.collect()
/*
res4: Array[(Int, Iterable[String])] = Array((1,WrappedArray(one, two, three, four, five, six, seven)),
                                             (2,WrappedArray(one, two, three, four, five, six, seven)),
                                             (3,WrappedArray(one, two, three, four, five, six, seven)))
*/
 
// Two steps: map turns each (id, sentence) into (id, words); flatMap then expands
// every (id, words) pair into one (id, word) pair per word, flattening the result.
// (Spark's `flatMapValues` on pair RDDs expresses the same expansion in one step.)
rdd2
  .map { case (id, sentence) => (id, sentence.split(" ").toIterable) }
  .flatMap { case (id, words) => words.map(word => (id, word)) }
  .collect()

/*
res7: Array[(Int, String)] = Array((1,one), (1,two), (1,three), (1,four), (1,five), (1,six), (1,seven),
                                   (2,one), (2,two), (2,three), (2,four), (2,five), (2,six), (2,seven),
                                   (3,one), (3,two), (3,three), (3,four), (3,five), (3,six), (3,seven))
*/

  

posted @   强迫症重症患者  阅读(8215)  评论(0)  编辑  收藏  举报
编辑推荐:
· AI与.NET技术实操系列:向量存储与相似性搜索在 .NET 中的实现
· 基于Microsoft.Extensions.AI核心库实现RAG应用
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· 开发者必知的日志记录最佳实践
· SQL Server 2025 AI相关能力初探
阅读排行:
· winform 绘制太阳,地球,月球 运作规律
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
· AI与.NET技术实操系列(五):向量存储与相似性搜索在 .NET 中的实现
· 超详细:普通电脑也行Windows部署deepseek R1训练数据并当服务器共享给他人
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
点击右上角即可分享
微信分享提示