Spark2 Dataset分析函数--排名函数row_number,rank,dense_rank,percent_rank
-- Rank the rows of Affairs by ascending age within each gender, using the four
-- ranking window functions side by side so their tie handling can be compared:
--   rowNumber   : consecutive 1..n numbering; rows tied on age still get distinct numbers
--   ranks       : tied rows share a rank, and the following rank skips ahead (gaps)
--   denseRank   : tied rows share a rank, with no gaps between successive ranks
--   percentRank : relative rank, (rank - 1) / (rows in partition - 1)
-- All four share one window definition, declared once in the WINDOW clause.
SELECT
    gender,
    age,
    ROW_NUMBER()   OVER w AS rowNumber,
    RANK()         OVER w AS ranks,
    DENSE_RANK()   OVER w AS denseRank,
    PERCENT_RANK() OVER w AS percentRank
FROM Affairs
WINDOW w AS (PARTITION BY gender ORDER BY age)
// Demo: the ranking window functions row_number, rank, dense_rank and percent_rank,
// evaluated through Spark SQL over a small in-memory sample registered as the
// temp view "Affairs".
// NOTE(review): SparkSession is assumed to be in scope already (spark-shell, or an
// `import org.apache.spark.sql.SparkSession` elsewhere in the file) — confirm.
val spark = SparkSession.builder()
  .appName("Spark SQL basic example")
  .config("spark.some.config.option", "some-value")
  .getOrCreate()

// For implicit conversions like converting RDDs to DataFrames
import spark.implicits._

// Sample rows. Tuple positions line up with the column names passed to toDF below:
// (affairs, gender, age, yearsmarried, children, religiousness, education, occupation, rating).
// The integer literals are widened to Double by the declared element type.
val dataList: List[(Double, String, Double, Double, String, Double, Double, Double, Double)] = List(
  (0, "male", 37, 10, "no", 3, 18, 7, 4),
  (0, "female", 27, 4, "no", 4, 14, 6, 4),
  (0, "female", 32, 15, "yes", 1, 12, 1, 4),
  (0, "male", 57, 15, "yes", 5, 18, 6, 5),
  (0, "male", 22, 0.75, "no", 2, 17, 6, 3),
  (0, "female", 32, 1.5, "no", 2, 17, 5, 5),
  (0, "female", 22, 0.75, "no", 2, 12, 1, 3),
  (0, "male", 57, 15, "yes", 2, 14, 4, 4),
  (0, "female", 32, 15, "yes", 4, 16, 1, 2),
  (0, "male", 22, 1.5, "no", 4, 14, 4, 5),
  (0, "male", 37, 15, "yes", 2, 20, 7, 2),
  (0, "male", 27, 4, "yes", 4, 18, 6, 4),
  (0, "male", 47, 15, "yes", 5, 17, 6, 4),
  (0, "female", 22, 1.5, "no", 2, 17, 5, 4),
  (0, "female", 27, 4, "no", 4, 14, 5, 4),
  (0, "female", 37, 15, "yes", 1, 17, 5, 5),
  (0, "female", 37, 15, "yes", 2, 18, 4, 3),
  (0, "female", 22, 0.75, "no", 3, 16, 5, 4),
  (0, "female", 22, 1.5, "no", 2, 16, 5, 5),
  (0, "female", 27, 10, "yes", 2, 14, 1, 5),
  (0, "female", 22, 1.5, "no", 2, 16, 5, 5),
  (0, "female", 22, 1.5, "no", 2, 16, 5, 5),
  (0, "female", 27, 10, "yes", 4, 16, 5, 4),
  (0, "female", 32, 10, "yes", 3, 14, 1, 5),
  (0, "male", 37, 4, "yes", 2, 20, 6, 4))

val data = dataList.toDF("affairs", "gender", "age", "yearsmarried", "children", "religiousness", "education", "occupation", "rating")

data.printSchema()

// Create the temp view that the SQL below queries
data.createOrReplaceTempView("Affairs")

// Each fragment is one ranking function over the same window
// (partition by gender, order by age); they are concatenated into a single SELECT.
val s1 = "row_number() over(partition by gender order by age) as rowNumber,"
val s2 = "rank() over(partition by gender order by age) as ranks,"
val s3 = "dense_rank() over(partition by gender order by age) as denseRank,"
val s4 = "percent_rank() over(partition by gender order by age) as percentRank"

val df8 = spark.sql("select gender,age," + s1 + s2 + s3 + s4 + " from Affairs")

// Print up to 50 rows; the sample has 25, so every row is shown.
df8.show(50)

// Output of df8.show(50):
// +------+----+---------+-----+---------+------------------+
// |gender| age|rowNumber|ranks|denseRank|       percentRank|
// +------+----+---------+-----+---------+------------------+
// |female|22.0|        1|    1|        1|               0.0|
// |female|22.0|        2|    1|        1|               0.0|
// |female|22.0|        3|    1|        1|               0.0|
// |female|22.0|        4|    1|        1|               0.0|
// |female|22.0|        5|    1|        1|               0.0|
// |female|22.0|        6|    1|        1|               0.0|
// |female|27.0|        7|    7|        2|               0.4|
// |female|27.0|        8|    7|        2|               0.4|
// |female|27.0|        9|    7|        2|               0.4|
// |female|27.0|       10|    7|        2|               0.4|
// |female|32.0|       11|   11|        3|0.6666666666666666|
// |female|32.0|       12|   11|        3|0.6666666666666666|
// |female|32.0|       13|   11|        3|0.6666666666666666|
// |female|32.0|       14|   11|        3|0.6666666666666666|
// |female|37.0|       15|   15|        4|0.9333333333333333|
// |female|37.0|       16|   15|        4|0.9333333333333333|
// |  male|22.0|        1|    1|        1|               0.0|
// |  male|22.0|        2|    1|        1|               0.0|
// |  male|27.0|        3|    3|        2|              0.25|
// |  male|37.0|        4|    4|        3|             0.375|
// |  male|37.0|        5|    4|        3|             0.375|
// |  male|37.0|        6|    4|        3|             0.375|
// |  male|47.0|        7|    7|        4|              0.75|
// |  male|57.0|        8|    8|        5|             0.875|
// |  male|57.0|        9|    8|        5|             0.875|
// +------+----+---------+-----+---------+------------------+
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 从 HTTP 原因短语缺失研究 HTTP/2 和 HTTP/3 的设计差异
· AI与.NET技术实操系列:向量存储与相似性搜索在 .NET 中的实现
· 基于Microsoft.Extensions.AI核心库实现RAG应用
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· 开发者必知的日志记录最佳实践
· TypeScript + Deepseek 打造卜卦网站:技术与玄学的结合
· Manus的开源复刻OpenManus初探
· AI 智能体引爆开源社区「GitHub 热点速览」
· C#/.NET/.NET Core技术前沿周刊 | 第 29 期(2025年3.1-3.9)
· 从HTTP原因短语缺失研究HTTP/2和HTTP/3的设计差异