Spark SQL 中的集合类型
# Demo: aggregate per-subject [name, score] arrays with Spark SQL's collect_set.
# Relies on an existing SparkSession bound to the name `spark`.

# Build a small DataFrame of (subject, name, score) rows.
# NOTE(review): the first row's subject does not look like a subject name --
# every other student has 数学/语文/英语 rows, so this was presumably meant to
# be '数学'. Left unchanged here; confirm against the original data.
df = spark.createDataFrame(
    [
        ('LC7-H6116BCF-R-GL-201116V750Fans', '张三', 88),
        ('语文', '张三', 92),
        ('英语', '张三', 77),
        ('数学', '王五', 65),
        ('语文', '王五', 87),
        ('英语', '王五', 90),
        ('数学', '李雷', 67),
        ('语文', '李雷', 33),
        ('英语', '李雷', 24),
        ('数学', '宫九', 77),
        ('语文', '宫九', 87),
    ],
    ['subject', 'name', 'score'],
)

# Register the DataFrame as temp view `t1` so the SQL below can reference it.
df.createOrReplaceTempView('t1')

# Group by subject and collect each group's Array(name, score) values into
# column `c`, then print the result.
# NOTE(review): name is a string and score is an integer; presumably Spark
# coerces the array elements to a common type -- confirm the resulting schema.
spark.sql(
    "select subject,collect_set(Array(name,score)) as c"
    " from t1"
    " group by subject"
).show()