案例：读取并分析 JSON 格式的数组（Array）

【1】数据：文件 jsonArrayFile（代码中的读取路径为 ./data/jsonArrayFile）内容如下，每行是一个 JSON 对象
{"name":"zhangsan","age":18,"scores":[{"xueqi":1,"yuwen":98,"shuxue":90,"yingyu":100},{"xueqi":2,"yuwen":98,"shuxue":78,"yingyu":100}]}
{"name":"lisi","age":19,"scores":[{"xueqi":1,"yuwen":58,"shuxue":50,"yingyu":78},{"xueqi":2,"yuwen":56,"shuxue":76,"yingyu":13}]}
{"name":"wangwu","age":17,"scores":[{"xueqi":1,"yuwen":18,"shuxue":90,"yingyu":45},{"xueqi":2,"yuwen":76,"shuxue":42,"yingyu":45}]}
{"name":"zhaoliu","age":20,"scores":[{"xueqi":1,"yuwen":68,"shuxue":23,"yingyu":63},{"xueqi":2,"yuwen":23,"shuxue":45,"yingyu":87}]}
{"name":"tianqi","age":22,"scores":[{"xueqi":1,"yuwen":88,"shuxue":91,"yingyu":41},{"xueqi":2,"yuwen":56,"shuxue":79,"yingyu":45}]}

【2】scala代码实现

 
 package com.it.baizhan.scalacode.sparksql.examples

 import org.apache.spark.sql.SparkSession

 /**
   *  Reads JSON records whose `scores` field is an array and flattens that
   *  array into one row per element.
   *
   *  explode(collection) is a one-to-many operation: every element of the
   *  collection becomes its own output row, with the other selected columns
   *  repeated on each of those rows.
   */
 object ReadJsonArrayData {

   /**
     *  Loads `./data/jsonArrayFile`, explodes the `scores` array and prints
     *  one row per (name, age, score entry) to standard output.
     *
     *  @param args command-line arguments; not used
     */
   def main(args: Array[String]): Unit = {
     val session = SparkSession.builder().appName("test").master("local").getOrCreate()
     // `implicits._` supplies the $"column" string interpolator used below.
     import session.implicits._
     import org.apache.spark.sql.functions.explode

     // Always stop the session, even if reading or showing the data fails,
     // so the resources held by the local Spark context are released.
     try {
       val frame = session.read.json("./data/jsonArrayFile")

       /**
        * Contents of `frame` right after loading (layout as printed by an
        * untruncated show). The values inside each `scores` element are
        * listed in the order shuxue, xueqi, yingyu, yuwen:
        *
        * +---+--------+------------------------------------+
        * |age|name    |scores                              |
        * +---+--------+------------------------------------+
        * |18 |zhangsan|[[90, 1, 100, 98], [78, 2, 100, 98]]|
        * |19 |lisi    |[[50, 1, 78, 58], [76, 2, 13, 56]]  |
        * |17 |wangwu  |[[90, 1, 45, 18], [42, 2, 45, 76]]  |
        * |20 |zhaoliu |[[23, 1, 63, 68], [45, 2, 87, 23]]  |
        * |22 |tianqi  |[[91, 1, 41, 88], [79, 2, 45, 56]]  |
        * +---+--------+------------------------------------+
        */

       // One output row per element of `scores`; the element is exposed as
       // the struct column `el`.
       val exploded = frame.select($"name", $"age", explode($"scores").as("el"))

       // Promote the struct fields of `el` to top-level columns and print them.
       exploded
         .select($"name", $"age", $"el.xueqi", $"el.yuwen", $"el.shuxue", $"el.yingyu")
         .show()

       /**
        * Output of the show() call above:
        *
        * +--------+---+-----+-----+------+------+
        * |    name|age|xueqi|yuwen|shuxue|yingyu|
        * +--------+---+-----+-----+------+------+
        * |zhangsan| 18|    1|   98|    90|   100|
        * |zhangsan| 18|    2|   98|    78|   100|
        * |    lisi| 19|    1|   58|    50|    78|
        * |    lisi| 19|    2|   56|    76|    13|
        * |  wangwu| 17|    1|   18|    90|    45|
        * |  wangwu| 17|    2|   76|    42|    45|
        * | zhaoliu| 20|    1|   68|    23|    63|
        * | zhaoliu| 20|    2|   23|    45|    87|
        * |  tianqi| 22|    1|   88|    91|    41|
        * |  tianqi| 22|    2|   56|    79|    45|
        * +--------+---+-----+-----+------+------+
        */

       // The same query written in Spark SQL, kept for reference
       // (show() returns Unit, so its result is not bound to a val):
       //
       //    frame.createTempView("temp")
       //    session.sql(
       //      """
       //        | select
       //        |   name,age ,el.xueqi,el.yuwen,el.shuxue,el.yingyu
       //        | from
       //        | (select name,age,explode(scores) as el  from temp ) t
       //      """.stripMargin).show()
     } finally {
       session.stop()
     }
   }

 }

 


posted @ 2021-03-04 11:40  大数据程序员  阅读(355)  评论(0编辑  收藏  举报