SparkSQL行列变化

【1】数据  文件 rowcolumnData 数据如下

username,item,price
zhangsan,A,1
zhangsan,B,2
zhangsan,C,3
lisi,A,4
lisi,C,5
zhangsan,D,6
lisi,B,7
wangwu,C,8

【2】scala代码实现

  1 package com.it.baizhan.scalacode.sparksql.examples
  2 
  3 import org.apache.spark.sql.SparkSession
  4 
  5 /**
  6   *  SparkSQL行列变化:
  7   *     +--------+----+-----+
  8   *     |username|item|price|          +--------+----+----+---+----+
  9   *     +--------+----+-----+          |username|   A|   B|  C|   D|
 10   *     |zhangsan|   A|    1|          +--------+----+----+---+----+
 11   *     |zhangsan|   B|    2|          |  wangwu|null|null|  8|null|
 12   *     |zhangsan|   C|    3|          |zhangsan|   1|   2|  3|   6|
 13   *     |    lisi|   A|    4|          |    lisi|   4|   7|  5|null|
 14   *     |    lisi|   C|    5|          +--------+----+----+---+----+
 15   *     |zhangsan|   D|    6|
 16   *     |    lisi|   B|    7|
 17   *     |  wangwu|   C|    8|
 18   *     +--------+----+-----+
 19   *    SQL函数:
 20   *     str_to_map(字段,分隔符1,分隔符2) : 把当前字符串字段按照分隔符1切分成多条数据,再对每条数据按照分隔符2切割成K,V格式的数据组成Map
 21   *     map(K1,V1,K2,V2,K3,V3... ...) : 得到一个map集合
 22   */
 23 object RowColumnTransfer2 {
 24   def main(args: Array[String]): Unit = {
 25     val session = SparkSession.builder().appName("test").master("local").getOrCreate()
 26     session.sparkContext.setLogLevel("Error")
 27     val frame = session.read.option("header",true).csv("./data/rowcolumnData")
 28     frame.createTempView("temp1")
 29     session.sql(
 30       """
 31         |  select
 32         |     username,concat_ws("#",collect_list(concat(item,",",price))) as cw
 33         |  from temp1
 34         |  group by username
 35       """.stripMargin).createTempView("temp2")
 36 
 37     /**
 38      * +--------+---------------+
 39      * |username|cw             |
 40      * +--------+---------------+
 41      * |wangwu  |C,8            |
 42      * |zhangsan|A,1#B,2#C,3#D,6|
 43      * |lisi    |A,4#C,5#B,7    |
 44      * +--------+---------------+
 45      */
 46     session.sql(
 47       """
 48         | select
 49         |   username,str_to_map(cw,"#",",") as mp
 50         | from temp2
 51       """.stripMargin).createTempView("temp3")
 52 
 53     /**
 54      * +--------+--------------------------------+
 55      * |username|mp                              |
 56      * +--------+--------------------------------+
 57      * |wangwu  |[C -> 8]                        |
 58      * |zhangsan|[A -> 1, B -> 2, C -> 3, D -> 6]|
 59      * |lisi    |[A -> 4, C -> 5, B -> 7]        |
 60      * +--------+--------------------------------+
 61      */
 62 
 63     session.sql(
 64       """
 65         | select username ,mp['A'] as A,mp['B'] as B ,mp['C'] as C ,mp['D'] as D
 66         | from temp3
 67       """.stripMargin).createTempView("temp4")
 68 
 69     /**
 70      * +--------+----+----+---+----+
 71      * |username|A   |B   |C  |D   |
 72      * +--------+----+----+---+----+
 73      * |wangwu  |null|null|8  |null|
 74      * |zhangsan|1   |2   |3  |6   |
 75      * |lisi    |4   |7   |5  |null|
 76      * +--------+----+----+---+----+
 77      */
 78 
 79     session.sql(
 80       """
 81         | select username,item,price
 82         | from
 83         | (select
 84         |   username,explode(map("A",A,"B",B,"C",C,"D",D)) as (item,price)
 85         | from temp4) ttt
 86         | where price is not null
 87       """.stripMargin).show(false)
 88 
 89     /**
 90      * +--------+----+-----+
 91      * |username|item|price|
 92      * +--------+----+-----+
 93      * |wangwu  |C   |8    |
 94      * |zhangsan|A   |1    |
 95      * |zhangsan|B   |2    |
 96      * |zhangsan|C   |3    |
 97      * |zhangsan|D   |6    |
 98      * |lisi    |A   |4    |
 99      * |lisi    |B   |7    |
100      * |lisi    |C   |5    |
101      * +--------+----+-----+
102      */
103   }
104 
105 }
posted @ 2021-03-04 16:30  大数据程序员  阅读(121)  评论(0编辑  收藏  举报