读取 vpnlog 日志文件,计算这一天,每个小时在线用户数和各个用户的在线总时长,在线次数,最大在线时长

【1】数据 文件vpnlog 数据如下
{"userName":"a","ts":"2020-04-07T00:12:02.000Z","type":"logout"}
{"userName":"a","ts":"2020-04-07T00:25:36.000Z","type":"login"}
{"userName":"a","ts":"2020-04-07T01:45:36.000Z","type":"logout"}
{"userName":"a","ts":"2020-04-07T03:15:23.000Z","type":"login"}
{"userName":"a","ts":"2020-04-07T04:25:57.000Z","type":"logout"}
{"userName":"a","ts":"2020-04-07T05:04:36.000Z","type":"login"}
{"userName":"a","ts":"2020-04-07T07:08:32.000Z","type":"logout"}
{"userName":"a","ts":"2020-04-07T08:09:00.000Z","type":"login"}
{"userName":"a","ts":"2020-04-07T12:15:43.000Z","type":"logout"}
{"userName":"a","ts":"2020-04-07T16:35:18.000Z","type":"login"}
{"userName":"a","ts":"2020-04-07T19:48:36.000Z","type":"logout"}
{"userName":"a","ts":"2020-04-07T21:25:36.000Z","type":"login"}
{"userName":"a","ts":"2020-04-07T21:35:36.000Z","type":"logout"}
{"userName":"a","ts":"2020-04-07T21:40:36.000Z","type":"login"}
{"userName":"a","ts":"2020-04-07T22:15:36.000Z","type":"logout"}
{"userName":"a","ts":"2020-04-07T23:17:21.000Z","type":"login"}
{"userName":"b","ts":"2020-04-07T00:25:36.000Z","type":"login"}
{"userName":"b","ts":"2020-04-07T01:45:36.000Z","type":"logout"}
{"userName":"b","ts":"2020-04-07T03:15:23.000Z","type":"login"}
{"userName":"b","ts":"2020-04-07T04:25:57.000Z","type":"logout"}
{"userName":"b","ts":"2020-04-07T05:04:36.000Z","type":"login"}
{"userName":"b","ts":"2020-04-07T07:08:32.000Z","type":"logout"}
{"userName":"b","ts":"2020-04-07T10:08:32.000Z","type":"login"}
{"userName":"b","ts":"2020-04-07T12:15:43.000Z","type":"logout"}
{"userName":"b","ts":"2020-04-07T16:35:18.000Z","type":"login"}
{"userName":"b","ts":"2020-04-07T19:48:36.000Z","type":"logout"}
{"userName":"b","ts":"2020-04-07T21:25:36.000Z","type":"login"}
{"userName":"b","ts":"2020-04-07T21:35:36.000Z","type":"logout"}
{"userName":"b","ts":"2020-04-07T21:50:36.000Z","type":"login"}
{"userName":"b","ts":"2020-04-07T22:15:36.000Z","type":"logout"}
{"userName":"b","ts":"2020-04-07T23:17:21.000Z","type":"login"}

【2】Scala代码实现

 

  1 package com.it.baizhan.scalacode.sparksql.examples
  2 
  3 import java.text.SimpleDateFormat
  4 import java.util.Calendar
  5 
  6 import org.apache.spark.sql.SparkSession
  7 
  8 import scala.collection.mutable.ListBuffer
  9 
 10 /**
 11   *  需求:
 12   *     读取 vpnlog 日志文件,其中userName为用户名,ts为记录时间,当type为login时,为登入时间,type为logout为登出时间
 13   *     问题:
 14   *     1)这一天,每个小时在线用户数
 15   *     2)计算这一天,各个用户的在线总时长,在线次数,最大在线时长
 16   *     (如果用户一天开始是登出记录,则认为他零点登入,如果一天结束时登入日志,则认为他24点登出)
 17   *     要求使用SparkSQL实现,并给出计算逻辑说明
 18   *
 19   *     SQL函数:
 20   *       replace(列,字符串1,字符串2) :对某列的数据查找字符串1替换成字符串2
 21   */
 22 object VpnLog {
 23   def main(args: Array[String]): Unit = {
 24     val session = SparkSession.builder().master("local").appName("test").getOrCreate()
 25     session.sparkContext.setLogLevel("Error")
 26     //读取json格式的数据
 27     val df = session.read.json("./data/vpnlog")
 28     df.createTempView("temp1")
 29 
 30     //对时间进行转换
 31     session.sql(
 32       """
 33         | select
 34         |   username,replace(replace(ts,"T"," "),".000Z","") as ts,type
 35         | from temp1
 36       """.stripMargin).createTempView("temp2")
 37 
 38     /**
 39      * +--------+-------------------+------+
 40      * |username|ts                 |type  |
 41      * +--------+-------------------+------+
 42      * |a       |2020-04-07 00:12:02|logout|
 43      * |a       |2020-04-07 00:25:36|login |
 44      * |a       |2020-04-07 01:45:36|logout|
 45      * |a       |2020-04-07 03:15:23|login |
 46      * |a       |2020-04-07 04:25:57|logout|
 47      * |a       |2020-04-07 05:04:36|login |
 48      * |a       |2020-04-07 07:08:32|logout|
 49      * |a       |2020-04-07 08:09:00|login |
 50      * |a       |2020-04-07 12:15:43|logout|
 51      * |a       |2020-04-07 16:35:18|login |
 52      * |a       |2020-04-07 19:48:36|logout|
 53      * |a       |2020-04-07 21:25:36|login |
 54      * |a       |2020-04-07 21:35:36|logout|
 55      * |a       |2020-04-07 21:40:36|login |
 56      * |a       |2020-04-07 22:15:36|logout|
 57      * |a       |2020-04-07 23:17:21|login |
 58      * |b       |2020-04-07 00:25:36|login |
 59      * |b       |2020-04-07 01:45:36|logout|
 60      * |b       |2020-04-07 03:15:23|login |
 61      * |b       |2020-04-07 04:25:57|logout|
 62      * |b       |2020-04-07 05:04:36|login |
 63      * |b       |2020-04-07 07:08:32|logout|
 64      * |b       |2020-04-07 10:08:32|login |
 65      * |b       |2020-04-07 12:15:43|logout|
 66      * |b       |2020-04-07 16:35:18|login |
 67      * |b       |2020-04-07 19:48:36|logout|
 68      * |b       |2020-04-07 21:25:36|login |
 69      * |b       |2020-04-07 21:35:36|logout|
 70      * |b       |2020-04-07 21:50:36|login |
 71      * |b       |2020-04-07 22:15:36|logout|
 72      * |b       |2020-04-07 23:17:21|login |
 73      * +--------+-------------------+------+
 74      */
 75 
 76     //给表中的数据打标号
 77     session.sql(
 78       """
 79         | select
 80         |     username,ts,type,row_number() over(partition by username order by ts ) as rank
 81         | from temp2
 82       """.stripMargin).createTempView("temp3")
 83 
 84     /**
 85      * +--------+-------------------+------+----+
 86      * |username|ts                 |type  |rank|
 87      * +--------+-------------------+------+----+
 88      * |b       |2020-04-07 00:25:36|login |1   |
 89      * |b       |2020-04-07 01:45:36|logout|2   |
 90      * |b       |2020-04-07 03:15:23|login |3   |
 91      * |b       |2020-04-07 04:25:57|logout|4   |
 92      * |b       |2020-04-07 05:04:36|login |5   |
 93      * |b       |2020-04-07 07:08:32|logout|6   |
 94      * |b       |2020-04-07 10:08:32|login |7   |
 95      * |b       |2020-04-07 12:15:43|logout|8   |
 96      * |b       |2020-04-07 16:35:18|login |9   |
 97      * |b       |2020-04-07 19:48:36|logout|10  |
 98      * |b       |2020-04-07 21:25:36|login |11  |
 99      * |b       |2020-04-07 21:35:36|logout|12  |
100      * |b       |2020-04-07 21:50:36|login |13  |
101      * |b       |2020-04-07 22:15:36|logout|14  |
102      * |b       |2020-04-07 23:17:21|login |15  |
103      * |a       |2020-04-07 00:12:02|logout|1   |
104      * |a       |2020-04-07 00:25:36|login |2   |
105      * |a       |2020-04-07 01:45:36|logout|3   |
106      * |a       |2020-04-07 03:15:23|login |4   |
107      * |a       |2020-04-07 04:25:57|logout|5   |
108      * |a       |2020-04-07 05:04:36|login |6   |
109      * |a       |2020-04-07 07:08:32|logout|7   |
110      * |a       |2020-04-07 08:09:00|login |8   |
111      * |a       |2020-04-07 12:15:43|logout|9   |
112      * |a       |2020-04-07 16:35:18|login |10  |
113      * |a       |2020-04-07 19:48:36|logout|11  |
114      * |a       |2020-04-07 21:25:36|login |12  |
115      * |a       |2020-04-07 21:35:36|logout|13  |
116      * |a       |2020-04-07 21:40:36|login |14  |
117      * |a       |2020-04-07 22:15:36|logout|15  |
118      * |a       |2020-04-07 23:17:21|login |16  |
119      * +--------+-------------------+------+----+
120      */
121 
122     //进行自关联,错位匹配
123     session.sql(
124       """
125         | select a.username as username1 ,a.ts as ts1,a.type as type1,a.rank as rank1,
126         |        b.username as username2 ,b.ts as ts2,b.type as type2,b.rank as rank2
127         | from temp3 a full outer join temp3 b on a.username = b.username and a.rank = b.rank-1
128         | order by a.username,a.ts
129       """.stripMargin).createTempView("temp4")
130 
131     /**
132      * +---------+-------------------+------+-----+---------+-------------------+------+-----+
133      * |username1|ts1                |type1 |rank1|username2|ts2                |type2 |rank2|
134      * +---------+-------------------+------+-----+---------+-------------------+------+-----+
135      * |null     |null               |null  |null |b        |2020-04-07 00:25:36|login |1    |
136      * |null     |null               |null  |null |a        |2020-04-07 00:12:02|logout|1    |
137      * |a        |2020-04-07 00:12:02|logout|1    |a        |2020-04-07 00:25:36|login |2    |
138      * |a        |2020-04-07 00:25:36|login |2    |a        |2020-04-07 01:45:36|logout|3    |
139      * |a        |2020-04-07 01:45:36|logout|3    |a        |2020-04-07 03:15:23|login |4    |
140      * |a        |2020-04-07 03:15:23|login |4    |a        |2020-04-07 04:25:57|logout|5    |
141      * |a        |2020-04-07 04:25:57|logout|5    |a        |2020-04-07 05:04:36|login |6    |
142      * |a        |2020-04-07 05:04:36|login |6    |a        |2020-04-07 07:08:32|logout|7    |
143      * |a        |2020-04-07 07:08:32|logout|7    |a        |2020-04-07 08:09:00|login |8    |
144      * |a        |2020-04-07 08:09:00|login |8    |a        |2020-04-07 12:15:43|logout|9    |
145      * |a        |2020-04-07 12:15:43|logout|9    |a        |2020-04-07 16:35:18|login |10   |
146      * |a        |2020-04-07 16:35:18|login |10   |a        |2020-04-07 19:48:36|logout|11   |
147      * |a        |2020-04-07 19:48:36|logout|11   |a        |2020-04-07 21:25:36|login |12   |
148      * |a        |2020-04-07 21:25:36|login |12   |a        |2020-04-07 21:35:36|logout|13   |
149      * |a        |2020-04-07 21:35:36|logout|13   |a        |2020-04-07 21:40:36|login |14   |
150      * |a        |2020-04-07 21:40:36|login |14   |a        |2020-04-07 22:15:36|logout|15   |
151      * |a        |2020-04-07 22:15:36|logout|15   |a        |2020-04-07 23:17:21|login |16   |
152      * |a        |2020-04-07 23:17:21|login |16   |null     |null               |null  |null |
153      * |b        |2020-04-07 00:25:36|login |1    |b        |2020-04-07 01:45:36|logout|2    |
154      * |b        |2020-04-07 01:45:36|logout|2    |b        |2020-04-07 03:15:23|login |3    |
155      * |b        |2020-04-07 03:15:23|login |3    |b        |2020-04-07 04:25:57|logout|4    |
156      * |b        |2020-04-07 04:25:57|logout|4    |b        |2020-04-07 05:04:36|login |5    |
157      * |b        |2020-04-07 05:04:36|login |5    |b        |2020-04-07 07:08:32|logout|6    |
158      * |b        |2020-04-07 07:08:32|logout|6    |b        |2020-04-07 10:08:32|login |7    |
159      * |b        |2020-04-07 10:08:32|login |7    |b        |2020-04-07 12:15:43|logout|8    |
160      * |b        |2020-04-07 12:15:43|logout|8    |b        |2020-04-07 16:35:18|login |9    |
161      * |b        |2020-04-07 16:35:18|login |9    |b        |2020-04-07 19:48:36|logout|10   |
162      * |b        |2020-04-07 19:48:36|logout|10   |b        |2020-04-07 21:25:36|login |11   |
163      * |b        |2020-04-07 21:25:36|login |11   |b        |2020-04-07 21:35:36|logout|12   |
164      * |b        |2020-04-07 21:35:36|logout|12   |b        |2020-04-07 21:50:36|login |13   |
165      * |b        |2020-04-07 21:50:36|login |13   |b        |2020-04-07 22:15:36|logout|14   |
166      * |b        |2020-04-07 22:15:36|logout|14   |b        |2020-04-07 23:17:21|login |15   |
167      * |b        |2020-04-07 23:17:21|login |15   |null     |null               |null  |null |
168      * +---------+-------------------+------+-----+---------+-------------------+------+-----+
169      */
170 
171     //数据填补
172     session.sql(
173       """
174         | select
175         |   username1 ,ts1,type1,username2,ts2,type2
176         | from
177         | (select
178         |   case when username1 is null then username2 else username1 end username1 ,
179         |   case when ts1 is null then concat(split(ts2," ")[0],' 00:00:00') else ts1 end ts1,
180         |   case when type1 is null then 'login' else type1 end type1,
181         |   case when username2 is null then username1 else username2 end username2,
182         |   case when ts2 is null then concat(split(ts1," ")[0],' 23:59:59') else ts2 end ts2,
183         |   case when type2 is null then 'logout' else type2 end type2
184         | from temp4) t
185         | where type1 = 'login' and type2 = 'logout'
186       """.stripMargin).createTempView("temp5")
187 
188     /**
189      * +---------+-------------------+-----+---------+-------------------+------+
190      * |username1|ts1                |type1|username2|ts2                |type2 |
191      * +---------+-------------------+-----+---------+-------------------+------+
192      * |a        |2020-04-07 00:00:00|login|a        |2020-04-07 00:12:02|logout|
193      * |a        |2020-04-07 00:25:36|login|a        |2020-04-07 01:45:36|logout|
194      * |a        |2020-04-07 03:15:23|login|a        |2020-04-07 04:25:57|logout|
195      * |a        |2020-04-07 05:04:36|login|a        |2020-04-07 07:08:32|logout|
196      * |a        |2020-04-07 08:09:00|login|a        |2020-04-07 12:15:43|logout|
197      * |a        |2020-04-07 16:35:18|login|a        |2020-04-07 19:48:36|logout|
198      * |a        |2020-04-07 21:25:36|login|a        |2020-04-07 21:35:36|logout|
199      * |a        |2020-04-07 21:40:36|login|a        |2020-04-07 22:15:36|logout|
200      * |a        |2020-04-07 23:17:21|login|a        |2020-04-07 23:59:59|logout|
201      * |b        |2020-04-07 00:25:36|login|b        |2020-04-07 01:45:36|logout|
202      * |b        |2020-04-07 03:15:23|login|b        |2020-04-07 04:25:57|logout|
203      * |b        |2020-04-07 05:04:36|login|b        |2020-04-07 07:08:32|logout|
204      * |b        |2020-04-07 10:08:32|login|b        |2020-04-07 12:15:43|logout|
205      * |b        |2020-04-07 16:35:18|login|b        |2020-04-07 19:48:36|logout|
206      * |b        |2020-04-07 21:25:36|login|b        |2020-04-07 21:35:36|logout|
207      * |b        |2020-04-07 21:50:36|login|b        |2020-04-07 22:15:36|logout|
208      * |b        |2020-04-07 23:17:21|login|b        |2020-04-07 23:59:59|logout|
209      * +---------+-------------------+-----+---------+-------------------+------+
210      */
211 
212     session.udf.register("myudf",(t:String,count:Int)=>{
213       val list = new ListBuffer[String]()
214       val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
215       val calendar: Calendar = Calendar.getInstance()
216       calendar.setTime(sdf.parse(t))
217       for(i <- 0 to count){
218         list.append(sdf.format(calendar.getTime))
219         calendar.add(Calendar.HOUR,1)
220       }
221       list
222     })
223 
224     //对数据进行一对多转换
225     session.sql(
226       """
227         | select  username1 as username,ts1,type1,username2,ts2,type2,explode(myudf(ts1,hour(ts2)-hour(ts1))) as tt
228         | from temp5
229       """.stripMargin).createTempView("temp6")
230 
231     /**
232      * +--------+-------------------+-----+---------+-------------------+------+-------------------+
233      * |username|ts1                |type1|username2|ts2                |type2 |tt                 |
234      * +--------+-------------------+-----+---------+-------------------+------+-------------------+
235      * |a       |2020-04-07 00:00:00|login|a        |2020-04-07 00:12:02|logout|2020-04-07 00:00:00|
236      * |a       |2020-04-07 00:25:36|login|a        |2020-04-07 01:45:36|logout|2020-04-07 00:25:36|
237      * |a       |2020-04-07 00:25:36|login|a        |2020-04-07 01:45:36|logout|2020-04-07 01:25:36|
238      * |a       |2020-04-07 03:15:23|login|a        |2020-04-07 04:25:57|logout|2020-04-07 03:15:23|
239      * |a       |2020-04-07 03:15:23|login|a        |2020-04-07 04:25:57|logout|2020-04-07 04:15:23|
240      * |a       |2020-04-07 05:04:36|login|a        |2020-04-07 07:08:32|logout|2020-04-07 05:04:36|
241      * |a       |2020-04-07 05:04:36|login|a        |2020-04-07 07:08:32|logout|2020-04-07 06:04:36|
242      * |a       |2020-04-07 05:04:36|login|a        |2020-04-07 07:08:32|logout|2020-04-07 07:04:36|
243      * |a       |2020-04-07 08:09:00|login|a        |2020-04-07 12:15:43|logout|2020-04-07 08:09:00|
244      * |a       |2020-04-07 08:09:00|login|a        |2020-04-07 12:15:43|logout|2020-04-07 09:09:00|
245      * |a       |2020-04-07 08:09:00|login|a        |2020-04-07 12:15:43|logout|2020-04-07 10:09:00|
246      * |a       |2020-04-07 08:09:00|login|a        |2020-04-07 12:15:43|logout|2020-04-07 11:09:00|
247      * |a       |2020-04-07 08:09:00|login|a        |2020-04-07 12:15:43|logout|2020-04-07 12:09:00|
248      * |a       |2020-04-07 16:35:18|login|a        |2020-04-07 19:48:36|logout|2020-04-07 16:35:18|
249      * |a       |2020-04-07 16:35:18|login|a        |2020-04-07 19:48:36|logout|2020-04-07 17:35:18|
250      * |a       |2020-04-07 16:35:18|login|a        |2020-04-07 19:48:36|logout|2020-04-07 18:35:18|
251      * |a       |2020-04-07 16:35:18|login|a        |2020-04-07 19:48:36|logout|2020-04-07 19:35:18|
252      * |a       |2020-04-07 21:25:36|login|a        |2020-04-07 21:35:36|logout|2020-04-07 21:25:36|
253      * |a       |2020-04-07 21:40:36|login|a        |2020-04-07 22:15:36|logout|2020-04-07 21:40:36|
254      * |a       |2020-04-07 21:40:36|login|a        |2020-04-07 22:15:36|logout|2020-04-07 22:40:36|
255      * |a       |2020-04-07 23:17:21|login|a        |2020-04-07 23:59:59|logout|2020-04-07 23:17:21|
256      * |b       |2020-04-07 00:25:36|login|b        |2020-04-07 01:45:36|logout|2020-04-07 00:25:36|
257      * |b       |2020-04-07 00:25:36|login|b        |2020-04-07 01:45:36|logout|2020-04-07 01:25:36|
258      * |b       |2020-04-07 03:15:23|login|b        |2020-04-07 04:25:57|logout|2020-04-07 03:15:23|
259      * |b       |2020-04-07 03:15:23|login|b        |2020-04-07 04:25:57|logout|2020-04-07 04:15:23|
260      * |b       |2020-04-07 05:04:36|login|b        |2020-04-07 07:08:32|logout|2020-04-07 05:04:36|
261      * |b       |2020-04-07 05:04:36|login|b        |2020-04-07 07:08:32|logout|2020-04-07 06:04:36|
262      * |b       |2020-04-07 05:04:36|login|b        |2020-04-07 07:08:32|logout|2020-04-07 07:04:36|
263      * |b       |2020-04-07 10:08:32|login|b        |2020-04-07 12:15:43|logout|2020-04-07 10:08:32|
264      * |b       |2020-04-07 10:08:32|login|b        |2020-04-07 12:15:43|logout|2020-04-07 11:08:32|
265      * |b       |2020-04-07 10:08:32|login|b        |2020-04-07 12:15:43|logout|2020-04-07 12:08:32|
266      * |b       |2020-04-07 16:35:18|login|b        |2020-04-07 19:48:36|logout|2020-04-07 16:35:18|
267      * |b       |2020-04-07 16:35:18|login|b        |2020-04-07 19:48:36|logout|2020-04-07 17:35:18|
268      * |b       |2020-04-07 16:35:18|login|b        |2020-04-07 19:48:36|logout|2020-04-07 18:35:18|
269      * |b       |2020-04-07 16:35:18|login|b        |2020-04-07 19:48:36|logout|2020-04-07 19:35:18|
270      * |b       |2020-04-07 21:25:36|login|b        |2020-04-07 21:35:36|logout|2020-04-07 21:25:36|
271      * |b       |2020-04-07 21:50:36|login|b        |2020-04-07 22:15:36|logout|2020-04-07 21:50:36|
272      * |b       |2020-04-07 21:50:36|login|b        |2020-04-07 22:15:36|logout|2020-04-07 22:50:36|
273      * |b       |2020-04-07 23:17:21|login|b        |2020-04-07 23:59:59|logout|2020-04-07 23:17:21|
274      * +--------+-------------------+-----+---------+-------------------+------+-------------------+
275      */
276 
277     //对时间进行转换
278     session.sql(
279       """
280         |  select distinct username, from_unixtime(unix_timestamp(tt,'yyyy-MM-dd HH:mm:ss'),'yyyy-MM-dd HH') as transtime
281         |  from temp6
282         |  order by username,transtime
283       """.stripMargin).createTempView("temp7")
284 
285     /**
286      * +--------+-------------+
287      * |username|transtime    |
288      * +--------+-------------+
289      * |a       |2020-04-07 00|
290      * |a       |2020-04-07 01|
291      * |a       |2020-04-07 03|
292      * |a       |2020-04-07 04|
293      * |a       |2020-04-07 05|
294      * |a       |2020-04-07 06|
295      * |a       |2020-04-07 07|
296      * |a       |2020-04-07 08|
297      * |a       |2020-04-07 09|
298      * |a       |2020-04-07 10|
299      * |a       |2020-04-07 11|
300      * |a       |2020-04-07 12|
301      * |a       |2020-04-07 16|
302      * |a       |2020-04-07 17|
303      * |a       |2020-04-07 18|
304      * |a       |2020-04-07 19|
305      * |a       |2020-04-07 21|
306      * |a       |2020-04-07 22|
307      * |a       |2020-04-07 23|
308      * |b       |2020-04-07 00|
309      * |b       |2020-04-07 01|
310      * |b       |2020-04-07 03|
311      * |b       |2020-04-07 04|
312      * |b       |2020-04-07 05|
313      * |b       |2020-04-07 06|
314      * |b       |2020-04-07 07|
315      * |b       |2020-04-07 10|
316      * |b       |2020-04-07 11|
317      * |b       |2020-04-07 12|
318      * |b       |2020-04-07 16|
319      * |b       |2020-04-07 17|
320      * |b       |2020-04-07 18|
321      * |b       |2020-04-07 19|
322      * |b       |2020-04-07 21|
323      * |b       |2020-04-07 22|
324      * |b       |2020-04-07 23|
325      * +--------+-------------+
326      */
327 
328     //统计每个小时段在线的用户数
329     session.sql(
330       """
331         | select  transtime,count(username) as usercount
332         | from temp7
333         | group by transtime
334         | order by transtime
335       """.stripMargin)
336 
337     /**
338      * +-------------+---------+
339      * |    transtime|usercount|
340      * +-------------+---------+
341      * |2020-04-07 00|        2|
342      * |2020-04-07 01|        2|
343      * |2020-04-07 03|        2|
344      * |2020-04-07 04|        2|
345      * |2020-04-07 05|        2|
346      * |2020-04-07 06|        2|
347      * |2020-04-07 07|        2|
348      * |2020-04-07 08|        1|
349      * |2020-04-07 09|        1|
350      * |2020-04-07 10|        2|
351      * |2020-04-07 11|        2|
352      * |2020-04-07 12|        2|
353      * |2020-04-07 16|        2|
354      * |2020-04-07 17|        2|
355      * |2020-04-07 18|        2|
356      * |2020-04-07 19|        2|
357      * |2020-04-07 21|        2|
358      * |2020-04-07 22|        2|
359      * |2020-04-07 23|        2|
360      * +-------------+---------+
361      */
362 
363     //统计每次访问的时长 , 分钟为单位
364     session.sql(
365       """
366         | select
367         |   username1 as username ,ts1,ts2,
368         |   cast (((hour(ts2)*60*60+minute(ts2)*60+second(ts2)*1) - (hour(ts1)*60*60+minute(ts1)*60+second(ts1)*1))/60 as int) as dur
369         | from temp5
370       """.stripMargin).createTempView("temp10")
371 
372     /**
373      * +--------+-------------------+-------------------+---+
374      * |username|ts1                |ts2                |dur|
375      * +--------+-------------------+-------------------+---+
376      * |a       |2020-04-07 00:00:00|2020-04-07 00:12:02|12 |
377      * |a       |2020-04-07 00:25:36|2020-04-07 01:45:36|80 |
378      * |a       |2020-04-07 03:15:23|2020-04-07 04:25:57|70 |
379      * |a       |2020-04-07 05:04:36|2020-04-07 07:08:32|123|
380      * |a       |2020-04-07 08:09:00|2020-04-07 12:15:43|246|
381      * |a       |2020-04-07 16:35:18|2020-04-07 19:48:36|193|
382      * |a       |2020-04-07 21:25:36|2020-04-07 21:35:36|10 |
383      * |a       |2020-04-07 21:40:36|2020-04-07 22:15:36|35 |
384      * |a       |2020-04-07 23:17:21|2020-04-07 23:59:59|42 |
385      * |b       |2020-04-07 00:25:36|2020-04-07 01:45:36|80 |
386      * |b       |2020-04-07 03:15:23|2020-04-07 04:25:57|70 |
387      * |b       |2020-04-07 05:04:36|2020-04-07 07:08:32|123|
388      * |b       |2020-04-07 10:08:32|2020-04-07 12:15:43|127|
389      * |b       |2020-04-07 16:35:18|2020-04-07 19:48:36|193|
390      * |b       |2020-04-07 21:25:36|2020-04-07 21:35:36|10 |
391      * |b       |2020-04-07 21:50:36|2020-04-07 22:15:36|25 |
392      * |b       |2020-04-07 23:17:21|2020-04-07 23:59:59|42 |
393      * +--------+-------------------+-------------------+---+
394      */
395     //统计各个指标
396     session.sql(
397       """
398         | select
399         |    username,sum(dur) as totaldur,count(*) as totalcount,max(dur) as maxdur
400         | from temp10
401         | group by username
402       """.stripMargin).show(100)
403 
404     /**
405      * +--------+--------+----------+------+
406      * |username|totaldur|totalcount|maxdur|
407      * +--------+--------+----------+------+
408      * |       b|     670|         8|   193|
409      * |       a|     811|         9|   246|
410      * +--------+--------+----------+------+
411      */
412 
413   }
414 
415 }
posted @ 2021-03-04 17:22  大数据程序员  阅读(565)  评论(0编辑  收藏  举报