6.道路卡口车流量统计

import java.util.Properties
import org.apache.spark.SparkConf
import org.apache.spark.sql.{SaveMode, SparkSession}

object DayFlow {
  /**
   * Entry point: builds a local Spark session with Hive support and the JDBC
   * credentials used by the traffic-flow statistics queries in the sections below.
   */
  def main(args: Array[String]): Unit = {
    // local[*]: use all available local cores; app name shows up in the Spark UI
    val sparkConf = new SparkConf().setMaster("local[*]").setAppName("dayFlow")
    // Hive support is required so the queries can read the `traffic` Hive database
    val session = SparkSession.builder().config(sparkConf).enableHiveSupport().getOrCreate()

    // JDBC credentials for the MySQL sink used by DataFrame.write.jdbc below.
    // NOTE(review): credentials are hard-coded in source — move them to a config
    // file or environment variables before this leaves a demo environment.
    val properties = new Properties()
    properties.setProperty("user", "root")
    properties.setProperty("password", "Jsq123456...")
  }
}

1.统计每天不同卡口的车流量指标

/**
 * 1. Time dimension — daily traffic volume per monitor checkpoint.
 *    Reads Hive table traffic.flow_detail, groups by date and monitor id,
 *    counts rows per group, and appends the result to MySQL table `day_flow`.
 */
val frame = session.sql("select flow_date, monitor_id, count(*) as flow_num from traffic.flow_detail group by flow_date, monitor_id order by flow_num desc")
frame.write.mode(SaveMode.Append).jdbc("jdbc:mysql://192.168.200.111:3306/project?serverTimezone=UTC", "day_flow", properties)
// NOTE(review): stop() here terminates the SparkSession — if sections 2-5 below
// run in the same program, this call must move after the last query, otherwise
// every subsequent session.sql(...) will fail.
session.stop()

2.统计每天每小时不同卡口的车流量指标

/**
 * 2. Time dimension — hourly traffic volume per monitor checkpoint per day.
 *    Groups traffic.flow_detail by date, hour and monitor id, counts rows per
 *    group, and appends the result to the MySQL table `hour_flow`.
 */
val hourFlowSql =
  "select flow_date, action_hour, monitor_id, count(*) as flow_num " +
    "from traffic.flow_detail group by flow_date, monitor_id, action_hour order by flow_num desc"
val frame1 = session.sql(hourFlowSql)
frame1
  .write
  .mode(SaveMode.Append)
  .jdbc("jdbc:mysql://192.168.200.111:3306/project?serverTimezone=UTC", "hour_flow", properties)

3.每天每个区域的车流量指标

/**
 * 3. Time + area dimension — daily traffic volume per area.
 *    Groups traffic.flow_detail by date and area id, counts rows per group,
 *    and appends the result to the MySQL table `area_flow`.
 */
val areaFlowSql =
  "select flow_date, area_id, count(*) as flow_num " +
    "from traffic.flow_detail group by flow_date, area_id order by flow_num desc"
val frame2 = session.sql(areaFlowSql)
frame2
  .write
  .mode(SaveMode.Append)
  .jdbc("jdbc:mysql://192.168.200.111:3306/project?serverTimezone=UTC", "area_flow", properties)

4. 每天每个区域的每个道路的车流量指标

/**
 * 4. Time + road dimension (a road belongs to an area) — daily traffic volume
 *    per road within each area. Groups traffic.flow_detail by date, area id and
 *    road id, counts rows per group, and appends the result to MySQL table `road_flow`.
 */
val roadFlowSql =
  "select flow_date, area_id, road_id, count(*) as flow_num " +
    "from traffic.flow_detail group by flow_date, area_id, road_id order by flow_num desc"
val frame3 = session.sql(roadFlowSql)
frame3
  .write
  .mode(SaveMode.Append)
  .jdbc("jdbc:mysql://192.168.200.111:3306/project?serverTimezone=UTC", "road_flow", properties)

5.每一个区域每天的车流量压力最大的前十条道路信息

/**
 * 5. Top-10 busiest roads per area per day, computed with a window function:
 *    dense_rank() over (partition by area_id, flow_date order by flow_num desc)
 *    ranks roads inside each (area, day) partition; rows with rank <= 10 are kept.
 *
 * Fixes vs. the original:
 *  - `val frame3` was re-declared here, colliding with the identically named val
 *    in section 4 — renamed to `roadDailyFlow` so both sections compile in one scope.
 *  - the window partitioned only by area_id, which ranks roads across ALL days;
 *    the stated requirement is per area per day, so flow_date is added to the partition.
 */
val roadDailyFlow = session.sql("select flow_date, area_id, road_id, count(*) as flow_num " +
  "from traffic.flow_detail group by flow_date, area_id, road_id order by flow_num desc")
roadDailyFlow.createOrReplaceTempView("road_flow_temp")
val frame4 = session.sql("select * " +
  "from (select flow_date, area_id, road_id, flow_num, dense_rank() over(partition by area_id, flow_date order by flow_num desc) as rank from road_flow_temp) as tt " +
  "where rank <= 10")
frame4.write.mode(SaveMode.Append).jdbc("jdbc:mysql://192.168.200.111:3306/project?serverTimezone=UTC", "area_top10_road", properties)

posted @ 2022-09-15 11:40  jsqup  阅读(226)  评论(0编辑  收藏  举报