import java.util.Properties
import org.apache.spark.SparkConf
import org.apache.spark.sql.{SaveMode, SparkSession}
object DayFlow {

  /**
   * Entry point: builds a local Spark session with Hive support and the JDBC
   * connection properties used by the flow-statistics jobs.
   *
   * @param args unused command-line arguments
   */
  def main (args: Array[String]) : Unit = {
    val sparkConf = new SparkConf().setMaster("local[*]").setAppName("dayFlow")
    val session = SparkSession.builder().config(sparkConf).enableHiveSupport().getOrCreate()

    // JDBC connection properties for the MySQL sink.
    // SECURITY NOTE(review): credentials are hard-coded in source; move them to
    // a config file or environment variable before this leaves a demo setting.
    val properties = new Properties()
    properties.setProperty("user", "root")
    properties.setProperty("password", "Jsq123456...")
    // Explicit driver class so the JDBC write does not rely on driver
    // auto-discovery on the executor classpath (MySQL Connector/J 8.x).
    properties.setProperty("driver", "com.mysql.cj.jdbc.Driver")
  }
}
1.统计每天不同卡口的车流量指标
/**
 * 1. Daily traffic volume per monitor (checkpoint), time-based dimension.
 * Groups table flow_detail by date and monitor id, counts the records in each
 * group, and appends the result to the MySQL table `day_flow`.
 */
val frame = session.sql("select flow_date, monitor_id, count(*) as flow_num from traffic.flow_detail group by flow_date, monitor_id order by flow_num desc" )
frame.write.mode(SaveMode.Append).jdbc("jdbc:mysql://192.168.200.111:3306/project?serverTimezone=UTC" , "day_flow" , properties)
// BUG FIX: session.stop() was called here, tearing the SparkSession down
// before jobs 2-5 below ever run. The session must only be stopped after the
// last job has finished, so the premature stop is removed.
2.统计每天每小时不同卡口的车流量指标
/**
 * 2. Hourly traffic volume per monitor, time-based dimension.
 * Partitions table flow_detail by date, hour and monitor id and counts the
 * rows in every group, then appends the result to MySQL table `hour_flow`.
 */
val hourFlowSql =
  "select flow_date, action_hour, monitor_id, count(*) as flow_num " +
    "from traffic.flow_detail group by flow_date, monitor_id, action_hour order by flow_num desc"
val frame1 = session.sql(hourFlowSql)
frame1
  .write
  .mode(SaveMode.Append)
  .jdbc("jdbc:mysql://192.168.200.111:3306/project?serverTimezone=UTC", "hour_flow", properties)
3.每天每个区域的车流量指标
// 3. Daily traffic volume per area: count flow_detail rows grouped by date
// and area id, sorted by volume, appended to MySQL table `area_flow`.
val areaFlowSql =
  "select flow_date, area_id, count(*) as flow_num " +
    "from traffic.flow_detail group by flow_date, area_id order by flow_num desc"
val frame2 = session.sql(areaFlowSql)
frame2
  .write
  .mode(SaveMode.Append)
  .jdbc("jdbc:mysql://192.168.200.111:3306/project?serverTimezone=UTC", "area_flow", properties)
4. 每天每个区域的每个道路的车流量指标
// 4. Daily traffic volume per road within each area: count flow_detail rows
// grouped by date, area id and road id, appended to MySQL table `road_flow`.
val roadFlowSql =
  "select flow_date, area_id, road_id, count(*) as flow_num " +
    "from traffic.flow_detail group by flow_date, area_id, road_id order by flow_num desc"
val frame3 = session.sql(roadFlowSql)
frame3
  .write
  .mode(SaveMode.Append)
  .jdbc("jdbc:mysql://192.168.200.111:3306/project?serverTimezone=UTC", "road_flow", properties)
5.每一个区域每天的车流量压力最大的前十条道路信息
/**
 * 5. Extended feature: the ten busiest roads per area per day, computed with a
 * window function plus dense_rank on top of the per-road daily counts from
 * feature 4. Result is appended to MySQL table `area_top10_road`.
 */
// BUG FIX: renamed from `frame3` — that name is already bound by job 4 above,
// and a duplicate top-level `val frame3` in the same scope does not compile.
val roadFlowFrame = session.sql("select flow_date, area_id, road_id, count(*) as flow_num " +
  "from traffic.flow_detail group by flow_date, area_id, road_id order by flow_num desc" )
roadFlowFrame.createOrReplaceTempView("road_flow_temp" )
// BUG FIX: the window is partitioned by (area_id, flow_date). Partitioning by
// area_id alone ranks roads across ALL days, not per day, so a single busy day
// could crowd out every other day's top-10 list.
val frame4 = session.sql("select * " +
  "from (select flow_date, area_id, road_id, flow_num, dense_rank() over(partition by area_id, flow_date order by flow_num desc) as rank from road_flow_temp) as tt " +
  "where rank <= 10" )
frame4.write.mode(SaveMode.Append).jdbc("jdbc:mysql://192.168.200.111:3306/project?serverTimezone=UTC" , "area_top10_road" , properties)
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· TypeScript + Deepseek 打造卜卦网站:技术与玄学的结合
· Manus的开源复刻OpenManus初探
· AI 智能体引爆开源社区「GitHub 热点速览」
· 三行代码完成国际化适配,妙~啊~
· .NET Core 中如何实现缓存的预热?