Spark 写Hive指定动态分区

1、设置

1
2
3
4
5
6
7
8
SparkSession.Builder builder = SparkSession
             .builder()
             .config(conf)
             .master("yarn")
             .config("hive.exec.dynamici.partition", true)
             .config("hive.exec.dynamic.partition.mode", "nonstrict")
             .appName(taskName)
             .enableHiveSupport();

  

2、SQL
  

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
/**
   * 同步数据到clickhouse
   * 按照日期分区
   *
   * @param dataFrame
   */
  private void sinkToHive(Dataset<Row> dataFrame) {
      dataFrame.createOrReplaceTempView("soh_tmp");
      String selectSql = "select " + getSelectHiveColumn() + " from soh_tmp";
      String sql = "INSERT INTO TABLE d_vehicle_data_charging_u_d partition(part_key) " + selectSql;
      sparkSession.sql(sql);
  }
 
  /**
   * @return
   */
  private String getSelectHiveColumn() {
      return
              "vin,\n" +
                      "batch,\n" +
                      "tbox_type,\n" +
                      "collect_time,\n" +
                      "`current_timestamp` AS receive_time,\n" +
                      "battery_voltage_data,\n" +
                      "battery_temp_data,\n" +
                      "charging_status,\n" +
                      "total_current,\n" +
                      "battery_soc,\n" +
                      "max_temp,\n" +
                      "min_temp,\n" +
                      "partition_key,\n" +
                      "total_mileage,\n" +
                      "max_cell_voltage,\n" +
                      "min_cell_voltage,\n" +
                      "level_alarm,\n" +
                      "max_voltage_bat_sys_no,\n" +
                      "max_voltage_bat_cell_no," +
                      "mix_voltage_bat_sys_no,\n" +
                      "min_voltage_bat_cell_no,\n" +
                      "vehicle_status,\n" +
                      "insulation_resistance,\n" +
                      "insulation_Alarm, " +
                      "`current_timestamp`() as create_time,\n" +
                      " partition_key as part_key\n"
              ;
  }

  



posted on   滚动的蛋  阅读(1167)  评论(0)  编辑  收藏  举报

编辑推荐:
· 基于Microsoft.Extensions.AI核心库实现RAG应用
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· 开发者必知的日志记录最佳实践
· SQL Server 2025 AI相关能力初探
· Linux系列:如何用 C#调用 C方法造成内存泄露
阅读排行:
· 无需6万激活码!GitHub神秘组织3小时极速复刻Manus,手把手教你使用OpenManus搭建本
· Manus爆火,是硬核还是营销?
· 终于写完轮子一部分:tcp代理 了,记录一下
· 别再用vector<bool>了!Google高级工程师:这可能是STL最大的设计失误
· 单元测试从入门到精通

导航

< 2025年3月 >
23 24 25 26 27 28 1
2 3 4 5 6 7 8
9 10 11 12 13 14 15
16 17 18 19 20 21 22
23 24 25 26 27 28 29
30 31 1 2 3 4 5
点击右上角即可分享
微信分享提示