Hudi-通过Hive查询hudi表数据

环境准备

集成jar包:hudi-hadoop-mr-bundle-0.10.1.jar,放入$HIVE_HOME/lib目录下

建外部表

复制代码
create database db_hudi;

use db_hudi;

CREATE EXTERNAL TABLE IF NOT EXISTS tbl_hudi_didi(
    order_id BIGINT,
    product_id INT,
    city_id INT,
    district INT,
    county INT,
    type INT,
    combo_type INT,
    traffic_type INT,
    passenger_count INT,
    driver_product_id INT,
    start_dest_distance INT,
    arrive_time STRING,
    departure_time STRING,
    pre_total_fee DOUBLE,
    normal_time STRING,
    bubble_trace_id STRING,
    product_1level INT,
    dest_lng DOUBLE,
    dest_lat DOUBLE,
    starting_lng DOUBLE,
    starting_lat DOUBLE,
    ts BIGINT,
    partitionpath STRING
)
PARTITIONED BY(
    date_str string
)
ROW FORMAT SERDE
    'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT
    'org.apache.hudi.hadoop.HoodieParquetInputFormat'
OUTPUTFORMAT
    'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
    '/hudi-warehouse/tbl_didi_haikou';
复制代码

手动加入分区

复制代码
--手动添加分区
ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-22') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-22';
ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-23') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-23';
ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-24') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-24';
ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-25') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-25';
ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-26') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-26';
ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-27') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-27';
ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-28') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-28';
ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-29') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-29';
ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-30') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-30';
ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-31') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-31';
ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-1') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-1';
ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-2') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-2';
ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-3') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-3';
ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-4') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-4';
ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-5') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-5';
ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-6') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-6';
ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-7') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-7';
ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-8') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-8';
ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-9') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-9';
ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-10') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-10';
复制代码

查看分区

SHOW PARTITIONS db_hudi.tbl_hudi_didi;
 

指标统计

复制代码
-- 开发测试,设置运行模式为本地模式
set hive.exec.mode.local.auto = true;

set hive.exec.mode.local.auto.tasks.max = 10;
set hive.exec.mode.local.auto.inputbytes.max=88801103;
set hive.exec.mode.local.auto.input.files.max=50;
SET hive.mapred.mode=nonstrict;
-- 指标一:订单类型统计
WITH tmp as (
    SELECT
        product_id,
        COUNT(1) AS total
    FROM db_hudi.tbl_hudi_didi
    GROUP BY product_id
)
SELECT
    CASE product_id
        WHEN 1 THEN "滴滴专车"
        WHEN 2 THEN "滴滴企业专车"
        WHEN 3 THEN "滴滴快车"
        WHEN 4 THEN "滴滴企业快车"
        ELSE "未知"
    END AS order_type,
    total
FROM tmp
;

-- 指标二:订单时效性统计
WITH tmp as (
    SELECT
        type,
        COUNT(1) AS total
    FROM db_hudi.tbl_hudi_didi
    GROUP BY type
)
SELECT
    CASE type
        WHEN 0 THEN "实时"
        WHEN 1 THEN "预约"
        ELSE "未知"
    END AS order_type,
    total
FROM tmp
;

--指标三:订单交通类型统计
SELECT
    traffic_type,
    COUNT(1) AS total
FROM db_hudi.tbl_hudi_didi
GROUP BY traffic_type;

-- 指标五:订单价格统计,先将价格划分区间,再统计,此处使用WHEN函数和SUM函数
SELECT
    SUM(
        CASE WHEN pre_total_fee BETWEEN 0 AND 15 THEN 1 ELSE 0 END
    ) AS 0_15,
    SUM(
        CASE WHEN pre_total_fee BETWEEN 16 AND 30 THEN 1 ELSE 0 END
    ) AS 16_30,
    SUM(
        CASE WHEN pre_total_fee BETWEEN 31 AND 50 THEN 1 ELSE 0 END
    ) AS 31_50,
    SUM(
        CASE WHEN pre_total_fee BETWEEN 51 AND 100 THEN 1 ELSE 0 END
    ) AS 51_100,
    SUM(
        CASE WHEN pre_total_fee > 100 THEN 1 ELSE 0 END
    ) AS 100_
FROM db_hudi.tbl_hudi_didi;
复制代码

 

 

 

posted on   嘣嘣嚓  阅读(1761)  评论(0编辑  收藏  举报

相关博文:
阅读排行:
· 分享4款.NET开源、免费、实用的商城系统
· 全程不用写代码,我用AI程序员写了一个飞机大战
· MongoDB 8.0这个新功能碉堡了,比商业数据库还牛
· 白话解读 Dapr 1.15:你的「微服务管家」又秀新绝活了
· 记一次.NET内存居高不下排查解决与启示
历史上的今天:
2020-03-03 机器学习-朴素贝叶斯原理及Python实现
2020-03-03 机器学习-文本聚类实例-kmeans

导航

< 2025年3月 >
23 24 25 26 27 28 1
2 3 4 5 6 7 8
9 10 11 12 13 14 15
16 17 18 19 20 21 22
23 24 25 26 27 28 29
30 31 1 2 3 4 5
点击右上角即可分享
微信分享提示