hive元数据及相关查询

hive 元数据的数据模型

 
hive元数据的数据模型.png

hive元数据的常用查询语句

通过中文字段找表
-- Find tables by column comment.
-- Lists every column whose comment matches the pattern, together with its
-- database name, table name and table-level comment.
-- Replace '%xxxx%' with the text to search for.
SELECT
    d.NAME,
    a.TBL_NAME,
    e.PARAM_VALUE,      -- table-level comment (NULL when the table has none)
    c.COLUMN_NAME,
    c.TYPE_NAME,
    c.COMMENT,
    c.INTEGER_IDX
FROM TBLS a
JOIN SDS b
    ON a.SD_ID = b.SD_ID
JOIN COLUMNS_V2 c
    ON c.CD_ID = b.CD_ID
JOIN DBS d
    ON a.DB_ID = d.DB_ID
-- LEFT JOIN: a matching column must still be reported when its table has no
-- 'comment' entry in TABLE_PARAMS.
LEFT JOIN TABLE_PARAMS e
    ON e.TBL_ID = a.TBL_ID
   AND e.PARAM_KEY = 'comment'
WHERE c.COMMENT LIKE '%xxxx%'   -- search tables by the column comment text
-- d.NAME keeps same-named tables from different databases from interleaving.
ORDER BY a.TBL_NAME, d.NAME, c.INTEGER_IDX;
根据表中文注释找表
-- Find tables by table comment.
-- Returns database name, table name and table comment for every table in the
-- 'dw_idl' database whose comment matches the pattern.
-- Replace '%xxx%' with the text to search for and 'dw_idl' with the target database.
SELECT
    d.NAME,
    a.TBL_NAME,
    b.PARAM_VALUE
FROM TBLS a
JOIN TABLE_PARAMS b
    ON a.TBL_ID = b.TBL_ID
JOIN DBS d
    ON a.DB_ID = d.DB_ID
WHERE b.PARAM_KEY = 'comment'
  AND b.PARAM_VALUE LIKE '%xxx%'   -- table comment text
  AND d.NAME = 'dw_idl'
ORDER BY a.TBL_NAME;
查找该表授权的视图
-- Find views whose definition text mentions a given name.
-- Returns database name and view name for every TBLS row whose
-- VIEW_ORIGINAL_TEXT contains the pattern; rows with a NULL
-- VIEW_ORIGINAL_TEXT never satisfy LIKE and are therefore excluded.
-- Replace '%xxx%' with the table name to look for.
SELECT
    d.NAME,
    a.TBL_NAME
FROM TBLS a
JOIN DBS d
    ON a.DB_ID = d.DB_ID
-- Single-quoted literal: a double-quoted string is parsed as an identifier
-- when the ANSI_QUOTES SQL mode is enabled.
WHERE a.VIEW_ORIGINAL_TEXT LIKE '%xxx%';
分区分桶表元数据查询
-- Partition / bucket metadata per table.
-- One row per table and per grant recorded in TBL_PRIVS (a single row with
-- NULL grant columns when the table has no grants). Partition keys and
-- bucketing columns are each collapsed into one comma-separated list, ordered
-- by their position, so a table with several of both is not multiplied into a
-- cross product of (partition key x bucket column) rows.
SELECT
    DBS.NAME                                         AS '数据库名',
    TBLS.TBL_NAME                                    AS '表/视图名',
    COMM.PARAM_VALUE                                 AS '表注释',
    FROM_UNIXTIME(TBLS.CREATE_TIME, '%Y-%m-%d')      AS '表创建时间',
    TBLS.OWNER                                       AS '表属主用户',
    FROM_UNIXTIME(TBL_PRIVS.CREATE_TIME, '%Y-%m-%d') AS '表被授权时间',
    TBL_PRIVS.PRINCIPAL_NAME                         AS '被授权用户',
    PART.PKEY_NAMES                                  AS '分区字段',
    BUCK.BUCKET_COL_NAMES                            AS '分桶字段',
    SDS.NUM_BUCKETS                                  AS '分桶数',
    TBLS.LAST_ACCESS_TIME                            AS '最近一次访问时间'  -- raw stored value, not formatted
FROM TBLS
JOIN DBS
    ON TBLS.DB_ID = DBS.DB_ID
JOIN SDS
    ON TBLS.SD_ID = SDS.SD_ID
-- NOTE(review): no column of CDS is selected; this join only filters out
-- storage descriptors without a matching CDS row. Confirm it is still wanted.
JOIN CDS
    ON SDS.CD_ID = CDS.CD_ID
-- LEFT JOIN: tables without any TBL_PRIVS row must still be listed.
LEFT JOIN TBL_PRIVS
    ON TBLS.TBL_ID = TBL_PRIVS.TBL_ID
LEFT JOIN (
    SELECT
        TBL_ID,
        GROUP_CONCAT(PKEY_NAME ORDER BY INTEGER_IDX SEPARATOR ',') AS PKEY_NAMES
    FROM PARTITION_KEYS
    GROUP BY TBL_ID
) PART
    ON PART.TBL_ID = TBLS.TBL_ID
LEFT JOIN (
    SELECT
        SD_ID,
        GROUP_CONCAT(BUCKET_COL_NAME ORDER BY INTEGER_IDX SEPARATOR ',') AS BUCKET_COL_NAMES
    FROM BUCKETING_COLS
    GROUP BY SD_ID
) BUCK
    ON BUCK.SD_ID = TBLS.SD_ID
LEFT JOIN (
    SELECT TBL_ID, PARAM_VALUE
    FROM TABLE_PARAMS
    WHERE PARAM_KEY = 'comment'
) COMM
    ON COMM.TBL_ID = TBLS.TBL_ID;
统计各库表数量
-- Number of tables in each database, largest database first.
-- Databases without any row in TBLS do not appear (inner join).
SELECT
    db.NAME,
    COUNT(*) AS cnt
FROM DBS db
INNER JOIN TBLS t
    ON t.DB_ID = db.DB_ID
GROUP BY db.NAME
ORDER BY cnt DESC
统计某一时间后未变化过的表
-- Per database, count the tables whose 'transient_lastDdlTime' parameter is
-- earlier than a cutoff, i.e. tables with no recorded DDL change since then.
-- The inner query also exposes the per-table details (comment, creation time,
-- last_modified_time, transient_lastDdlTime) so it can be run on its own to
-- list the tables instead of counting them.
-- Cutoff 1577808000 is epoch seconds for 2019-12-31 16:00:00 UTC
-- (2020-01-01 00:00:00 at UTC+8); change it as needed.
SELECT
    DB_NAME,
    COUNT(*)
FROM (
    SELECT
        b.NAME                                        AS DB_NAME,
        a.TBL_NAME,
        c.PARAM_VALUE                                 AS comment,
        FROM_UNIXTIME(CAST(a.CREATE_TIME AS SIGNED))  AS CREATE_TIME,
        FROM_UNIXTIME(CAST(d.PARAM_VALUE AS SIGNED))  AS last_modified_time,
        FROM_UNIXTIME(CAST(e.PARAM_VALUE AS SIGNED))  AS transient_lastDdlTime
    FROM TBLS a
    JOIN DBS b
        ON a.DB_ID = b.DB_ID
    JOIN (
        SELECT TBL_ID, PARAM_VALUE
        FROM TABLE_PARAMS
        WHERE PARAM_KEY = 'transient_lastDdlTime'
          -- Compare as a number: a string comparison is lexicographic, so a
          -- value with a different digit count would be ordered incorrectly.
          AND CAST(PARAM_VALUE AS SIGNED) < 1577808000
    ) e
        ON a.TBL_ID = e.TBL_ID
    -- Optional attributes: LEFT JOIN keeps tables that lack them.
    LEFT JOIN (
        SELECT TBL_ID, PARAM_VALUE
        FROM TABLE_PARAMS
        WHERE PARAM_KEY = 'comment'
    ) c
        ON a.TBL_ID = c.TBL_ID
    LEFT JOIN (
        SELECT TBL_ID, PARAM_VALUE
        FROM TABLE_PARAMS
        WHERE PARAM_KEY = 'last_modified_time'
    ) d
        ON a.TBL_ID = d.TBL_ID
) f
GROUP BY DB_NAME;
 

参考文档

posted @ 2022-11-29 14:17  葫芦杯  阅读(304)  评论(0)  编辑  收藏  举报