-- Hive 存储格式及压缩
1 -- 设置参数 2 set hivevar:target_db_name=db_dw; 3 use ${hivevar:target_db_name}; 4 5 -- 创建textfile表 6 create table file_format_textfile 7 row format delimited fields terminated by '\001' 8 stored as textfile 9 as 10 select * from smple_table; 11 12 -- 测试各种压缩的orc表 13 create table file_format_orc_zlib 14 row format delimited fields terminated by '\001' 15 stored as orc tblproperties ("orc.compress"="ZLIB") 16 as 17 select * from file_format_textfile 18 ; 19 20 create table file_format_orc_snappy 21 row format delimited fields terminated by '\001' 22 stored as orc tblproperties ("orc.compress"="SNAPPY") 23 as 24 select * from file_format_orc_zlib 25 ; 26 27 create table file_format_orc_none 28 row format delimited fields terminated by '\001' 29 stored as orc tblproperties ("orc.compress"="NONE") 30 as 31 select * from file_format_orc_zlib 32 ; 33 34 create table file_format_orc_default 35 row format delimited fields terminated by '\001' 36 stored as orc 37 as 38 select * from file_format_orc_zlib 39 ; 40 41 -- 测试各种压缩的parquet表 42 create table file_format_parquet_zlib 43 row format delimited fields terminated by '\001' 44 stored as parquet tblproperties ("parquet.compress"="ZLIB") 45 as 46 select * from file_format_orc_zlib 47 ; 48 49 create table file_format_parquet_snappy 50 row format delimited fields terminated by '\001' 51 stored as parquet tblproperties ("parquet.compress"="SNAPPY") 52 as 53 select * from file_format_orc_zlib 54 ; 55 56 create table file_format_parquet_none 57 row format delimited fields terminated by '\001' 58 stored as parquet tblproperties ("parquet.compress"="NONE") 59 as 60 select * from file_format_orc_zlib 61 ; 62 63 create table file_format_parquet_default 64 row format delimited fields terminated by '\001' 65 stored as parquet 66 as 67 select * from file_format_orc_zlib 68 ; 69 70 -- 测试各种压缩的rcfile表(可能参数没生效,各种压缩后大小一致) 71 create table file_format_rcfile_zlib 72 row format delimited fields terminated by '\001' 73 stored as rcfile tblproperties 
("rcfile.compress"="ZLIB") 74 as 75 select * from file_format_orc_zlib 76 ; 77 78 create table file_format_rcfile_snappy 79 row format delimited fields terminated by '\001' 80 stored as rcfile tblproperties ("rcfile.compress"="SNAPPY") 81 as 82 select * from file_format_orc_zlib 83 ; 84 85 create table file_format_rcfile_none 86 row format delimited fields terminated by '\001' 87 stored as rcfile tblproperties ("rcfile.compress"="NONE") 88 as 89 select * from file_format_orc_zlib 90 ; 91 92 create table file_format_rcfile_default 93 row format delimited fields terminated by '\001' 94 stored as rcfile 95 as 96 select * from file_format_orc_zlib 97 ;
-- Report the total on-disk size of each test table (one summary line per
-- table directory) so the formats and codecs can be compared.
-- Each command is on its own line: when they all shared a single line behind
-- the leading `--`, the whole line was a comment and nothing ran.
-- NOTE(review): the paths assume the warehouse root is /user/hive/warehouse;
-- confirm against hive.metastore.warehouse.dir on the target cluster.

-- Text baseline.
dfs -du -s /user/hive/warehouse/${hivevar:target_db_name}.db/file_format_textfile;

-- ORC variants.
dfs -du -s /user/hive/warehouse/${hivevar:target_db_name}.db/file_format_orc_zlib;
dfs -du -s /user/hive/warehouse/${hivevar:target_db_name}.db/file_format_orc_snappy;
dfs -du -s /user/hive/warehouse/${hivevar:target_db_name}.db/file_format_orc_none;
dfs -du -s /user/hive/warehouse/${hivevar:target_db_name}.db/file_format_orc_default;

-- Parquet variants.
dfs -du -s /user/hive/warehouse/${hivevar:target_db_name}.db/file_format_parquet_zlib;
dfs -du -s /user/hive/warehouse/${hivevar:target_db_name}.db/file_format_parquet_snappy;
dfs -du -s /user/hive/warehouse/${hivevar:target_db_name}.db/file_format_parquet_none;
dfs -du -s /user/hive/warehouse/${hivevar:target_db_name}.db/file_format_parquet_default;

-- RCFile variants.
dfs -du -s /user/hive/warehouse/${hivevar:target_db_name}.db/file_format_rcfile_zlib;
dfs -du -s /user/hive/warehouse/${hivevar:target_db_name}.db/file_format_rcfile_snappy;
dfs -du -s /user/hive/warehouse/${hivevar:target_db_name}.db/file_format_rcfile_none;
dfs -du -s /user/hive/warehouse/${hivevar:target_db_name}.db/file_format_rcfile_default;
-- 统计数据,原文件见文件中的附件