Small-file cleanup after SQL processing (Hive)

# On host 10.24.8.5, switch to the hive user
su - hive

# List the table directories under the database
[hive@hadoop-0001 ~]$ hdfs dfs -ls /user/hive/warehouse/bibase.db/
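
# A quick way to spot which tables are split into many files (not just which are big) is
# hdfs dfs -count, which prints directory count, file count and content size per path.
# The wildcard below is a sketch that assumes every table lives directly under the database directory.
# (columns: DIR_COUNT  FILE_COUNT  CONTENT_SIZE  PATHNAME)
[hive@hadoop-0001 ~]$ hdfs dfs -count /user/hive/warehouse/bibase.db/*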

# Inspect the small files on disk for one table
[hive@hadoop-0001 ~]$ hdfs dfs -du -h /user/hive/warehouse/bibase.db/bs_user_annual_report_consume_part
17.4 M  52.1 M  /user/hive/warehouse/bibase.db/bs_user_annual_report_consume_part/000000_0
17.4 M  52.2 M  /user/hive/warehouse/bibase.db/bs_user_annual_report_consume_part/000001_0
17.4 M  52.2 M  /user/hive/warehouse/bibase.db/bs_user_annual_report_consume_part/000002_0
17.4 M  52.1 M  /user/hive/warehouse/bibase.db/bs_user_annual_report_consume_part/000003_0
17.4 M  52.1 M  /user/hive/warehouse/bibase.db/bs_user_annual_report_consume_part/000004_0
17.4 M  52.1 M  /user/hive/warehouse/bibase.db/bs_user_annual_report_consume_part/000005_0
17.4 M  52.2 M  /user/hive/warehouse/bibase.db/bs_user_annual_report_consume_part/000006_0
17.4 M  52.2 M  /user/hive/warehouse/bibase.db/bs_user_annual_report_consume_part/000007_0
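
# In hdfs dfs -du output the first column is the file size and the second the space consumed
# across all replicas (52.2 M / 17.4 M ≈ 3, i.e. replication factor 3). Eight files of about
# 17.4 MB each total only ~139 MB, and each file is well below the usual 128 MB HDFS block size:
# the classic small-file pattern that bloats NameNode metadata and spawns more map tasks than needed.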

# Solution
# Set these parameters before the SQL statement so the small output files are merged into files of about 256 MB
set hive.merge.mapfiles=true;
set hive.merge.mapredfiles=true;
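
# The two flags above only switch merging on for map-only and map-reduce jobs; the size of the
# merged files and the threshold that triggers the extra merge pass come from
# hive.merge.size.per.task (default 256000000 bytes, hence the 256 MB mentioned above) and
# hive.merge.smallfiles.avgsize. The sketch below spells those out and adds a one-off rewrite to
# compact the files already on disk; the INSERT OVERWRITE is an assumption that the table can
# simply be rewritten from itself (no PARTITION clause, which only works if the table is
# unpartitioned as the listing suggests), so adapt it to the real schema. On Tez,
# hive.merge.tezfiles=true is the corresponding flag.

-- Explicit merge settings (these values are the Hive defaults, shown so the target size is visible)
set hive.merge.size.per.task=256000000;      -- aim for ~256 MB merged files
set hive.merge.smallfiles.avgsize=16000000;  -- run a merge pass when the average output file is under 16 MB

-- One-off compaction of the existing files: rewrite the table onto itself
insert overwrite table bibase.bs_user_annual_report_consume_part
select * from bibase.bs_user_annual_report_consume_part;

# For ORC tables, ALTER TABLE ... CONCATENATE is a lighter-weight alternative that merges files
# without re-reading the rows.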