namenode元数据管理之查找小文件

一,背景

hdfs的海量文件信息都是存储在datanode上,datanode会定时发送心跳到namenode,namenode会把这些文件、目录、节点信息都以内存对象形式存在内存中。如果小文件过多(小文件是指大小远小于dfs.blocksize的文件;dfs.blocksize在Hadoop 1.x中默认为64M,2.x起默认为128M,大集群中一般设置为128M以上),会消耗大量的namenode内存,给集群的管理带来很大的压力。

另一方面,namenode会将所有客户端的写操作记录在editlog中,secondary namenode会周期性地将快照文件fsimage和edits合并成新的fsimage,具体过程如下图所示

1,secondary namenode 通知primary namenode将事务id滚动存入editlog文件,primary namenode将以事务起始id和结束id做edit文件名后缀更新新的edit文件

2,secondary namenode 通过http get的方式从primary namenode获取到最新的fsimage和edit文件

3,secondary namenode将fsimage 文件加载到内存中,从edit文件中合并新的事务id,生成一个新的fsimage文件

4,secondary namenode 通过http put的方式将最新的fsimage传到primary namenode,primary namenode将其保存为临时的.ckpt文件

5, primary namenode重命名该临时文件为fsimage文件

二,小文件的查找

现在从fsimage文件对hdfs中的文件大小做一个简单的统计,方便后续的小文件排查

1,如下命令从fsimage文件解析出文本格式的文件,再导入到hive文件(生产环境最好是将fsimage传到低负载节点,加载fsimage会非常消耗内存)

# Raise the client JVM heap before running the offline image viewer — loading a
# large fsimage is extremely memory-hungry; bump -Xmx if the tool dies with OOM.
# The variable must be exported (or placed on the same command line as `hdfs`);
# a bare `VAR=... ;` assignment in a separate statement never reaches the child process.
export HADOOP_CLIENT_OPTS="-Xmx60000M"
# Parse the binary fsimage into tab-delimited text, in the background, surviving logout.
nohup hdfs oiv -p Delimited -i ./fsimage_0000000016419636404 -o ./fsimage_text &

2,创建hive表,存储fsimage文件

-- One row per inode parsed from `hdfs oiv -p Delimited` output.
-- Column order and count must match the tool's tab-separated field order exactly.
CREATE TABLE IF NOT EXISTS default.fsimage (
    path              string,  -- full HDFS path of the file or directory
    num_replication   int,     -- replication factor (0 for directories)
    modification_time string,  -- timestamp text as emitted by oiv
    access_time       string,  -- timestamp text as emitted by oiv
    block_size        int,     -- preferred block size in bytes
    num_blocks        int,     -- block count; 0 for directories
    num_bytes         bigint,  -- file length in bytes
    ns_quota          bigint,  -- namespace quota; bigint since quotas can exceed int32
    ds_quota          bigint,  -- diskspace quota; numeric like ns_quota (was string: inconsistent)
    permission        string,  -- e.g. -rw-r--r--
    username          string,  -- owner
    groupname         string   -- group
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE;

--If the parsed text file is on the local filesystem, run:
load data local inpath '/data/fsimage_text' into table default.fsimage;

--If the file is already on HDFS, run (note: this MOVES the file into the table directory):
load data inpath '/data/fsimage_text' into table default.fsimage;

3, 如果需要统计出表级别的小文件数,可以再套一个子查询

-- Per-table small-file counts: roll files up to their parent directory, then
-- collapse partition directories under /user/hive/warehouse/<db>.db/<table>
-- onto the table directory, and rank tables by number of sub-1MB files.
-- (Character classes fixed: the original [0-9-A-z_] parses as 0-9, literal '-',
-- and A-z — which also matches [ \ ] ^ ` — instead of the intended alphanumerics.)
select
    -- Collapse anything below <db>.db/<table> to the table path; other dirs pass through.
    case when instr(dir, '.db/') > 0
         then regexp_replace(dir, '(/user/hive/warehouse/[0-9A-Za-z_-]+\.db/[0-9A-Za-z_-]+)(/.*)', '$1')
         else dir end as table_dir,
    sum(fnum_1M)     as fnum_1M,       -- files smaller than 1 MB
    sum(fnum_1M_10M) as fnum_1M_10M,   -- files in [1 MB, 10 MB)
    count(1)         as numpartitions  -- leaf directories rolled into this table
from (
    -- Per-leaf-directory counts: strip the final path component (the file name)
    -- to obtain each file's parent directory.
    select
        regexp_replace(path, '/[0-9A-Za-z_.=-]+$', '') as dir,
        sum(case when num_bytes < 1024*1024 then 1 else 0 end) as fnum_1M,
        sum(case when num_bytes >= 1024*1024 and num_bytes < 1024*1024*10 then 1 else 0 end) as fnum_1M_10M
    from default.fsimage
    where num_blocks > 0  -- files only; directories carry no blocks
    group by regexp_replace(path, '/[0-9A-Za-z_.=-]+$', '')
) a
group by case when instr(dir, '.db/') > 0
              then regexp_replace(dir, '(/user/hive/warehouse/[0-9A-Za-z_-]+\.db/[0-9A-Za-z_-]+)(/.*)', '$1')
              else dir end
order by fnum_1M desc
limit 50;

4,另外还找出第三级目录下,每个目录的文件数,或者大小倒序(可以筛选出要压缩的大表或者大文件)

-- Top 50 third-level directories by file count, with total size in TB —
-- useful for spotting large tables/directories worth compacting or archiving.
-- (Character classes fixed: [0-9-A-z_...] also matched [ \ ] ^ ` by accident.)
SELECT
    -- First three path components, e.g. /user/hive/warehouse
    regexp_extract(path, '(/[0-9A-Za-z_.=-]+/[0-9A-Za-z_.=-]+/[0-9A-Za-z_.=-]+)', 1) AS dir,
    count(*) AS filescn,                                -- number of files under the prefix
    sum(num_bytes) / (1024*1024*1024*1024.0) AS sizeTB  -- bytes -> TB; .0 forces floating division
FROM default.fsimage
WHERE num_blocks > 0  -- files only; directories carry no blocks
GROUP BY
    regexp_extract(path, '(/[0-9A-Za-z_.=-]+/[0-9A-Za-z_.=-]+/[0-9A-Za-z_.=-]+)', 1)
ORDER BY filescn DESC
LIMIT 50;

5, 从fsimage表中对hdfs 路径归类,统计

  • Trash 目录统计到第四级目录
  • 数仓目录单独统计,注意default库中的表路径在/user/hive/warehouse/ 里
  • 其他目录最多统计到第三级目录

    对找出的小文件排出整改计划

-- Classify every HDFS directory into a reporting bucket (Trash trimmed to its
-- user level, Hive warehouse paths trimmed to db/table, everything else trimmed
-- to three path levels) and aggregate size, block counts, and a file-size
-- histogram per bucket. Three layers:
--   innermost: roll files up onto their parent directory, per username;
--   middle:    rank usernames within each directory by file count;
--   outer:     bucket directories and aggregate, reporting the dominant owner.
select
-- Bucket the directory path (same expression repeated in GROUP BY below).
case 
when instr(dir,'.Trash')>0 then regexp_replace(dir,'([0-9-A-z_/]+/.Trash/[0-9-A-z_]+)(.*)',"\$1") 
when dir regexp '/user/hive/warehouse/[0-9-A-z_\.]+\.db/[0-9-A-z_\.]+' then regexp_replace(dir,'(/user/hive/warehouse/[0-9-A-z_]+\.db/[0-9-A-z_\.]+)(.*)',"\$1")
when dir regexp '/user/hive/warehouse/[0-9-A-z_\.]+\.db' then regexp_replace(dir,'(/user/hive/warehouse/[0-9-A-z_]+\.db)(.*)',"\$1")
when dir regexp '/user/hive/warehouse/[0-9-A-z_\.]+' then regexp_replace(dir,'(/user/hive/warehouse/[0-9-A-z_\.]+)(.*)',"\$1") 
when dir regexp '/[0-9-A-z_=\.]+/[0-9-A-z_=\.]+/[0-9-A-z_=\.]+/.*' then regexp_replace(dir,'(/[0-9-A-z_=\.]+)(/[0-9-A-z_=\.]+)(/[0-9-A-z_=\.]+)(.*)',"\$1\$2\$3")
else dir end hdfspath,
-- Dominant owner: username with the highest file count in the directory (rn = 1).
max(case when rn=1 then username else null end) username,
-- 1 when the path looks like a Hive warehouse table, else 0 (Trash forced to 0).
case 
when instr(dir,'.Trash')>0 then 0 
when dir regexp '/user/hive/warehouse/[0-9-A-z_]+' then 1 
else 0 end ishivetable,
-- Database name parsed from <db>.db; bare warehouse paths belong to "default".
case 
when instr(dir,'.Trash')>0 then null 
when dir regexp '/user/hive/warehouse/[0-9-A-z_]+\.db' then regexp_replace(dir,'(/user/hive/warehouse/)([0-9-A-z_]+)(\.db)(.*)',"\$2") 
when dir regexp '/user/hive/warehouse/[0-9-A-z_]+' then "default" 
else null end dbname,
-- Table name parsed from the warehouse path, when one is identifiable.
case 
when instr(dir,'.Trash')>0 then null 
when dir regexp '/user/hive/warehouse/[0-9-A-z_]+\.db/[0-9-A-z_\.]+' then regexp_replace(dir,'(/user/hive/warehouse/)([0-9-A-z_]+\.db/)([0-9-A-z_\.]+)(.*)',"\$3")
when dir regexp '/user/hive/warehouse/[0-9-A-z_]+\.db' then null
when dir regexp '/user/hive/warehouse/[0-9-A-z_\.]+' then regexp_replace(dir,'(/user/hive/warehouse/)([0-9-A-z_\.]+)(.*)',"\$2")
else null end tablename,
-- Aggregates per bucket: total size in GB, directory count, block count,
-- and the file-size histogram columns computed in the innermost layer.
cast(sum(dir_size)/(1024*1024*1024) as decimal(10,2)) dir_size,
count(1) num_dirs,
sum(num_blocks) num_blocks,
sum(fnum) num_files,
sum(fnum_null) num_f_null,
sum(fnum_1m) num_f_1m,
sum(fnum_1m_10m) num_f_1m_10m,
sum(fnum_10m_100m) num_f_10m_100m,
sum(fnum_100m) num_f_100m,
from_unixtime(unix_timestamp(max(lastmtime),'yyyy-MM-dd HH:mm')) lastmtime,
from_unixtime(unix_timestamp(max(lastatime),'yyyy-MM-dd HH:mm')) lastatime
from (
    -- Rank usernames within each directory by their file count so the outer
    -- query can pick the dominant owner via rn = 1.
    select 
    username,
    row_number() over (partition by dir order by fnum desc) as rn,
    dir,
    dir_size,
    num_blocks,
    fnum,
    fnum_null,
    fnum_1m,
    fnum_1m_10m,
    fnum_10m_100m,
    fnum_100m,
    lastmtime,
    lastatime
    from (
        -- Per (username, directory) rollup: directories keep their own path
        -- (num_blocks = 0), files are collapsed onto their parent directory.
        select
            username,                            
            case when num_blocks !=0 then regexp_replace(path,'/[0-9-A-z_\.=]+$',"") else path end dir,
            sum(num_bytes) dir_size,
            sum(num_blocks ) num_blocks,
            sum(case when num_blocks  != 0 then 1 else 0 end) fnum,
            sum(case when num_blocks  != 0 and num_bytes = 0 then 1 else 0 end) fnum_null,
            sum(case when num_bytes > 0 and num_bytes < 1024*1024*1  then 1 else 0 end ) fnum_1m,
            sum(case when num_bytes >= 1024*1024*1 and num_bytes < 1024*1024*10 then 1 else 0 end ) fnum_1m_10m,
            sum(case when num_bytes >= 1024*1024*10 and num_bytes < 1024*1024*100 then 1 else 0 end ) fnum_10m_100m,
            sum(case when num_bytes >= 1024*1024*100  then 1 else 0 end ) fnum_100m,
            max(modification_time ) lastmtime,
            max(access_time ) lastatime
        -- NOTE(review): the CREATE TABLE earlier in this file declares no
        -- `fdate` column — this filter only works if the production table is
        -- additionally partitioned by fdate; confirm before reuse.
        from default.fsimage where fdate="2021-09-22"
            group by
            username, 
            case when num_blocks !=0 then regexp_replace(path,'/[0-9-A-z_\.=]+$',"") else path end
    ) a
) a
-- GROUP BY must repeat the non-aggregated SELECT expressions verbatim.
group by 
case 
when instr(dir,'.Trash')>0 then regexp_replace(dir,'([0-9-A-z_/]+/.Trash/[0-9-A-z_]+)(.*)',"\$1") 
when dir regexp '/user/hive/warehouse/[0-9-A-z_\.]+\.db/[0-9-A-z_\.]+' then regexp_replace(dir,'(/user/hive/warehouse/[0-9-A-z_]+\.db/[0-9-A-z_\.]+)(.*)',"\$1")
when dir regexp '/user/hive/warehouse/[0-9-A-z_\.]+\.db' then regexp_replace(dir,'(/user/hive/warehouse/[0-9-A-z_]+\.db)(.*)',"\$1")
when dir regexp '/user/hive/warehouse/[0-9-A-z_\.]+' then regexp_replace(dir,'(/user/hive/warehouse/[0-9-A-z_\.]+)(.*)',"\$1") 
when dir regexp '/[0-9-A-z_=\.]+/[0-9-A-z_=\.]+/[0-9-A-z_=\.]+/.*' then regexp_replace(dir,'(/[0-9-A-z_=\.]+)(/[0-9-A-z_=\.]+)(/[0-9-A-z_=\.]+)(.*)',"\$1\$2\$3")
else dir end,
case 
when instr(dir,'.Trash')>0 then 0 
when dir regexp '/user/hive/warehouse/[0-9-A-z_]+' then 1 
else 0 end,
case 
when instr(dir,'.Trash')>0 then null 
when dir regexp '/user/hive/warehouse/[0-9-A-z_]+\.db' then regexp_replace(dir,'(/user/hive/warehouse/)([0-9-A-z_]+)(\.db)(.*)',"\$2")  
when dir regexp '/user/hive/warehouse/[0-9-A-z_]+' then "default" 
else null end,
case 
when instr(dir,'.Trash')>0 then null 
when dir regexp '/user/hive/warehouse/[0-9-A-z_]+\.db/[0-9-A-z_\.]+' then regexp_replace(dir,'(/user/hive/warehouse/)([0-9-A-z_]+\.db/)([0-9-A-z_\.]+)(.*)',"\$3")
when dir regexp '/user/hive/warehouse/[0-9-A-z_]+\.db' then null
when dir regexp '/user/hive/warehouse/[0-9-A-z_\.]+' then regexp_replace(dir,'(/user/hive/warehouse/)([0-9-A-z_\.]+)(.*)',"\$2")
else null end

 

6,找出以某种特征结尾的文件, 有些空文件可以清理(比如_SUCCESS结尾)

-- List files whose names end with a given marker — here the _SUCCESS flag files
-- written by MapReduce/Spark jobs, which are usually safe to clean up.
select
    path
from default.fsimage
where num_blocks > 0          -- restrict to entries that own blocks
  and path rlike '_SUCCESS$'  -- idiomatic boolean test; replaces regexp_extract(...) <> ''
-- NOTE(review): _SUCCESS markers are typically zero-byte files; if empty files
-- carry num_blocks = 0 in the oiv output, the num_blocks filter hides exactly
-- the files this query targets — confirm against a sample and drop it if so.
limit 50;

 

 

http://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/HdfsImageViewer.html

posted @ 2019-03-30 23:33  .狂飙的蜗牛  阅读(923)  评论(0编辑  收藏  举报