hadoop块的存储方式

hadoop上默认块的大小为64M

当文件小于64M时,只占用一个block;该block在datanode的本地文件系统中只占用文件的实际大小(而不是64M),另外还会生成一个保存校验和的.meta文件

例如:

中等文件,40M

[root@ungeo12 current]# 

[root@ungeo8 xliu]# /usr/local/hadoop/bin/hadoop dfs -put  /home/xliu/hadoop-0.20.3-dev.tar.gz  /

[root@ungeo8 xliu]# /usr/local/hadoop/bin/hadoop dfs -lsr /

-rw-r--r--   3 root supergroup   40972760 2012-10-27 23:00 /hadoop-0.20.3-dev.tar.gz

[root@ungeo8 xliu]# hadoop fsck /hadoop-0.20.3-dev.tar.gz -blocks -locations -files

/hadoop-0.20.3-dev.tar.gz 40972760 bytes, 1 block(s):  OK

0. blk_-6680210538719580844_1005 len=40972760 repl=3 [192.168.1.11:50010, 192.168.1.10:50010, 192.168.1.12:50010]

 

Status: HEALTHY

 Total size:    40972760 B

 Total dirs:    0

 Total files:   1

 Total blocks (validated):      1 (avg. block size 40972760 B)

 Minimally replicated blocks:   1 (100.0 %)

 Over-replicated blocks:        0 (0.0 %)

 Under-replicated blocks:       0 (0.0 %)

 Mis-replicated blocks:         0 (0.0 %)

 Default replication factor:    3

 Average block replication:     3.0

 Corrupt blocks:                0

 Missing replicas:              0 (0.0 %)

 Number of data-nodes:          3

 Number of racks:               1

 

 

The filesystem under path '/hadoop-0.20.3-dev.tar.gz' is HEALTHY

[root@ungeo8 xliu]# du -sh /home/xliu/hadoop-0.20.3-dev.tar.gz 

40M     /home/xliu/hadoop-0.20.3-dev.tar.gz  ##在linux系统上原始的文件大小!

登录到其中的一台datanode上查看相关的文件信息:

[root@ungeo10 current]# find . -name "*6680210538719580844*"

./blk_-6680210538719580844

./blk_-6680210538719580844_1005.meta

[root@ungeo10 current]# du -sh *6680210538719580844*

40M     blk_-6680210538719580844

320K    blk_-6680210538719580844_1005.meta

[root@ungeo10 current]# 

 

大文件:81M

测试一个大于64M的文件

[root@ungeo8 xliu]# du -sh /home/xliu/jdk-6u21-linux-i586.bin 

81M     /home/xliu/jdk-6u21-linux-i586.bin

[root@ungeo8 xliu]# /usr/local/hadoop/bin/hadoop dfs -put  /home/xliu/jdk-6u21-linux-i586.bin  /

[root@ungeo8 xliu]# /usr/local/hadoop/bin/hadoop dfs -lsr /

-rw-r--r--   3 root supergroup   40972760 2012-10-27 23:00 /hadoop-0.20.3-dev.tar.gz

-rw-r--r--   3 root supergroup   83854743 2012-10-27 23:12 /jdk-6u21-linux-i586.bin

 

[root@ungeo8 xliu]# hadoop fsck /jdk-6u21-linux-i586.bin -blocks -locations -files;

/jdk-6u21-linux-i586.bin 83854743 bytes, 2 block(s):  OK

0. blk_6729201486232919162_1006 len=67108864 repl=3 [192.168.1.10:50010, 192.168.1.11:50010, 192.168.1.12:50010]

1. blk_-2661147997145735854_1006 len=16745879 repl=3 [192.168.1.10:50010, 192.168.1.11:50010, 192.168.1.12:50010]

 

Status: HEALTHY

 Total size:    83854743 B

 Total dirs:    0

 Total files:   1

 Total blocks (validated):      2 (avg. block size 41927371 B)

 Minimally replicated blocks:   2 (100.0 %)

 Over-replicated blocks:        0 (0.0 %)

 Under-replicated blocks:       0 (0.0 %)

 Mis-replicated blocks:         0 (0.0 %)

 Default replication factor:    3

 Average block replication:     3.0

 Corrupt blocks:                0

 Missing replicas:              0 (0.0 %)

 Number of data-nodes:          3

 Number of racks:               1

 

 

The filesystem under path '/jdk-6u21-linux-i586.bin' is HEALTHY

[root@ungeo8 xliu]# 

 

[root@ungeo12 ~]# cd   /usr/local/hadoop

[root@ungeo12 hadoop]# ls

APACHE-README.txt  CHANGES.txt  hadoop-0.20.1-dev-ant.jar       hadoop-0.20.1-dev-tools.jar  LICENSE.txt  tmp

bin                conf         hadoop-0.20.1-dev-core.jar      ivy                          NOTICE.txt   webapps

block              contrib      hadoop-0.20.1-dev-examples.jar  ivy.xml                      README.txt   YAHOO-CHANGES.txt

build.xml          docs         hadoop-0.20.1-dev-test.jar      lib                          src

[root@ungeo12 hadoop]# cd block/

[root@ungeo12 block]# ls

blocksBeingWritten  current  detach  in_use.lock  storage  tmp

[root@ungeo12 block]# cd current/

[root@ungeo12 current]# ls

blk_-2661147997145735854            blk_-6680210538719580844            blk_6729201486232919162            dncp_block_verification.log.curr

blk_-2661147997145735854_1006.meta  blk_-6680210538719580844_1005.meta  blk_6729201486232919162_1006.meta  VERSION

[root@ungeo12 current]# pwd

/usr/local/hadoop/block/current

[root@ungeo12 current]# ls

blk_-2661147997145735854            blk_-6680210538719580844            blk_6729201486232919162            dncp_block_verification.log.curr

blk_-2661147997145735854_1006.meta  blk_-6680210538719580844_1005.meta  blk_6729201486232919162_1006.meta  VERSION

[root@ungeo12 current]# find .  -name "*6729201486232919162*"

./blk_6729201486232919162_1006.meta

./blk_6729201486232919162

[root@ungeo12 current]# du -sh *6729201486232919162*

65M     blk_6729201486232919162  ##块1

520K    blk_6729201486232919162_1006.meta

[root@ungeo12 current]# 

[root@ungeo12 current]# du  -sh *2661147997145735854*

16M     blk_-2661147997145735854 ##块2

132K    blk_-2661147997145735854_1006.meta

原文地址:http://blog.chinaunix.net/uid-20776139-id-3388475.html

posted @ 2012-11-05 22:45  出发一路向北  阅读(662)  评论(0)  编辑  收藏  举报