Nutch2.3分步执行过程中MongoDB中数据的变化
inject
$ nutch inject /opt/nutch/runtime/local/urls/
> db.stats()
{
"db" : "nutch",
"collections" : 3,
"objects" : 11,
"avgObjSize" : 176,
"dataSize" : 1936,
"storageSize" : 24576,
"numExtents" : 3,
"indexes" : 1,
"indexSize" : 8176,
"fileSize" : 16777216,
"nsSizeMB" : 16,
"dataFileVersion" : {
"major" : 4,
"minor" : 5
},
"extentFreeList" : {
"num" : 0,
"totalSize" : 0
},
"ok" : 1
}
generate
$ nutch generate -topN 10
> db.stats()
{
"db" : "nutch",
"collections" : 3,
"objects" : 11,
"avgObjSize" : 315.6363636363636,
"dataSize" : 3472,
"storageSize" : 24576,
"numExtents" : 3,
"indexes" : 1,
"indexSize" : 8176,
"fileSize" : 16777216,
"nsSizeMB" : 16,
"dataFileVersion" : {
"major" : 4,
"minor" : 5
},
"extentFreeList" : {
"num" : 0,
"totalSize" : 0
},
"ok" : 1
}
fetch
$ nutch fetch -all -threads 30
> db.stats()
{
"db" : "nutch",
"collections" : 3,
"objects" : 11,
"avgObjSize" : 6536.727272727273,
"dataSize" : 71904,
"storageSize" : 1073152,
"numExtents" : 4,
"indexes" : 1,
"indexSize" : 8176,
"fileSize" : 16777216,
"nsSizeMB" : 16,
"dataFileVersion" : {
"major" : 4,
"minor" : 5
},
"extentFreeList" : {
"num" : 0,
"totalSize" : 0
},
"ok" : 1
}
parse
$ nutch parse -all
> db.stats()
{
"db" : "nutch",
"collections" : 3,
"objects" : 11,
"avgObjSize" : 6536.727272727273,
"dataSize" : 71904,
"storageSize" : 1073152,
"numExtents" : 4,
"indexes" : 1,
"indexSize" : 8176,
"fileSize" : 16777216,
"nsSizeMB" : 16,
"dataFileVersion" : {
"major" : 4,
"minor" : 5
},
"extentFreeList" : {
"num" : 0,
"totalSize" : 0
},
"ok" : 1
}
updatedb
$ nutch updatedb -all
> db.stats()
{
"db" : "nutch",
"collections" : 3,
"objects" : 89,
"avgObjSize" : 1233.9775280898875,
"dataSize" : 109824,
"storageSize" : 1073152,
"numExtents" : 4,
"indexes" : 1,
"indexSize" : 8176,
"fileSize" : 16777216,
"nsSizeMB" : 16,
"dataFileVersion" : {
"major" : 4,
"minor" : 5
},
"extentFreeList" : {
"num" : 0,
"totalSize" : 0
},
"ok" : 1
}
index
$ nutch index -all
> db.stats()
{
"db" : "nutch",
"collections" : 3,
"objects" : 89,
"avgObjSize" : 1233.9775280898875,
"dataSize" : 109824,
"storageSize" : 1073152,
"numExtents" : 4,
"indexes" : 1,
"indexSize" : 8176,
"fileSize" : 16777216,
"nsSizeMB" : 16,
"dataFileVersion" : {
"major" : 4,
"minor" : 5
},
"extentFreeList" : {
"num" : 0,
"totalSize" : 0
},
"ok" : 1
}
作者:无言
如果您认为阅读这篇博客让您有些收获,不妨点击一下右下角的【推荐】
如果您希望与我交流互动,欢迎微博互粉
本文版权归作者和博客园共有,欢迎转载,但未经作者同意必须保留此段声明,且在文章页面明显位置给出原文链接,否则保留追究法律责任的权利。