MongoDB索引的操作和优化索引查询
目录
1. mongodb索引
- 索引通常能够极大的提高查询的效率, 如果没有索引, MongoDB在读取数据时必须扫描集合中的每个文件并选取那些符合查询条件的记录.
- 这种扫描全集合的查询效率是非常低的, 特别在处理大量的数据时, 查询可以要花费几十秒甚至几分钟, 这对网站的性能是非常致命的.
- 索引是特殊的数据结构, 索引存储在一个易于遍历读取的数据集合中, 索引是对数据库表中一列或多列的值进行排序的一种结构.
2. 索引的增删查改
2.1 增加索引
# 1是正排, -1 是倒排
db.$collection.createIndex({$field:1});
# 建立复合索引
db.$collection.createIndex({$field1:1, $field2:1});
2.2 查看索引
db.$collection.getIndexes();
2.3 删除索引
db.$collecion.dropIndex($index);
# 删除全部索引
db.$collection.dropIndexes();
3. 创建索引前后的对比
4. explain的参数详解以及重点查看参数
{
"queryPlanner" : { // 查询计划
"plannerVersion" : 1,
"namespace" : "test.testindex", // 查询的表名
"indexFilterSet" : false, //针对该query是否有indexfilter ???
"parsedQuery" : { // 查询条件
"id" : {
"$lt" : 97
}
},
"winningPlan" : {// 查询优化器针对该query所返回的最优执行计划的详细内容。
"stage" : "FETCH",// FETC: 可以理解为通过返回的index位置去检索具体的文档???
"inputStage" : {
"stage" : "IXSCAN", // 表示进行的是index scanning
"keyPattern" : { // 索引内容
"id" : 1,
"age" : 1
},
"indexName" : "id_1_age_1", // 使用的索引
"isMultiKey" : false, // 非复合索引 如果索引建立在array上,此处将是true
"multiKeyPaths" : {
"id" : [ ],
"age" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward", //此query的查询顺序,此处是forward,如果用了.sort({$filed:-1})将显示backward
"indexBounds" : {
"id" : [
"[-inf.0, 97.0)" // winningplan所扫描的索引范围 $lt:97
],
"age" : [
"[MinKey, MaxKey]"
]
}
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true, // 是否执行成功
"nReturned" : 97, // 结果返回条数
"executionTimeMillis" : 0, // 整体执行时间 单位毫秒
"totalKeysExamined" : 97, // 索引扫描个数
"totalDocsExamined" : 97, // 文档扫描个数
"executionStages" : {
"stage" : "FETCH",
"nReturned" : 97,
"executionTimeMillisEstimate" : 0, //该查询根据index去检索document获取97条具体数据的时间
"works" : 98,
"advanced" : 97,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 97,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 97, //最理想状态 nReturned=totalKeysExamined & totalDocsExamined=0 / nReturned=totalKeysExamined=totalDocsExamined
"executionTimeMillisEstimate" : 0,//该查询扫描97行index所用时间
"works" : 98,
"advanced" : 97,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"id" : 1,
"age" : 1
},
"indexName" : "id_1_age_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"id" : [ ],
"age" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"id" : [
"[-inf.0, 97.0)"
],
"age" : [
"[MinKey, MaxKey]"
]
},
"keysExamined" : 97,
"seeks" : 1,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
},
"serverInfo" : {
"host" : "centos01",
"port" : 27017,
"version" : "4.0.9",
"gitVersion" : "fc525e2d9b0e4bceff5c2201457e564362909765"
},
"ok" : 1
}
- stage的含义
- COLLSCAN
全表扫描- XSCAN
索引扫描- ETCH
根据索引去检索指定document- SHARD_MERGE
将各个分片返回数据进行merge, 但是根据源码中的信息,个人还总结了文档中没有的如下几类(常用如下,由于是通过源码查找,可能有所遗漏)- SORT
表明在内存中进行了排序(与老版本的scanAndOrder:true一致)- LIMIT
使用limit限制返回数- SKIP
使用skip进行跳过- IDHACK
针对_id进行查询- SHARDING_FILTER
通过mongos对分片数据进行查询- COUNT
利用db.coll.explain().count()之类进行count运算- COUNTSCAN
count不使用用Index进行count时的stage返回- COUNT_SCAN
count使用了Index进行count时的stage返回- SUBPLA
未使用到索引的$or查询的stage返回- TEXT
使用全文索引进行查询时候的stage返回- PROJECTION
限定返回字段时候stage的返回
- 我们希望看到的stage的组合
Fetch+IDHACK
Fetch+ixscan
Limit+(Fetch+ixscan)
PROJECTION+ixscan
SHARDING_FILTER+ixscan
等
- 不希望看到包含如下的stage:
COLLSCAN(全表扫),SORT(使用sort但是无index),不合理的SKIP,SUBPLA(未用到index的$or)
对于count查询,希望看到的有:
COUNT_SCAN不希望看到的有:
COUNTSCAN
5. Explain 分析实例
5.1 准备数据
{ "_id" : ObjectId("5ce371f2d9a9a24e713bedd3"), "a" : 0, "b" : 38, "c" : 10 }
{ "_id" : ObjectId("5ce371f2d9a9a24e713bedd4"), "a" : 1, "b" : 64, "c" : 9 }
{ "_id" : ObjectId("5ce371f2d9a9a24e713bedd5"), "a" : 2, "b" : 28, "c" : 8 }
{ "_id" : ObjectId("5ce371f2d9a9a24e713bedd6"), "a" : 0, "b" : 73, "c" : 7 }
{ "_id" : ObjectId("5ce371f2d9a9a24e713bedd7"), "a" : 1, "b" : 95, "c" : 6 }
{ "_id" : ObjectId("5ce371f2d9a9a24e713bedd8"), "a" : 2, "b" : 11, "c" : 5 }
{ "_id" : ObjectId("5ce371f2d9a9a24e713bedd9"), "a" : 0, "b" : 73, "c" : 4 }
{ "_id" : ObjectId("5ce371f2d9a9a24e713bedda"), "a" : 1, "b" : 14, "c" : 3 }
{ "_id" : ObjectId("5ce371f2d9a9a24e713beddb"), "a" : 2, "b" : 73, "c" : 2 }
{ "_id" : ObjectId("5ce371f2d9a9a24e713beddc"), "a" : 0, "b" : 0, "c" : 1 }
- 测试的查询语句
db.testindex.find({a:2,b:{$lt:90}}).sort({c:-1});
5.2 没有index的查询计划
db.testindex.find({a:2,b:{$lt:90}}).sort({c:-1}).explain("executionStats");
{
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 3,
"executionTimeMillis" : 0,
"totalKeysExamined" : 0,
"totalDocsExamined" : 10,
"executionStages" : {
"stage" : "SORT",
"nReturned" : 3,
"executionTimeMillisEstimate" : 0,
"works" : 17,
"advanced" : 3,
"needTime" : 13,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"sortPattern" : {
"c" : -1
},
"memUsage" : 153,
"memLimit" : 33554432,
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"nReturned" : 3,
"executionTimeMillisEstimate" : 0,
"works" : 13,
"advanced" : 3,
"needTime" : 9,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"inputStage" : {
"stage" : "COLLSCAN",
"filter" : {
"$and" : [
{
"a" : {
"$eq" : 2
}
},
{
"b" : {
"$lt" : 90
}
}
]
},
"nReturned" : 3,
"executionTimeMillisEstimate" : 0,
"works" : 12,
"advanced" : 3,
"needTime" : 8,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"direction" : "forward",
"docsExamined" : 10
}
}
}
},
"serverInfo" : {
"host" : "centos01",
"port" : 27017,
"version" : "4.0.9",
"gitVersion" : "fc525e2d9b0e4bceff5c2201457e564362909765"
},
"ok" : 1
}
-
nReturned为3,符合的条件的返回为3条.
-
totalKeysExamined为0,没有使用index.
-
totalDocsExamined为10,扫描了所有记录.
-
executionStages.stage为SORT,未使用index的sort,占用的内存与内存限制为”memUsage” : 153, “memLimit” : 33554432.
-
executionStages.inputStage.stage为COLLSCAN,全表扫描,扫描条件为
{
"filter":{
"$and":[
{
"a":{
"$eq":2
}
},
{
"b":{
"$lt":90
}
}
]
}
}
5.3 增加字段c的索引
db.testindex.createIndex({c:1});
{
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 3,
"executionTimeMillis" : 0,
"totalKeysExamined" : 10,
"totalDocsExamined" : 10,
"executionStages" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"a" : {
"$eq" : 2
}
},
{
"b" : {
"$lt" : 90
}
}
]
},
"nReturned" : 3,
"executionTimeMillisEstimate" : 0,
"works" : 11,
"advanced" : 3,
"needTime" : 7,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 10,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 10,
"executionTimeMillisEstimate" : 0,
"works" : 11,
"advanced" : 10,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"c" : 1
},
"indexName" : "c_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"c" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "backward",
"indexBounds" : {
"c" : [
"[MaxKey, MinKey]"
]
},
"keysExamined" : 10,
"seeks" : 1,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
},
"serverInfo" : {
"host" : "centos01",
"port" : 27017,
"version" : "4.0.9",
"gitVersion" : "fc525e2d9b0e4bceff5c2201457e564362909765"
},
"ok" : 1
}
我们发现,Stage没有了SORT,因为我们sort字段有了index,但是由于查询还是没有index,故totalDocsExamined还是10,但是由于sort用了index,totalKeysExamined也是10,但是仅对sort排序做了优化,查询性能还是一样的低效。
5.4 使用db.testindex.ensureIndex({b:1,a:1,c:1})索引的执行计划
{
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 3,
"executionTimeMillis" : 0,
"totalKeysExamined" : 10,
"totalDocsExamined" : 10,
"executionStages" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"a" : {
"$eq" : 2
}
},
{
"b" : {
"$lt" : 90
}
}
]
},
"nReturned" : 3,
"executionTimeMillisEstimate" : 0,
"works" : 12,
"advanced" : 3,
"needTime" : 7,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 10,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 10,
"executionTimeMillisEstimate" : 0,
"works" : 11,
"advanced" : 10,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"c" : 1
},
"indexName" : "c_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"c" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "backward",
"indexBounds" : {
"c" : [
"[MaxKey, MinKey]"
]
},
"keysExamined" : 10,
"seeks" : 1,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
}
这个索引被拒绝了, 用的还是c_1的索引.
5.5 使用db.testindex.ensureIndex({a:1,b:1,c:1})索引的执行计划
{
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 3,
"executionTimeMillis" : 0,
"totalKeysExamined" : 3,
"totalDocsExamined" : 3,
"executionStages" : {
"stage" : "SORT",
"nReturned" : 3,
"executionTimeMillisEstimate" : 0,
"works" : 10,
"advanced" : 3,
"needTime" : 5,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"sortPattern" : {
"c" : 1
},
"memUsage" : 153,
"memLimit" : 33554432,
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"nReturned" : 3,
"executionTimeMillisEstimate" : 0,
"works" : 5,
"advanced" : 3,
"needTime" : 1,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"inputStage" : {
"stage" : "FETCH",
"nReturned" : 3,
"executionTimeMillisEstimate" : 0,
"works" : 4,
"advanced" : 3,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 3,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 3,
"executionTimeMillisEstimate" : 0,
"works" : 4,
"advanced" : 3,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"a" : 1,
"b" : 1,
"c" : 1
},
"indexName" : "a_1_b_1_c_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"a" : [ ],
"b" : [ ],
"c" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"a" : [
"[2.0, 2.0]"
],
"b" : [
"[-inf.0, 90.0)"
],
"c" : [
"[MinKey, MaxKey]"
]
},
"keysExamined" : 3,
"seeks" : 1,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
}
}
},
"serverInfo" : {
"host" : "centos01",
"port" : 27017,
"version" : "4.0.9",
"gitVersion" : "fc525e2d9b0e4bceff5c2201457e564362909765"
},
"ok" : 1
}
我们可以看到
- nReturned为3,返回3条记录
- totalKeysExamined为3,扫描了3个index
- totalDocsExamined为3,扫描了3个docs
- 此时nReturned=totalDocsExamined=totalKeysExamined,符合我们的期望。看起来很美吧?
- 但是,但是,但是!重要的事情说三遍!executionStages.Stage为Sort,在内存中进行排序了,这个在生产环境中尤其是在数据量较大的时候,是非常消耗性能的,这个千万不能忽视了,我们需要改进这个点。
5.6 使用db.testindex.ensureIndex({a:1, c:1, b:1)
{
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 3,
"executionTimeMillis" : 0,
"totalKeysExamined" : 3,
"totalDocsExamined" : 3,
"executionStages" : {
"stage" : "FETCH",
"nReturned" : 3,
"executionTimeMillisEstimate" : 0,
"works" : 5,
"advanced" : 3,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 3,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 3,
"executionTimeMillisEstimate" : 0,
"works" : 4,
"advanced" : 3,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"a" : 1,
"c" : 1,
"b" : 1
},
"indexName" : "a_1_c_1_b_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"a" : [ ],
"c" : [ ],
"b" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"a" : [
"[2.0, 2.0]"
],
"c" : [
"[MinKey, MaxKey]"
],
"b" : [
"[-inf.0, 90.0)"
]
},
"keysExamined" : 3,
"seeks" : 1,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
},
"serverInfo" : {
"host" : "centos01",
"port" : 27017,
"version" : "4.0.9",
"gitVersion" : "fc525e2d9b0e4bceff5c2201457e564362909765"
},
"ok" : 1
}
我们可以看到
- nReturned为3,返回3条记录
- totalKeysExamined为3,扫描了3个index
- totalDocsExamined为3,扫描了3个docs
- nReturned=totalKeysExamined=totalDocsExamined,Stage无Sort,即利用了index进行排序,而非内存,这个性能的提升高于多扫几个index的代价。
- 综上可以有一个小结论,当查询覆盖精确匹配,范围查询与排序的时候,
精确匹配字段,排序字段,范围查询字段这样的索引排序会更为高效。