#连接的是mongos路由 [root@test1 bin]# ./mongo --port 27017 mongos> sh.status() --- Sharding Status --- sharding version: { "_id" : 1, "minCompatibleVersion" : 5, "currentVersion" : 6, "clusterId" : ObjectId("5be2a93b4c4972e711620a02") } shards: #显示的是分片的数据库 { "_id" : "shard1", "host" : "shard1/,test4:30001", "state" : 1 } { "_id" : "shard2", "host" : "shard2/,test3:30002", "state" : 1 } { "_id" : "shard3", "host" : "shard3/,test2:30003", "state" : 1 } active mongoses: #mongodb的版本号 "3.4.2" : 1 autosplit: #是否开启自动分片 Currently enabled: yes balancer: #均衡器(会在后面解释) Currently enabled: yes Currently running: no Balancer lock taken at Wed Nov 07 2018 18:26:23 GMT+0800 (CST) by ConfigServer:Balancer Failed balancer rounds in last 5 attempts: 0 Migration Results for the last 24 hours: 4 : Success databases: #分片的数据 { "_id" : "mytest", "primary" : "shard2", "partitioned" : true } mytest.test shard key: { "id" : 1 } #分片键 unique: false #是否唯一 balancing: true #是否均衡 chunks: #每个分片含有数据的块数 ,后面详细列出了数据在每个分片的块的范围! shard1 3 shard2 2 shard3 2 { "id" : { "$minKey" : 1 } } -->> { "id" : 2 } on : shard1 Timestamp(5, 1) { "id" : 2 } -->> { "id" : 22 } on : shard3 Timestamp(3, 0) { "id" : 22 } -->> { "id" : 171218 } on : shard2 Timestamp(4, 1) { "id" : 171218 } -->> { "id" : 373212 } on : shard2 Timestamp(3, 3) { "id" : 373212 } -->> { "id" : 544408 } on : shard1 Timestamp(4, 2) { "id" : 544408 } -->> { "id" : 742999 } on : shard1 Timestamp(4, 3) { "id" : 742999 } -->> { "id" : { "$maxKey" : 1 } } on : shard3 Timestamp(5, 0) mongos>
shard_cfg:PRIMARY> show dbs; admin 0.000GB config 0.001GB local 0.001GB shard_cfg:PRIMARY> use config switched to db config shard_cfg:PRIMARY> show tables; #库中的文档会在下面详细介绍 actionlog changelog chunks collections databases lockpings locks migrations mongos shards tags version
shard_cfg:PRIMARY> db.chunks.count() #共有7个块
shard_cfg:PRIMARY> db.chunks.find().limit(1).pretty() #查看第一个块的信息
"_id" : "mytest.test-id_MinKey",
"lastmod" : Timestamp(5, 1),
"lastmodEpoch" : ObjectId("5be2ce5986b5988b373c7cca"),
"ns" : "mytest.test", #命名空间
"min" : {
"id" : { "$minKey" : 1 } #分片键的最小值
"max" : { #分片键的最大值
"id" : 2
"shard" : "shard1" #在哪个分片
shard_cfg:PRIMARY> db.chunks.find({"shard":"shard1"}).count()
shard_cfg:PRIMARY> db.chunks.find({"shard":"shard2"}).count()
shard_cfg:PRIMARY> db.chunks.find({"shard":"shard3"}).count()
#每个分片集群中含有的数据数目,【这个算均衡吧,毕竟每条文档只有几个字节,ok这不重要,只要知道是均衡的就可以】 shard1:PRIMARY> db.test.count() 369788 shard2:PRIMARY> db.test.count() 373190 shard3:PRIMARY> db.test.count() 257022
shard_cfg:PRIMARY> db.changelog.find({what: "split"}).count() #发生了多少次分割(因为只第一次批量插入了数据,因此没有分割) 0 shard_cfg:PRIMARY> db.changelog.find({what: "moveChunk.commit"}).count() #发生迁移的次数 4
#再一次插入上面的数据,来观察分割与迁移 【这次插入约200万条数据,会很慢】
mongos> use mytest
switched to db mytest
mongos> for(var i = 1; i < 2000000; i++){
... db.test.insert({id: i, name: "test2"})
... }
WriteResult({ "nInserted" : 1 })
mongos> sh.status()
--- Sharding Status ---
sharding version: {
"_id" : 1,
"minCompatibleVersion" : 5,
"currentVersion" : 6,
"clusterId" : ObjectId("5be2a93b4c4972e711620a02")
{ "_id" : "shard1", "host" : "shard1/,test4:30001", "state" : 1 }
{ "_id" : "shard2", "host" : "shard2/,test3:30002", "state" : 1 }
{ "_id" : "shard3", "host" : "shard3/,test2:30003", "state" : 1 }
active mongoses:
"3.4.2" : 1
Currently enabled: yes
Currently enabled: yes
Currently running: no
Balancer lock taken at Wed Nov 07 2018 18:26:23 GMT+0800 (CST) by ConfigServer:Balancer
Failed balancer rounds in last 5 attempts: 0
Migration Results for the last 24 hours:
7 : Success
1 : Failed with error 'aborted', from shard2 to shard3
{ "_id" : "mytest", "primary" : "shard2", "partitioned" : true }
shard key: { "id" : 1 }
unique: false
balancing: true
shard1 5
shard2 4
shard3 4
{ "id" : { "$minKey" : 1 } } -->> { "id" : 2 } on : shard1 Timestamp(8, 1)
{ "id" : 2 } -->> { "id" : 22 } on : shard3 Timestamp(7, 1)
{ "id" : 22 } -->> { "id" : 171218 } on : shard2 Timestamp(6, 1)
{ "id" : 171218 } -->> { "id" : 256816 } on : shard3 Timestamp(6, 0)
{ "id" : 256816 } -->> { "id" : 342414 } on : shard2 Timestamp(5, 3)
{ "id" : 342414 } -->> { "id" : 373212 } on : shard2 Timestamp(5, 4)
{ "id" : 373212 } -->> { "id" : 544408 } on : shard1 Timestamp(4, 2)
{ "id" : 544408 } -->> { "id" : 742999 } on : shard1 Timestamp(4, 3)
{ "id" : 742999 } -->> { "id" : 828597 } on : shard3 Timestamp(6, 2)
{ "id" : 828597 } -->> { "id" : 1000000 } on : shard3 Timestamp(6, 3)
{ "id" : 1000000 } -->> { "id" : 1249999 } on : shard1 Timestamp(7, 2)
{ "id" : 1249999 } -->> { "id" : 1603980 } on : shard1 Timestamp(7, 3)
{ "id" : 1603980 } -->> { "id" : { "$maxKey" : 1 } } on : shard2 Timestamp(8, 0)
shard_cfg:PRIMARY> use config
switched to db config
shard_cfg:PRIMARY> db.changelog.find({what: "split"}).count()
shard_cfg:PRIMARY> db.changelog.find({what: "moveChunk.commit"}).count()
- 每个分片维护自己的索引。当在分片集合上声明索引时,每个分片都会为自己的集合部分定义单独的索引。
- 分片集合在每个分片上都应该拥有相同的索引。如果不是,就无法保证查询性能的一致性。
- 分片集合只允许在_id字段和分片键上建立唯一索引。禁止在其他字段上建立唯一索引,因为强制唯一性需要在分片之间进行通信,这是由mongodb分片集群底层工作机制决定的。
shards: { "_id" : "shard1", "host" : "shard1/,test4:30001", "state" : 1 } { "_id" : "shard2", "host" : "shard2/,test3:30002", "state" : 1 } { "_id" : "shard3", "host" : "shard3/,test2:30003", "state" : 1 } 此时各个分片对应的数据块如下: chunks: shard1 5 shard2 4 shard3 4 ###########假设现在我们要删除最后一个分片集合,也就是删除shard3. mongos> sh.setBalancerState("true") #保证均衡器是开着的 { "ok" : 1 } mongos> use admin #进入admin库中 switched to db admin mongos> db.runCommand({removeshard: "shard3/,test2:30003"}) #删除分片 { "msg" : "draining started successfully", "state" : "started", "shard" : "shard3", "note" : "you need to drop or movePrimary these databases", "dbsToMove" : [ ], "ok" : 1 } #表示数据正在迁移到其他的库上 ##再次执行这条命令查看迁移的状态 mongos> db.runCommand({removeshard: "shard3/,test2:30003"}) { "msg" : "draining ongoing", "state" : "ongoing", "remaining" : { "chunks" : NumberLong(4), "dbs" : NumberLong(0) }, "note" : "you need to drop or movePrimary these databases", "dbsToMove" : [ ], "ok" : 1 } #这里遇见一个问题note提示说明shard3分片是整个集群的主分片【插入数据的时候数据先插入这个主分片然后,再迁移到其他的分片】,但是查看config库,发现主分片不是这个! mongos> use config switched to db config mongos> db.databases.find() #这里显示主分片是shard2 { "_id" : "mytest", "primary" : "shard2", "partitioned" : true } 【转移主分片的命令如下 mongos> db.runCommand({movePrimary: "mytest", to : "shard1"}) { "primary" : "shard1:shard1/,test4:30001", "ok" : 1 } 】 #然后删除就一直在这里,暂时未找到原因
mongos> use config #库中的数据和configsvr服务器中的数据是一样的! switched to db config mongos> show tables; actionlog changelog chunks collections databases lockpings locks migrations mongos shards tags version mongos> db.shards.find() #查看分片信息 { "_id" : "shard1", "host" : "shard1/,test4:30001", "state" : 1 } { "_id" : "shard2", "host" : "shard2/,test3:30002", "state" : 1 } { "_id" : "shard3", "host" : "shard3/,test2:30003", "state" : 1 } mongos> db.databases.find() #查看分片中数据库的信息,因为只有一个没有test库 { "_id" : "mytest", "primary" : "shard2", "partitioned" : true } #primary:该数据库的主分片,库中未分片的集合都存放在这个分片上(每个数据库各有一个主分片,并非整个集群的主分片)。 mongos> db.collections.find().pretty() #分片的集合信息 { "_id" : "mytest.test", "lastmodEpoch" : ObjectId("5be4d47fd271124654f1411e"), "lastmod" : ISODate("1970-02-19T17:02:47.412Z"), "dropped" : false, "key" : { #分片键 "id" : 1 }, "unique" : false #分片键是否是唯一的 } mongos> db.mongos.find().pretty() #查看集群中所有的mongos路由信息 { "_id" : "test1:27017", "ping" : ISODate("2018-11-09T02:18:35.796Z"), "up" : NumberLong(69215), "waiting" : true, "mongoVersion" : "3.4.2" } { "_id" : "test3:27017", "ping" : ISODate("2018-11-09T02:18:35.594Z"), "up" : NumberLong(68335), "waiting" : true, "mongoVersion" : "3.4.2" } mongos> db.locks.find().pretty() #均衡器锁的信息:config.locks,记录所有集群范围的锁,可得知哪个mongos是均衡器 { "_id" : "balancer", "state" : 2, #0非活动状态、1尝试得到锁,但还没得到,2表示正在进行均衡 "ts" : ObjectId("5be3df32c3d597874a374d77"), "who" : "ConfigServer:Balancer", "process" : "ConfigServer", "when" : ISODate("2018-11-08T07:01:56.119Z"), "why" : "CSRS Balancer" #集群均衡器 } { "_id" : "mytest", "state" : 0, "ts" : ObjectId("5be4d7eed271124654f14137"), "who" : "test1:27017:1541660698:8505579169655688671:conn5", "process" : "test1:27017:1541660698:8505579169655688671", "when" : ISODate("2018-11-09T00:42:22.797Z"), "why" : "enableSharding" #分片功能 } { "_id" : "mytest.test", "state" : 0, "ts" : ObjectId("5be3df32c3d597874a374d77"), "who" : "ConfigServer:Balancer", "process" : "ConfigServer", "when" : ISODate("2018-11-09T01:51:38.065Z"), "why" : 
"Migrating chunk(s) in collection mytest.test" #迁移块 } mongos> db.chunks.find().count() #总共有211个块 211 mongos> db.chunks.find().limit(1).pretty() #查看其中的一个块信息 { "_id" : "mytest.test-id_MinKey", "lastmod" : Timestamp(2, 0), "lastmodEpoch" : ObjectId("5be4d47fd271124654f1411e"), "ns" : "mytest.test", "min" : { "id" : { "$minKey" : 1 } }, "max" : { "id" : 250001 }, "shard" : "shard1" } mongos> db.changelog.find().count() #记录的是分片操作日志 1167 mongos> db.changelog.find().limit(1).pretty() #查看一个日志的文档 { "_id" : "test1-2018-11-08T15:05:38.318+0800-5be3e042c3d597874a3758b9", "server" : "test1", "clientAddr" : "", "time" : ISODate("2018-11-08T07:05:38.318Z"), "what" : "addShard", #可以根据what的动作不同来筛选出需要的信息。 "ns" : "", "details" : { "name" : "shard1", "host" : "shard1/test4:30001," } } mongos> db.changelog.find({what: "split"}).count() #分割发生的次数 0 mongos> db.changelog.find({what: "moveChunk.commit"}).count() #移动块的次数 140 mongos>db.adminCommand({"connPoolStats":1}) #查看分片的网络连接数据
不知道为什么,我搭建的这个集群没有settings这个集合 ,mongodb3.4以及mongodb3.6的官方文档都有这个集合,不知道为什么这里没有?
mongos> db.chunks.stats() { "sharded" : false, "primary" : "config", "ns" : "config.chunks", "size" : 34746, "count" : 211, "avgObjSize" : 164, "storageSize" : 53248, "capped" : false, "wiredTiger" : { "metadata" : { "formatVersion" : 1 }, "creationString" : "access_pattern_hint=none,allocation_size=4KB,app_metadata=(formatVersion=1),block_allocation=best,block_compressor=snappy,cache_resident=false,checksum=on,colgroups=,collator=,columns=,dictionary=0,encryption=(keyid=,name=),exclusive=false,extractor=,format=btree,huffman_key=,huffman_value=,ignore_in_memory_cache_size=false,immutable=false,internal_item_max=0,internal_key_max=0,internal_key_truncate=true,internal_page_max=4KB,key_format=q,key_gap=10,leaf_item_max=0,leaf_key_max=0,leaf_page_max=32KB,leaf_value_max=64MB,log=(enabled=true),lsm=(auto_throttle=true,bloom=true,bloom_bit_count=16,bloom_config=,bloom_hash_count=8,bloom_oldest=false,chunk_count_limit=0,chunk_max=5GB,chunk_size=10MB,merge_max=15,merge_min=0),memory_page_max=10m,os_cache_dirty_max=0,os_cache_max=0,prefix_compression=false,prefix_compression_min=4,source=,split_deepen_min_child=0,split_deepen_per_child=0,split_pct=90,type=file,value_format=u", "type" : "file", "uri" : "statistics:table:collection-14--1825262580711858791", "LSM" : { "bloom filter false positives" : 0, "bloom filter hits" : 0, "bloom filter misses" : 0, "bloom filter pages evicted from cache" : 0, "bloom filter pages read into cache" : 0, "bloom filters in the LSM tree" : 0, "chunks in the LSM tree" : 0, "highest merge generation in the LSM tree" : 0, "queries that could have benefited from a Bloom filter that did not exist" : 0, "sleep for LSM checkpoint throttle" : 0, "sleep for LSM merge throttle" : 0, "total size of bloom filters" : 0 }, "block-manager" : { "allocations requiring file extension" : 11, "blocks allocated" : 180, "blocks freed" : 50, "checkpoint size" : 12288, "file allocation unit size" : 4096, "file bytes available for reuse" : 24576, "file magic number" 
: 120897, "file major version number" : 1, "file size in bytes" : 53248, "minor version number" : 0 }, "btree" : { "btree checkpoint generation" : 1335, "column-store fixed-size leaf pages" : 0, "column-store internal pages" : 0, "column-store variable-size RLE encoded values" : 0, "column-store variable-size deleted values" : 0, "column-store variable-size leaf pages" : 0, "fixed-record size" : 0, "maximum internal page key size" : 368, "maximum internal page size" : 4096, "maximum leaf page key size" : 2867, "maximum leaf page size" : 32768, "maximum leaf page value size" : 67108864, "maximum tree depth" : 3, "number of key/value pairs" : 0, "overflow pages" : 0, "pages rewritten by compaction" : 0, "row-store internal pages" : 0, "row-store leaf pages" : 0 }, "cache" : { "bytes currently in the cache" : 80421, "bytes read into cache" : 0, "bytes written from cache" : 1194155, "checkpoint blocked page eviction" : 0, "data source pages selected for eviction unable to be evicted" : 0, "hazard pointer blocked page eviction" : 0, "in-memory page passed criteria to be split" : 0, "in-memory page splits" : 0, "internal pages evicted" : 0, "internal pages split during eviction" : 0, "leaf pages split during eviction" : 0, "modified pages evicted" : 0, "overflow pages read into cache" : 0, "overflow values cached in memory" : 0, "page split during eviction deepened the tree" : 0, "page written requiring lookaside records" : 0, "pages read into cache" : 0, "pages read into cache requiring lookaside entries" : 0, "pages requested from the cache" : 5640, "pages written from cache" : 95, "pages written requiring in-memory restoration" : 0, "unmodified pages evicted" : 0 }, "cache_walk" : { "Average difference between current eviction generation when the page was last considered" : 0, "Average on-disk page image size seen" : 0, "Clean pages currently in cache" : 0, "Current eviction generation" : 0, "Dirty pages currently in cache" : 0, "Entries in the root page" : 0, 
"Internal pages currently in cache" : 0, "Leaf pages currently in cache" : 0, "Maximum difference between current eviction generation when the page was last considered" : 0, "Maximum page size seen" : 0, "Minimum on-disk page image size seen" : 0, "On-disk page image sizes smaller than a single allocation unit" : 0, "Pages created in memory and never written" : 0, "Pages currently queued for eviction" : 0, "Pages that could not be queued for eviction" : 0, "Refs skipped during cache traversal" : 0, "Size of the root page" : 0, "Total number of pages currently in cache" : 0 }, "compression" : { "compressed pages read" : 0, "compressed pages written" : 52, "page written failed to compress" : 0, "page written was too small to compress" : 43, "raw compression call failed, additional data available" : 0, "raw compression call failed, no additional data available" : 0, "raw compression call succeeded" : 0 }, "cursor" : { "bulk-loaded cursor-insert calls" : 0, "create calls" : 19, "cursor-insert key and value bytes inserted" : 89436, "cursor-remove key bytes removed" : 0, "cursor-update value bytes updated" : 0, "insert calls" : 538, "next calls" : 669, "prev calls" : 1, "remove calls" : 0, "reset calls" : 5658, "restarted searches" : 0, "search calls" : 6051, "search near calls" : 0, "truncate calls" : 0, "update calls" : 0 }, "reconciliation" : { "dictionary matches" : 0, "fast-path pages deleted" : 0, "internal page key bytes discarded using suffix compression" : 21, "internal page multi-block writes" : 0, "internal-page overflow keys" : 0, "leaf page key bytes discarded using prefix compression" : 0, "leaf page multi-block writes" : 10, "leaf-page overflow keys" : 0, "maximum blocks required for a page" : 0, "overflow values written" : 0, "page checksum matches" : 1, "page reconciliation calls" : 86, "page reconciliation calls for eviction" : 0, "pages deleted" : 0 }, "session" : { "object compaction" : 0, "open cursor count" : 2 }, "transaction" : { "update 
conflicts" : 0 } }, "nindexes" : 4, "totalIndexSize" : 147456, "indexSizes" : { "_id_" : 36864, "ns_1_min_1" : 36864, "ns_1_shard_1_min_1" : 36864, "ns_1_lastmod_1" : 36864 }, "ok" : 1 }
- 热点 某些分片键会导致所有的读和写操作都在单个数据块或单个分片上。这可能导致单个分片服务器严重不堪重负,而其他分片服务器闲置,无所事事。
- 不可分割数据块 过于粗粒度的分片键可能导致许多文档使用相同的分片键。因为分片是基于分片键值的范围,所以意味着这些文档不能被分割为多个数据块,这最终会限制mongodb均匀分布数据的能力。
- 糟糕的定位 可以均匀的分布写操作,但是分片键与某些查询没有关联,也会导致性能很差。
mongos> db.fs.chunks.ensureIndex({"files_id":"hashed"}) mongos> sh.enableSharding("foo") { "ok" : 1 } mongos> sh.shardCollection("foo.fs.chunks",{"files_id":"hashed"}) { "collectionsharded" : "foo.fs.chunks", "ok" : 1 }
mongos > sh.addShardTag("shard0000", "T") mongos > sh.addShardTag("shard0001", "Q") mongos > sh.addShardTag("shard0002", "Q") mongos> sh.addTagRange("foo.ips",{ "ip": " ", … , "ip": " "}}, "T") mongos> sh.addTagRange("foo.ips",{ "ip": " ", … , "ip": " "}}, "Q")
- 备份分片集群数据的时候,要注意的第一件事情就是可能发生数据迁移。这意味着,除非各个分片的备份处于同一个时间点,否则备份中的数据可能会丢失或重复。
- 备份分片集群时,也必须备份配置服务器元数据。由于所有配置服务器上的数据是一样的,只需备份其中一台配置服务器即可。
- 特别注意,以上的两个备份,需要设置禁止数据块迁移之后再做!
mongos> use config switched to db config mongos> sh.stopBalancer() #禁用均衡器 mongos> use config switched to db config mongos> sh.startBalancer() #开启均衡器
