分片集群
mongodb分片+副本集方式部署集群
介绍
背景:mongodb集群搭建方式有三种,1、主从(官方已经不推荐),2、副本集,3、分片。这里介绍如何通过分片sharding方式搭建mongodb集群。sharding集群方式也基于副本集,在搭建过程中,需要对分片和配置节点做副本集。最后将做好的副本集的分片加入到路由节点,构成集群。
sharding方式的集群中,有三类角色,分别是shard,config,router。如下图所示。
一、配置准备
创建存放数据的文件夹
# Data directories: one per shard member and one per config server.
mkdir -p /usr/local/software/mongodb/mongo-cluster/data/shard01
mkdir -p /usr/local/software/mongodb/mongo-cluster/data/shard02
mkdir -p /usr/local/software/mongodb/mongo-cluster/data/shard21
mkdir -p /usr/local/software/mongodb/mongo-cluster/data/shard22
mkdir -p /usr/local/software/mongodb/mongo-cluster/data/config
mkdir -p /usr/local/software/mongodb/mongo-cluster/data/config2
mkdir -p /usr/local/software/mongodb/mongo-cluster/data/config3
# Log directory referenced by every logpath= setting; a forked mongod/mongos
# cannot start when the directory of its log file does not exist.
mkdir -p /usr/local/software/mongodb/mongo-cluster/logs
# Directory that holds the *.conf files passed to mongod/mongos with -f.
mkdir -p /usr/local/software/mongodb/mongo-cluster/conf
shard01.conf
port=27018
fork=true
dbpath=/usr/local/software/mongodb/mongo-cluster/data/shard01
logpath=/usr/local/software/mongodb/mongo-cluster/logs/shard01.log
logappend=true
bind_ip=127.0.0.1
# 设置副本集名称
replSet=shard1
# 声明这是一个集群的分片
shardsvr=true
shard02.conf
port=27019
fork=true
dbpath=/usr/local/software/mongodb/mongo-cluster/data/shard02
logpath=/usr/local/software/mongodb/mongo-cluster/logs/shard02.log
logappend=true
bind_ip=127.0.0.1
# 设置副本集名称
replSet=shard1
# 声明这是一个集群的分片
shardsvr=true
两个shard实例
shard21.conf
port=27118
fork=true
dbpath=/usr/local/software/mongodb/mongo-cluster/data/shard21
logpath=/usr/local/software/mongodb/mongo-cluster/logs/shard21.log
logappend=true
bind_ip=127.0.0.1
# 设置副本集名称
replSet=shard2
# 声明这是一个集群的分片
shardsvr=true
shard22.conf
port=27119
fork=true
dbpath=/usr/local/software/mongodb/mongo-cluster/data/shard22
logpath=/usr/local/software/mongodb/mongo-cluster/logs/shard22.log
logappend=true
bind_ip=127.0.0.1
# 设置副本集名称
replSet=shard2
# 声明这是一个集群的分片
shardsvr=true
config.conf
port=27117
fork=true
dbpath=/usr/local/software/mongodb/mongo-cluster/data/config
logpath=/usr/local/software/mongodb/mongo-cluster/logs/config.log
logappend=true
bind_ip=127.0.0.1
# 设置副本集名称
replSet=config
# 声明这是一个集群的config服务
configsvr=true
config2.conf
port=27217
fork=true
dbpath=/usr/local/software/mongodb/mongo-cluster/data/config2
logpath=/usr/local/software/mongodb/mongo-cluster/logs/config2.log
logappend=true
bind_ip=127.0.0.1
# 设置副本集名称
replSet=config
# 声明这是一个集群的config服务
configsvr=true
config3.conf
port=27317
fork=true
dbpath=/usr/local/software/mongodb/mongo-cluster/data/config3
logpath=/usr/local/software/mongodb/mongo-cluster/logs/config3.log
logappend=true
bind_ip=127.0.0.1
# 设置副本集名称
replSet=config
# 声明这是一个集群的config服务
configsvr=true
router.conf
port=27017
# 路由节点与元信息节点(config)关联
configdb=config/127.0.0.1:27117,127.0.0.1:27217,127.0.0.1:27317
logpath=/usr/local/software/mongodb/mongo-cluster/logs/router.log
fork=true
logappend=true
bind_ip=0.0.0.0
关于配置文件说明
shard01.conf, shard02.conf是shard1分片的配置。
shard21.conf, shard22.conf是shard2分片的配置。
config.conf, config2.conf, config3.conf是config节点配置。
二、启动分片节点和配置节点
# 启动分片节点27018 和 27019
/usr/local/software/mongodb/bin/mongod -f /usr/local/software/mongodb/mongo-cluster/conf/shard01.conf
/usr/local/software/mongodb/bin/mongod -f /usr/local/software/mongodb/mongo-cluster/conf/shard02.conf
# 启动分片节点 27118 和 27119
/usr/local/software/mongodb/bin/mongod -f /usr/local/software/mongodb/mongo-cluster/conf/shard21.conf
/usr/local/software/mongodb/bin/mongod -f /usr/local/software/mongodb/mongo-cluster/conf/shard22.conf
# 启动配置节点 27117、27217、27317
/usr/local/software/mongodb/bin/mongod -f /usr/local/software/mongodb/mongo-cluster/conf/config.conf
/usr/local/software/mongodb/bin/mongod -f /usr/local/software/mongodb/mongo-cluster/conf/config2.conf
/usr/local/software/mongodb/bin/mongod -f /usr/local/software/mongodb/mongo-cluster/conf/config3.conf
或者如下(不需要配置文件)
# All options are given on the command line, so -f (which expects a config
# file path) must not be passed.
# shard1 replica set members (27018, 27019)
/usr/local/software/mongodb/bin/mongod --port 27018 --dbpath /usr/local/software/mongodb/mongo-cluster/data/shard01 --logpath=/usr/local/software/mongodb/mongo-cluster/logs/shard01.log --replSet shard1 --shardsvr --smallfiles --fork
/usr/local/software/mongodb/bin/mongod --port 27019 --dbpath /usr/local/software/mongodb/mongo-cluster/data/shard02 --logpath=/usr/local/software/mongodb/mongo-cluster/logs/shard02.log --replSet shard1 --shardsvr --smallfiles --fork
# shard2 replica set members (27118, 27119)
/usr/local/software/mongodb/bin/mongod --port 27118 --dbpath /usr/local/software/mongodb/mongo-cluster/data/shard21 --logpath=/usr/local/software/mongodb/mongo-cluster/logs/shard21.log --replSet shard2 --shardsvr --smallfiles --fork
/usr/local/software/mongodb/bin/mongod --port 27119 --dbpath /usr/local/software/mongodb/mongo-cluster/data/shard22 --logpath=/usr/local/software/mongodb/mongo-cluster/logs/shard22.log --replSet shard2 --shardsvr --smallfiles --fork
# config server replica set members (27117, 27217, 27317); these must be
# started with --configsvr, not --shardsvr
/usr/local/software/mongodb/bin/mongod --port 27117 --dbpath /usr/local/software/mongodb/mongo-cluster/data/config --logpath=/usr/local/software/mongodb/mongo-cluster/logs/config.log --replSet config --configsvr --smallfiles --fork
/usr/local/software/mongodb/bin/mongod --port 27217 --dbpath /usr/local/software/mongodb/mongo-cluster/data/config2 --logpath=/usr/local/software/mongodb/mongo-cluster/logs/config2.log --replSet config --configsvr --smallfiles --fork
/usr/local/software/mongodb/bin/mongod --port 27317 --dbpath /usr/local/software/mongodb/mongo-cluster/data/config3 --logpath=/usr/local/software/mongodb/mongo-cluster/logs/config3.log --replSet config --configsvr --smallfiles --fork
三、配置节点构成副本集
进入到config的节点
/usr/local/software/mongodb/bin/mongo --port 27117
> var config = {_id:'config',members:[{_id:0,host:'127.0.0.1:27117'},{_id:1,host:'127.0.0.1:27217'},{_id:2,host:'127.0.0.1:27317'}]}
> rs.initiate(config)
> rs.status() // 查看状态
副本集初始化之后,等待10s左右,当前节点会变为primary节点。
四、分片节点构成副本集
登陆127.0.0.1:27018
/usr/local/software/mongodb/bin/mongo --port 27018
> var config = {_id:'shard1',members:[{_id:0,host:'127.0.0.1:27018'},{_id:1,host:'127.0.0.1:27019'}]}
> rs.initiate(config)
> rs.status()
接着登陆 127.0.0.1:27118
/usr/local/software/mongodb/bin/mongo --port 27118
> var config = {_id:'shard2',members:[{_id:0,host:'127.0.0.1:27118'},{_id:1,host:'127.0.0.1:27119'}]}
> rs.initiate(config)
两个分片shard1、shard2设置成功之后,等待10s左右就会发现当前节点默认变为了当前分片的primary节点。
五、启动路由节点、并增加分片
启动路由mongos(以下两种方式任选其一:使用配置文件启动,或直接使用命令行参数启动)
/usr/local/software/mongodb/bin/mongos -f /usr/local/software/mongodb/mongo-cluster/conf/router.conf
/usr/local/software/mongodb/bin/mongos --port 27017 \
--configdb config/127.0.0.1:27117,127.0.0.1:27217,127.0.0.1:27317 \
--logpath=/usr/local/software/mongodb/mongo-cluster/logs/router.log \
--fork --logappend --bind_ip=0.0.0.0
登陆之后可以看到有个config的数据库 show dbs
config数据库中的集合包含了整个集群的配置信息。切换到config库(use config)后执行命令show collections(或show tables),我们可以看到有如下集合。
mongos> show collections
changelog //保存被分片的集合的任何元数据的改变,例如chunks的迁移、分割等。
chunks //保存集群中分片集合的所有块的信息,包含块的数据范围与块所在的片。
databases //保存集群中的所有数据库,包含分片与未分片的。
lockpings //保存跟踪集群中的激活组件。
locks //均衡器balancer执行时产生锁,在此集合中插入一条记录。 可得知哪个mongos是均衡器
migrations
mongos //保存了集群中所有路由mongos的信息。
shards //保存了集群中的所有片的信息。
tags // 分片标签
version //保存当前集群元数据的版本信息。
增加分片格式如下: "shard1/127.0.0.1:27018,127.0.0.1:27019"
# 登陆mongo
/usr/local/software/mongodb/bin/mongo --port 27017
# 串联路由服务器与分片副本集
mongos> sh.addShard("shard1/127.0.0.1:27018,127.0.0.1:27019")
mongos> sh.addShard("shard2/127.0.0.1:27118,127.0.0.1:27119")
查看集群状态 sh.status()
mongos> sh.status()
--- Sharding Status ---
sharding version: {
"_id" : 1,
"minCompatibleVersion" : 5,
"currentVersion" : 6,
"clusterId" : ObjectId("5da028fabb2fbb93f8991ab1")
}
shards: // 这里是刚才的两个分片集
{ "_id" : "shard1", "host" : "shard1/127.0.0.1:27018,127.0.0.1:27019", "state" : 1 }
{ "_id" : "shard2", "host" : "shard2/127.0.0.1:27118,127.0.0.1:27119", "state" : 1 }
...
六、设置数据库启用分片,启用索引。
在mongos的客户端下
mongos> use admin
# 分片指定数据库
mongos> sh.enableSharding("shop")
# 指定数据库里需要分片的集合和片键
sh.shardCollection("shop.goods", {id:1})
如果MongoDB报"sharding already enabled for collection xxx"的错误
- 进入到config数据库
- 查看数据库的集合是否已经存在分片
- 删除该集合对应的分片元数据
mongos> use config
mongos> db.chunks.find({},{ns:1}) #查看
mongos> db.chunks.remove({ns:"shop.goods"}) # 删除
mongos> db.collections.remove( { _id: "shop.goods" } )
mongos>db.locks.remove( { _id: "shop.goods" }
七、写入数据测试。
进入shop库
mongos> use shop
mongos> for(var i=1;i<=15000;i++){db.goods.save({_id:i,id:i, name:"user"})}
mongos> sh.status()
mongos> db.shop.status()
分别登陆shard1分片集 和 shard2分片集查看数据分配状态
/usr/local/software/mongodb/bin/mongo --port 27018
use shop
db.goods.count()
/usr/local/software/mongodb/bin/mongo --port 27118
use shop
db.goods.count()
八、手动预先分片
预先决定可以装多少条数据 和 决定片数
以shop.user表为例
// user表 用 userid作片键
mongos> sh.shardCollection("shop.user", {userid:1})
//预先在1k,2k...,40k这样的位置切好块
mongos> for(var i = 1; i<=40;i++) {sh.splitAt('shop.user',{userid:i*1000})}
// 添加数据
mongos> for(var i=1; i<=40000; i++) {db.user.insert({"userid":i, "name":"xiao ming"})}
切好块后,这些分割好的chunk是空的
通过mongos可以添加user数据,这些数据会添加到预先分配好的片, chunk就不会来回移动了
使用脚本的方式启动集群:
start.sh
#!/bin/bash
# start.sh -- start (or reset) the single-host MongoDB sharded test cluster.
#
# Usage:
#   start.sh          start all mongod nodes, initiate the three replica sets,
#                     start mongos, add both shards and load sample data
#   start.sh reset    kill every mongo process, wipe data and logs, recreate
#                     the empty directories, then exit
#
# The script uses bash-only features (arrays, [[ ]], C-style for loops), so it
# must be interpreted by bash rather than a plain POSIX sh.

# Cluster root, data directory, log directory, config-file directory and the
# MongoDB bin directory. configPath is kept for reference only; this script
# passes every option on the command line and reads no config file.
clusterRoot=/usr/local/software/mongodb/mongo-cluster
dataPath=${clusterRoot}/data
logPath=${clusterRoot}/logs
configPath=${clusterRoot}/conf
mongoRoot=/usr/local/software/mongodb/bin

# IP used inside the cluster. Everything runs on one machine here, so a single
# IP is shared; normally each replica set member would have its own host.
IP=127.0.0.1

# Port exposed by the cluster (the mongos router).
routePort=27017

# The four arrays below are parallel: index i describes one mongod node.
# Listening port of each node.
portArr=(27018 27019 27118 27119 27117 27217 27317)
# Data sub-directory (also used as the log file base name) of each node.
dataArr=(shard01 shard02 shard21 shard22 config config2 config3)
# Replica set each node belongs to.
replArr=(shard1 shard1 shard2 shard2 config config config)
# Node role flag: shardsvr = shard member, configsvr = metadata (config) server.
hostType=(shardsvr shardsvr shardsvr shardsvr configsvr configsvr configsvr)

if [ "${1}" = "reset" ]; then
    pkill -9 mongo                 # kill every mongo-related process
    rm -rf "${dataPath}"/*/*       # delete the data files
    rm -rf "${logPath}"/*          # delete the logs
    for dir in "${dataArr[@]}"; do
        mkdir -p "${dataPath}/${dir}"
    done
    # The log directory must exist as well, otherwise the next start fails.
    mkdir -p "${logPath}"
    exit
fi

# validate <exit-status> <message>
# Prints <message> and aborts the script with status 1 when <exit-status>
# is not 0; does nothing otherwise.
validate() {
    if [[ $1 != 0 ]]; then
        echo "$2"
        exit 1
    fi
}

# initReplSet <replica-set-name> <port>...
# Connects to the first listed port and initiates a replica set whose members
# are ${IP}:<port> for every port given, with _id 0, 1, 2, ... in that order.
initReplSet() {
    local name=$1
    shift
    local members="" id=0 port
    for port in "$@"; do
        members+="{_id:${id},host:\"${IP}:${port}\"},"
        id=$((id + 1))
    done
    members=${members%,}

    "${mongoRoot}/mongo" --port "$1" << EOF
var config = {_id:"${name}",members:[${members}]}
rs.initiate(config)
EOF
    validate $? "===副本集${name},IP:${IP}启动失败!==="
    echo "===副本集${name},IP:${IP}启动成功!==="
}

# waitForPrimary <port>
# Polls the node on <port> once per second (at most 30 times) until its
# rs.status() lists a member in PRIMARY state. Returns 0 on success, 1 on
# timeout. Election takes a few seconds after rs.initiate, and adding a shard
# needs a primary to talk to.
waitForPrimary() {
    local port=$1 attempt
    for ((attempt = 0; attempt < 30; attempt++)); do
        if "${mongoRoot}/mongo" --quiet --port "${port}" --eval \
            'try { print(rs.status().members.some(function (m) { return m.stateStr == "PRIMARY"; })); } catch (e) { print(false); }' \
            2>/dev/null | grep -q true; then
            return 0
        fi
        sleep 1
    done
    return 1
}

# Start every mongod node.
#   --replSet    name of the replica set the node joins
#   --shardsvr / --configsvr   whether the node is a shard or a config server
#   --smallfiles reduces the initial size of data files
#                NOTE(review): this option was removed in MongoDB 4.2; drop it
#                when running a newer server.
#   --fork       run in the background
for ((i = 0; i < ${#portArr[@]}; i++)); do
    # Make sure dbpath and the log directory exist on a first run.
    mkdir -p "${dataPath}/${dataArr[$i]}" "${logPath}"
    "${mongoRoot}/mongod" --port "${portArr[$i]}" \
        --dbpath "${dataPath}/${dataArr[$i]}" \
        --logpath="${logPath}/${dataArr[$i]}.log" \
        --replSet "${replArr[$i]}" "--${hostType[$i]}" --smallfiles --fork >/dev/null 2>&1
    validate $? "===副本${replArr[$i]},类型${hostType[$i]},端口${portArr[$i]}启动失败!==="
    echo "===副本${replArr[$i]},类型${hostType[$i]},端口${portArr[$i]}启动成功!==="
done

# Build shard replica set 1 (shard1): 27018 27019
initReplSet "${replArr[0]}" "${portArr[0]}" "${portArr[1]}"
# Build shard replica set 2 (shard2): 27118 27119
initReplSet "${replArr[2]}" "${portArr[2]}" "${portArr[3]}"
# Build the config server replica set: 27117 27217 27317
initReplSet "${replArr[4]}" "${portArr[4]}" "${portArr[5]}" "${portArr[6]}"

# All three elections run in parallel; wait for each to finish before the
# router and the shards are wired together.
for idx in 0 2 4; do
    waitForPrimary "${portArr[$idx]}"
    validate $? "===副本集${replArr[$idx]},IP:${IP}选举primary超时!==="
done

# Start mongos (router) on 27017 and point it at the config replica set,
# which stores the cluster metadata.
"${mongoRoot}/mongos" --port "${routePort}" \
    --configdb "${replArr[4]}/${IP}:${portArr[4]},${IP}:${portArr[5]},${IP}:${portArr[6]}" \
    --logpath="${logPath}/router.log" \
    --fork --logappend --bind_ip=0.0.0.0 >/dev/null 2>&1
validate $? "===路由节点mongos,IP:${IP},端口:${routePort}启动失败!==="
echo "===路由节点mongos,IP:${IP},端口:${routePort}启动成功!==="

# Register shard1 and shard2 (the nodes holding the real data) with the router.
"${mongoRoot}/mongo" --port "${routePort}" << EOF
use admin
sh.addShard("${replArr[0]}/${IP}:${portArr[0]},${IP}:${portArr[1]}")
sh.addShard("${replArr[2]}/${IP}:${portArr[2]},${IP}:${portArr[3]}")
EOF

# Smoke test (optional): shard shop.goods on id and insert 15000 documents.
"${mongoRoot}/mongo" --port "${routePort}" << EOF
use admin
sh.enableSharding("shop")
sh.shardCollection("shop.goods", {id:1})
use shop
for(var i=1;i<=15000;i++){db.goods.insert({id:i, name:"hello world how are you i am fine thanks!"})}
EOF
集群测试
对shop数据库进行分片
对shop数据库中的goods集合分片 片键为id 一般为主键
切换到分片数据库 并添加测试数据
登陆到mongos查看集群状态
/usr/local/software/mongodb/bin/mongo --port 27017
mongos> sh.status()
查看分片节点上的数据
# Log in to shard1 and count the documents of the test collection shop.goods
/usr/local/software/mongodb/bin/mongo --port 27018 << EOF
use shop
db.goods.count()
EOF
# Log in to shard2 and count the documents of the test collection shop.goods
/usr/local/software/mongodb/bin/mongo --port 27118 << EOF
use shop
db.goods.count()
EOF