02 常见中间件集群状态检查
目录
① redis 集群状态检查
app1=10.160.169.2
app2=10.160.169.140
data1=10.160.165.62
data2=10.160.165.60
data3=10.160.165.52
redis-cli -h 10.160.165.62 -p 6379 -a password INFO REPLICATION |grep role
data2=10.160.165.60
redis-cli -h 10.160.165.60 -p 6379 -a password INFO REPLICATION |grep role
data3=10.160.165.52
redis-cli -h 10.160.165.52 -p 6379 -a password INFO REPLICATION |grep role
② mongo 集群状态检查
data1=10.160.165.62 执行
mongo --host 10.160.165.62 --port 27017
use admin
db.auth('root','password')
rs.status()
③ rabbitmq 集群状态检查
data1=10.160.165.62 执行
rabbitmqctl cluster_status
systemctl status rabbitmq-server
④ minio 集群状态检查
data1=10.160.165.62 执行
curl -I http://10.160.165.62:9199/minio/health/live
curl -I http://10.160.165.62:9199/minio/health/cluster/read
⑤ zookeeper 集群状态检查
/usr/local/zookeeper/bin/zkCli.sh -server 10.160.165.62:2181,10.160.165.60:2181,10.160.165.52:2181
ls /
⑥ kafka 集群状态检查
#①发布消息
#创建主题
[root@cemmaidanglao-srv-uat-01159-ecs bin]# ./kafka-topics.sh --create --zookeeper 172.20.200.252:2181,172.20.200.2:2181,172.20.200.3:2181 --topic test --partitions 3 --replication-factor 1
Created topic test.
#向主题中写入内容
[root@cemmaidanglao-srv-uat-01159-ecs bin]# ./kafka-console-producer.sh --broker-list 172.20.200.252:9092,172.20.200.2:9092,172.20.200.3:9092 --topic test
>hello world
>hello abao
>111111
#CTRL+D结束写入内容
#②订阅消息,消费消息
[root@cemmaidanglao-srv-uat-01158-ecs bin]# ./kafka-console-consumer.sh --bootstrap-server localhost:9092 --topic test --from-beginning
hello world
hello abao
111111
#CTRL+C结束消费
⑦ elasticsearch 查看集群状态
curl 172.20.200.3:9200/_cat/nodes
⑧ mysql 查看集群状态
# mysql 查看一主二从状态
mysql -S /tmp/mysql3309.sock -e "SHOW SLAVE STATUS\G" | grep "Running:"
#显示下面状态即表示 mysql 主从复制建立成功
#Slave_IO_Running: Yes
#Slave_SQL_Running: Yes
#从库查看和主库连接状态:
SHOW SLAVE STATUS\G
#主库查看从库binlog_Dump线程:
SHOW PROCESSLIST;
#查看用户信息:
select user,host from mysql.user limit 3;
#查看主库binlog 日志
SHOW MASTER STATUS;
⑨ clickhouse 集群检查
#!/bin/bash
# -------------------------------------------------------------------------------
# filename: ck-cluster-check.sh
# description: check clickhouse cluster health status
# usage: bash ck-cluster-check.sh
# -------------------------------------------------------------------------------
#=====================================================================#
### Colors
# Log labels are rendered as "[<color> LABEL <reset>]:" — the opening bracket
# is part of each color prefix and the closing "]:" is part of PLAIN.
# The values contain backslash escapes, so print them with `echo -e`.
# RED: prefix for error labels, used as ${RED} ERROR ${PLAIN}
RED='[\033[31m'
# GREEN: prefix for success labels, used as ${GREEN} SUCCESS ${PLAIN}
GREEN='[\033[32m'
# YELLOW: prefix for warning labels, used as ${YELLOW} WARING ${PLAIN}
YELLOW='[\033[33m'
# BLUE: prefix for info labels, used as ${BLUE} INFO ${PLAIN}
BLUE='[\033[34m'
# PLAIN: color reset plus the closing "]:" of the label
PLAIN='\033[0m]:'
# Ready-made labels. The trailing \c tells `echo -e` to stop printing there
# (no newline), so a following printf continues on the same line.
SUCCESS="${GREEN} SUCCESS ${PLAIN} \c"
ERROR="${RED} ERROR ${PLAIN} \c"
# NOTE(review): "WARING" is a misspelling of "WARNING"; the variable name and
# label text are kept as-is because other code may reference them.
WARING="${YELLOW} WARING ${PLAIN} \c"
# INFO uses BLUE, as documented above, so it is distinguishable from WARING.
INFO="${BLUE} INFO ${PLAIN} \c"
# Wall-clock time (HH:MM:SS) captured once when this line runs.
TIMESTAMP=$(date +'%H:%M:%S')
#=====================================================================#
#data1=10.160.165.62
#data2=10.160.165.60
#data3=10.160.165.52
#######################################
# Report the outcome of the command that ran immediately before this call.
# Must be invoked directly after the command being checked, because it reads
# that command's exit status from $?.
# Globals:
#   SUCCESS, ERROR, step, client_node, CLICKHOUSE_CLUSTER (read)
#   TIMESTAMP (refreshed)
# Arguments:
#   $1 - label printed in the success line
# Outputs:
#   One formatted status line on stdout.
# Returns:
#   0 when the previous command succeeded; otherwise runs the cleanup query
#   and exits the script with the previous command's exit status.
#######################################
function is_success() {
  # Capture $? first: any later command would overwrite it.
  local rc=$?
  TIMESTAMP=$(date +'%H:%M:%S')
  if [ "$rc" -eq 0 ]; then
    echo -e "$TIMESTAMP $SUCCESS" && printf "%2s %-23s %-15s %-4s\n" " " "$1" "============>" "执行成功"
  else
    echo -e "$TIMESTAMP $ERROR" && printf "%4s %-23s %-15s %-4s\n" " " "step[$step]" "============>" "执行失败"
    # NOTE(review): this cleanup drops a database named `monitor`, while the
    # check flow visible in this file creates `db` — confirm which is intended.
    run_clickhouse_sql "$client_node" "drop database monitor on cluster ${CLICKHOUSE_CLUSTER} SYNC;" &> /dev/null
    # Exit with the failing command's status; $1 is a text label, not a code.
    exit "$rc"
  fi
}
# Name of the ClickHouse cluster used in the ON CLUSTER / Distributed clauses.
CLICKHOUSE_CLUSTER="default"
# Nodes the check talks to: client_node runs the DDL and the insert,
# client_node2 / client_node3 are additionally queried afterwards.
client_node="10.160.165.62"
client_node2="10.160.165.60"
client_node3="10.160.165.52"
# Minimum number of rows expected back from the distributed table.
shard_num=1
# Running step counter shown in the progress and failure messages.
step=0
#######################################
# Run one SQL statement through clickhouse-client on the given node.
# Arguments:
#   $1 - host (IP or name) of the ClickHouse node to connect to
#   $2 - SQL text passed to --query
# Outputs:
#   Whatever clickhouse-client prints (query result on stdout).
# Returns:
#   clickhouse-client's exit status, or 2 when no host was supplied.
#######################################
run_clickhouse_sql() {
  local clickhouse_host=$1
  local sql=$2
  # Refuse to guess a node: a silent fallback would make a per-node check
  # report on the wrong server.
  if [[ -z "$clickhouse_host" ]]; then
    printf 'run_clickhouse_sql: missing host argument\n' >&2
    return 2
  fi
  # Connect to the node requested by the caller (previously the host was
  # hard-coded, so every call reached the same server).
  clickhouse-client \
    --host "$clickhouse_host" \
    --port "9000" \
    --user "default" \
    --password "password" \
    --query "$sql"
}
#######################################
# Abort the script when a step's exit status is not "0".
# Globals:
#   ret (written - deliberately global, later code reads it)
#   step, client_node, CLICKHOUSE_CLUSTER (read)
# Arguments:
#   $1 - exit status of the step that just ran
# Outputs:
#   Two warning lines on stdout when the status is not "0".
# Returns:
#   0 when the status is "0"; otherwise runs the cleanup query and exits the
#   script with that status.
#######################################
check_ret() {
  ret=$1
  # Happy path first: nothing to report.
  [[ "$ret" == "0" ]] && return 0

  printf '%s\n' "=== !!! warning !!!" ">>> step[$step] failed"
  # NOTE(review): this cleanup drops a database named `monitor`, while the
  # check flow visible in this file creates `db` — confirm which is intended.
  run_clickhouse_sql "$client_node" "drop database monitor on cluster ${CLICKHOUSE_CLUSTER} SYNC;" &> /dev/null
  # Left unquoted so an empty status behaves like a bare `exit`.
  exit $ret
}
# Environment variables: load the ClickHouse profile script.
source /etc/profile.d/clickhouse.sh

#######################################
# Print the "step started" banner for the next step.
# Globals:
#   step (incremented), INFO (read)
# Outputs:
#   One banner line on stdout, stamped with the current time.
#######################################
log_step() {
  step=$((step + 1))
  # The step label and the arrow are separate printf arguments so each one
  # lands in its own padded column.
  echo -e "$(date +'%H:%M:%S') $INFO" \
    && printf "%2s %-23s %-15s %-4s\n" " " "step[$step]" "============>" "开始执行"
}

#######################################
# Run "SELECT * FROM <table>" on one node, print the rows, and abort the
# script when fewer rows than expected come back.
# Globals:
#   step (read)
# Arguments:
#   $1 - node to query
#   $2 - table name
#   $3 - minimum number of rows expected
# Outputs:
#   The query result on stdout; two warning lines when the check fails.
# Returns:
#   0 when enough rows were returned; exits the script with status 1 when
#   too few rows came back (check_ret handles a failing query).
#######################################
expect_rows() {
  local node=$1 table=$2 min_rows=$3
  local rows count=0
  # Query once and reuse the output for both display and counting.
  rows=$(run_clickhouse_sql "$node" "SELECT * FROM ${table}")
  check_ret "$?"
  if [[ -n "$rows" ]]; then
    printf '%s\n' "$rows"
    count=$(wc -l <<<"$rows")
  fi
  # Arithmetic comparison: inside [[ ]] the `<` operator compares strings.
  if (( count < min_rows )); then
    echo "=== !!! warning !!!"
    echo ">>> step[$step] failed"
    # A failed check must not report success to the caller.
    exit 1
  fi
}

log_step
echo ">>> create database [db] on all nodes"
run_clickhouse_sql "$client_node" "CREATE DATABASE IF NOT EXISTS db ON CLUSTER ${CLICKHOUSE_CLUSTER}"
check_ret "$?"
echo ""

log_step
echo ">>> create local table [db.tbl_local] on all nodes"
run_clickhouse_sql "$client_node" "
CREATE OR REPLACE TABLE db.tbl_local ON CLUSTER ${CLICKHOUSE_CLUSTER}
(
id Int64,
ts DateTime64,
seq Int32,
a Nullable(Int32),
b String
) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/db/tbl_local', '{replica}', seq)
PARTITION BY toYYYYMM(ts)
ORDER BY id
"
check_ret "$?"
echo ""

log_step
echo ">>> create distributed table [db.tbl] on all nodes"
run_clickhouse_sql "$client_node" "
CREATE OR REPLACE TABLE db.tbl ON CLUSTER ${CLICKHOUSE_CLUSTER}
AS db.tbl_local
ENGINE = Distributed(${CLICKHOUSE_CLUSTER}, db, tbl_local, id);
"
check_ret "$?"
echo ""

insert_client_node=$client_node
log_step
echo ">>> insert one record into local table [db.tbl_local] on node[$insert_client_node] shard[1] replica[1]"
run_clickhouse_sql "$insert_client_node" "
INSERT INTO db.tbl_local values (0, '2021-01-01 00:00:00', 1, null, 'v_1')
"
check_ret "$?"
echo ""

log_step
echo ">>> sleep 1s waiting replica sync"
sleep 1
echo ""

# The inserted row must be visible in the local table on every node.
for query_client_node in "$client_node" "$client_node2" "$client_node3"; do
  log_step
  echo ">>> query local table [db.tbl_local] on node [$query_client_node]"
  expect_rows "$query_client_node" "db.tbl_local" 1
  echo ""
done

# The distributed table must return at least shard_num rows on every node.
# NOTE(review): the third query previously targeted client_node a second
# time; client_node3 is assumed to be the intended node — confirm.
for query_client_node in "$client_node" "$client_node2" "$client_node3"; do
  log_step
  echo ">>> query distributed table [db.tbl] on node [$query_client_node]"
  expect_rows "$query_client_node" "db.tbl" "$shard_num"
  echo ""
done

log_step
echo ">>> drop distributed table [db.tbl] on all nodes"
run_clickhouse_sql "$client_node" "DROP TABLE db.tbl ON CLUSTER ${CLICKHOUSE_CLUSTER} SYNC"
check_ret "$?"
echo ""

log_step
echo ">>> drop local table [db.tbl_local] on all nodes"
run_clickhouse_sql "$client_node" "DROP TABLE db.tbl_local ON CLUSTER ${CLICKHOUSE_CLUSTER} SYNC"
check_ret "$?"
echo ""

log_step
echo ">>> drop database [db] on all nodes"
# is_success reads $? of this command, so it must follow it directly.
run_clickhouse_sql "$client_node" "DROP DATABASE db ON CLUSTER ${CLICKHOUSE_CLUSTER} SYNC"
is_success "check ck_cluster"