02 常见中间件集群状态检查

① redis 集群状态检查

app1=10.160.169.2
app2=10.160.169.140
data1=10.160.165.62
data2=10.160.165.60
data3=10.160.165.52

redis-cli -h 10.160.165.62 -p 6379 -a password   INFO REPLICATION |grep role
data2=10.160.165.60
redis-cli -h 10.160.165.60 -p 6379 -a password   INFO REPLICATION |grep role
data3=10.160.165.52
redis-cli -h 10.160.165.52 -p 6379 -a password   INFO REPLICATION |grep role

② mongo 集群状态检查

data1=10.160.165.62 执行
mongo --host 10.160.165.62 --port 27017
use admin
db.auth('root','password')
rs.status()

③ rabbitmq 集群状态检查

data1=10.160.165.62 执行
rabbitmqctl cluster_status
systemctl status rabbitmq-server

④ minio 集群状态检查

data1=10.160.165.62 执行
curl -I http://10.160.165.62:9199/minio/health/live
curl -I http://10.160.165.62:9199/minio/health/cluster/read

⑤ zookeeper 集群状态检查

/usr/local/zookeeper/bin/zkCli.sh -server 10.160.165.62:2181,10.160.165.60:2181,10.160.165.52:2181
ls /

⑥ kafka 集群状态检查

#①发布消息
#创建主题
[root@cemmaidanglao-srv-uat-01159-ecs bin]# ./kafka-topics.sh --create --zookeeper 172.20.200.252:2181,172.20.200.2:2181,172.20.200.3:2181 --topic test --partitions 3 --replication-factor 1
Created topic test.
#向主题中写入内容
[root@cemmaidanglao-srv-uat-01159-ecs bin]# ./kafka-console-producer.sh --broker-list 172.20.200.252:9092,172.20.200.2:9092,172.20.200.3:9092 --topic test
>hello world
>hello abao
>111111
#CTRL+D结束写入内容
#②订阅消息,消费消息
[root@cemmaidanglao-srv-uat-01158-ecs bin]# ./kafka-console-consumer.sh --bootstrap-server localhost:9092 --topic test --from-beginning
hello world
hello abao
111111
#CTRL+C结束消费

⑦ elasticsearch 查看集群状态

 curl 172.20.200.3:9200/_cat/nodes

⑧ mysql 查看集群状态

# mysql 查看一主二从状态
mysql -S /tmp/mysql3309.sock -e "SHOW SLAVE STATUS\G" | grep "Running:"
#显示下面两个状态均为 Yes，即表示 MySQL 主从复制搭建成功
#Slave_IO_Running: Yes
#Slave_SQL_Running: Yes

#从库查看和主库连接状态:
SHOW SLAVE STATUS\G
#主库查看从库binlog_Dump线程:
SHOW PROCESSLIST; 
#查看用户信息:
select user,host from mysql.user limit 3;
#查看主库binlog 日志
SHOW MASTER STATUS;

⑨ clickhouse 集群检查

#!/bin/bash
# -------------------------------------------------------------------------------
# filename:     ck-cluster-check.sh
# description:  check clickhouse cluster health status
# usage:        bash ck-cluster-check.sh
# -------------------------------------------------------------------------------


#=====================================================================#
### Colors
# Each colour variable is a literal "[" followed by an ANSI SGR colour
# sequence; PLAIN resets the colour and closes the bracket with "]:".
# The sequences are stored as the text "\033..." and are only turned into
# real escape bytes when printed with `echo -e`.
RED='[\033[31m'      # red    -> ${RED} ERROR ${PLAIN}
GREEN='[\033[32m'    # green  -> ${GREEN} SUCCESS ${PLAIN}
YELLOW='[\033[33m'   # yellow -> ${YELLOW} WARNING ${PLAIN}
BLUE='[\033[34m'     # blue   -> ${BLUE} INFO ${PLAIN}
PLAIN='\033[0m]:'    # colour suffix

# Ready-made log tags. The trailing "\c" makes `echo -e` stop printing at that
# point (no newline), so a following printf continues on the same line.
SUCCESS="${GREEN} SUCCESS ${PLAIN} \c"
ERROR="${RED} ERROR ${PLAIN} \c"
WARNING="${YELLOW} WARNING ${PLAIN} \c"
# Legacy misspelled name, kept so existing references keep working.
WARING=$WARNING
# INFO is blue, as documented next to BLUE (it previously reused YELLOW).
INFO="${BLUE} INFO ${PLAIN} \c"
TIMESTAMP=$(date +'%H:%M:%S')
#=====================================================================#
#data1=10.160.165.62
#data2=10.160.165.60
#data3=10.160.165.52

# Report the outcome of the command that ran immediately before this call.
#
# Globals:   TIMESTAMP (written); SUCCESS, ERROR, step, client_node,
#            CLICKHOUSE_CLUSTER (read)
# Arguments: $1 - label printed on the success line
# Outputs:   a coloured status line on stdout
# Returns:   0 when the previous command succeeded. Otherwise it makes a
#            best-effort attempt to drop the scratch database `db` and exits
#            the script with the failing command's status.
function is_success() {
    # Must be the very first statement: any other command would overwrite $?.
    local rc=$?

    TIMESTAMP=$(date +'%H:%M:%S')
    if [ "$rc" -eq 0 ]; then
        echo -e "$TIMESTAMP" "$SUCCESS" && printf "%2s %-23s %-15s %-4s\n" " " "$1" "============>" "执行成功"
    else
        echo -e "$TIMESTAMP" "$ERROR" && printf "%4s %-23s %-15s %-4s\n" " " "step[$step]" "============>" "执行失败"
        # Clean up only the database this script creates; errors are ignored on
        # purpose because the cluster may already be unhealthy at this point.
        run_clickhouse_sql "$client_node" "DROP DATABASE IF EXISTS db ON CLUSTER ${CLICKHOUSE_CLUSTER} SYNC" &> /dev/null

        # Propagate the real failure status ($1 is a label, not a number).
        exit "$rc"
    fi
}

 
# Name of the ClickHouse cluster used in the ON CLUSTER clauses.
CLICKHOUSE_CLUSTER="default"

# Addresses of the three nodes the script talks to.
client_node="10.160.165.62"
client_node2="10.160.165.60"
client_node3="10.160.165.52"

# Minimum number of rows expected back from the distributed table.
shard_num=1

# Running step counter shown in the progress banners.
step=0
 
# Run one SQL statement through clickhouse-client on a specific node.
#
# Arguments: $1 - host/IP of the node to connect to
#            $2 - SQL text to execute
# Outputs:   whatever clickhouse-client prints
# Returns:   the exit status of clickhouse-client
run_clickhouse_sql() {
    local clickhouse_host=$1
    local sql=$2
    # Connect to the requested node. The host used to be hard-coded, which
    # made every "per node" check silently query the same server.
    # NOTE(review): the password is a placeholder passed on the command line,
    # where it is visible in `ps`; consider supplying it another way.
    clickhouse-client \
        --host "$clickhouse_host" \
        --port "9000" \
        --user "default"  \
        --password "password" \
        --query "$sql"
}
 
# Abort the script when a step reported a non-zero status.
#
# Globals:   ret (written); step, client_node, CLICKHOUSE_CLUSTER (read)
# Arguments: $1 - exit status of the step that just ran
# Outputs:   a warning naming the failed step, on stdout
# Returns:   0 when $1 is "0". Otherwise it makes a best-effort attempt to
#            drop the scratch database `db` and exits the script with that
#            status.
check_ret() {
    # Left global on purpose: code later in the script reads $ret.
    ret=$1
    [[ "$ret" == "0" ]] && return 0

    echo "=== !!! warning !!!"
    echo ">>> step[$step] failed"
    # Remove only the database this script creates. Errors are ignored on
    # purpose because the cluster may already be unhealthy at this point.
    run_clickhouse_sql "$client_node" "DROP DATABASE IF EXISTS db ON CLUSTER ${CLICKHOUSE_CLUSTER} SYNC" &> /dev/null
    exit "$ret"
}

#环境变量
# Environment variables for the ClickHouse tooling.
source /etc/profile.d/clickhouse.sh

# Print the banner for the next step and a one-line description of it.
#
# Globals:   step, TIMESTAMP (written); INFO (read)
# Arguments: $1 - description of what the step does
begin_step() {
    step=$((step + 1))
    TIMESTAMP=$(date +'%H:%M:%S')
    echo -e "$TIMESTAMP" "$INFO" && printf "%2s %-23s %-15s %-4s\n" " " "step[$step]" "============>" "开始执行"
    echo ">>> $1"
}

# Run a query on one node, print its rows, and abort the script unless at
# least the expected number of rows came back.
#
# Globals:   step, client_node, CLICKHOUSE_CLUSTER (read)
# Arguments: $1 - node to query
#            $2 - SQL text
#            $3 - minimum number of rows required
verify_rows() {
    local node=$1 sql=$2 min_rows=$3
    local rows line

    # Declared separately above so $? reflects the query, not `local`.
    rows=$(run_clickhouse_sql "$node" "$sql")
    check_ret "$?"

    if [[ -z "$rows" ]]; then
        line=0
    else
        printf '%s\n' "$rows"
        line=$(wc -l <<<"$rows")
    fi

    # Numeric comparison; `[[ line < 1 ]]` would compare strings and never fire.
    if (( line < min_rows )); then
        echo "=== !!! warning !!!"
        echo ">>> step[$step] failed"
        echo ">>> expected at least ${min_rows} row(s) from node [$node], got $((line))"
        # Best-effort removal of the scratch database before giving up.
        run_clickhouse_sql "$client_node" "DROP DATABASE IF EXISTS db ON CLUSTER ${CLICKHOUSE_CLUSTER} SYNC" &> /dev/null
        exit 1
    fi
}

begin_step "create database [db] on all nodes"
run_clickhouse_sql "$client_node" "CREATE DATABASE IF NOT EXISTS db ON CLUSTER ${CLICKHOUSE_CLUSTER}"
check_ret "$?"
echo ""

begin_step "create local table [db.tbl_local] on all nodes"
run_clickhouse_sql "$client_node" "
CREATE OR REPLACE TABLE db.tbl_local ON CLUSTER ${CLICKHOUSE_CLUSTER}
(
    id  Int64,
    ts  DateTime64,
    seq Int32,
    a   Nullable(Int32),
    b   String
) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/db/tbl_local', '{replica}', seq)
PARTITION BY toYYYYMM(ts)
ORDER BY id
"
check_ret "$?"
echo ""

begin_step "create distributed table [db.tbl] on all nodes"
run_clickhouse_sql "$client_node" "
CREATE OR REPLACE TABLE db.tbl ON CLUSTER ${CLICKHOUSE_CLUSTER}
AS db.tbl_local
ENGINE = Distributed(${CLICKHOUSE_CLUSTER}, db, tbl_local, id);
"
check_ret "$?"
echo ""

insert_client_node=$client_node

begin_step "insert one record into local table [db.tbl_local] on node[$insert_client_node] shard[1] replica[1]"
run_clickhouse_sql "$insert_client_node" "
INSERT INTO db.tbl_local values (0, '2021-01-01 00:00:00', 1, null, 'v_1')
"
check_ret "$?"
echo ""

begin_step "sleep 1s waiting replica sync"
sleep 1
echo ""

# The inserted row must be visible in the local table on every node.
for query_client_node in "$client_node" "$client_node2" "$client_node3"; do
    begin_step "query local table [db.tbl_local] on node [$query_client_node]"
    verify_rows "$query_client_node" "SELECT * FROM db.tbl_local" 1
    echo ""
done

# The distributed table must answer on every node as well (the third check
# used to query the first node again instead of the third).
for query_client_node in "$client_node" "$client_node2" "$client_node3"; do
    begin_step "query distributed table [db.tbl] on node [$query_client_node]"
    verify_rows "$query_client_node" "SELECT * FROM db.tbl" "$shard_num"
    echo ""
done

begin_step "drop distributed table [db.tbl] on all nodes"
run_clickhouse_sql "$client_node" "DROP TABLE db.tbl ON CLUSTER ${CLICKHOUSE_CLUSTER} SYNC"
check_ret "$?"
echo ""

begin_step "drop local table [db.tbl_local] on all nodes"
run_clickhouse_sql "$client_node" "DROP TABLE db.tbl_local ON CLUSTER ${CLICKHOUSE_CLUSTER} SYNC"
check_ret "$?"
echo ""

begin_step "drop database [db] on all nodes"
run_clickhouse_sql "$client_node" "DROP DATABASE db ON CLUSTER ${CLICKHOUSE_CLUSTER} SYNC"
# is_success reads $? of the DROP above, so nothing may run in between.
is_success "check ck_cluster"
posted @ 2022-05-26 21:16  潜龙~勿用  阅读(590)  评论(0编辑  收藏  举报