运维脚本备忘录
(240918,更新脚本)
#!/bin/bash
# Debian/RHEL/CentOS Check Script
# Refuse to run unless the effective user is root (UID 0).
if [[ "$(id -u)" -ne 0 ]]; then
  echo "请以 root 用户执行此脚本"
  exit 1
fi
#######################################
# Print the host's primary IPv4 address.
# Reads `ifconfig` when it is available and falls back to `ip -4 addr`
# otherwise. The first non-loopback address wins; a loopback address is
# printed only when nothing else is configured.
# Outputs: the address on stdout (nothing when no address is found)
#######################################
detect_ip_addr() {
  local raw
  if command -v ifconfig > /dev/null 2>&1; then
    raw=$(ifconfig)
  else
    raw=$(ip -4 addr)
  fi
  printf '%s\n' "$raw" | awk '
    /inet / {
      addr = $2
      sub(/^addr:/, "", addr)   # legacy net-tools prints "inet addr:1.2.3.4"
      sub(/\/.*$/, "", addr)    # iproute2 prints "inet 1.2.3.4/24"
      if (addr == "") next
      if (first == "") first = addr
      if (addr !~ /^127\./) { print addr; found = 1; exit }
    }
    END { if (!found && first != "") print first }
  '
}

# IP address used in the report file name and in the mail subject.
IPADDR=$(detect_ip_addr)
# Detect the distribution: take the ID= value from the /etc/*release files
# and map it to a display name. Anything other than Debian, CentOS or RHEL
# aborts the script.
OS_TYPE=$(grep -h '^ID=' /etc/*release | cut -d '=' -f 2 | tr -d '"')
case "$OS_TYPE" in
  centos | rhel)
    OS_NAME="RHEL/CentOS"
    ;;
  debian)
    OS_NAME="Debian"
    ;;
  *)
    echo "不支持的操作系统: $OS_TYPE"
    exit 1
    ;;
esac
# Print the OS version taken from VERSION_ID in /etc/os-release; a
# placeholder text is printed when the key is missing or empty.
if [[ ! -f /etc/os-release ]]; then
  echo "/etc/os-release 文件不存在,无法获取版本信息。"
else
  osVersion=$(grep '^VERSION_ID=' /etc/os-release | cut -d '=' -f 2 | tr -d '"')
  echo "$OS_NAME 版本: ${osVersion:-未能获取版本信息}"
fi
# Settings: mail recipient, backup directory and the per-day report file.
EMAIL="example@mail.com" # mailbox that receives the report
BACKUP_DIR="./backup"
RESULTFILE="./log/HostDailyCheck-$IPADDR-$(date +%Y%m%d).txt"
# Make sure the report directory and the backup directory exist.
mkdir -p "${RESULTFILE%/*}"
mkdir -p "$BACKUP_DIR"
#######################################
# Append a blank line followed by a titled separator line to the report.
# Globals:   RESULTFILE (read) - report file that receives the header
# Arguments: $1 - section title
#######################################
log_section() {
  # The title is passed as a printf argument so it is written verbatim.
  printf '\n--------------------- %s ---------------------\n' "$1" >> "$RESULTFILE"
}
# 监控服务器的内存、硬盘、CPU、网络状态
# Append OS name, kernel release, host name and current time to the report.
# Globals: RESULTFILE (read)
getSystemStatus() {
  log_section "系统状态"
  {
    echo "系统: $(uname -o)"
    echo "内核: $(uname -r)"
    echo "主机名: $(hostname)"
    echo "当前时间: $(date +'%F %T')"
  } >> "$RESULTFILE"
}
# Append the output of `lscpu` to the report under its own section.
# Globals: RESULTFILE (read)
getCpuStatus() {
  log_section "CPU 状态"
  lscpu >> "$RESULTFILE"
}
# Append the output of `free -h` to the report under its own section.
# Globals: RESULTFILE (read)
getMemStatus() {
  log_section "内存状态"
  free -h >> "$RESULTFILE"
}
# Append the output of `df -h` to the report under its own section.
# Globals: RESULTFILE (read)
getDiskStatus() {
  log_section "磁盘状态"
  df -h >> "$RESULTFILE"
}
# Append the output of `ip addr` to the report under its own section.
# Globals: RESULTFILE (read)
getNetworkStatus() {
  log_section "网络状态"
  ip addr >> "$RESULTFILE"
}
# Web 服务器监控
# Run `nginx -t` and append its stdout and stderr to the report, or note
# that nginx is not installed.
# Globals: RESULTFILE (read)
checkNginxConfig() {
  log_section "Nginx 配置检查"
  if ! command -v nginx > /dev/null 2>&1; then
    echo "Nginx 未安装" >> "$RESULTFILE"
    return
  fi
  {
    nginx -t 2>&1
    echo "Nginx 配置检查结果已记录"
  } >> "$RESULTFILE"
}
#######################################
# Append the last 10 lines of the nginx access and error logs to the report.
# Globals:   RESULTFILE (read)
# Arguments: $1 - access log path (default /var/log/nginx/access.log)
#            $2 - error log path  (default /var/log/nginx/error.log)
#######################################
monitorNginxLogs() {
  # Paths are local so they cannot collide with the log path variables of
  # other checks.
  local access_log="${1:-/var/log/nginx/access.log}"
  local error_log="${2:-/var/log/nginx/error.log}"

  log_section "Nginx 日志监控"
  if [[ -f "$access_log" ]]; then
    echo "最近的访问日志:" >> "$RESULTFILE"
    tail -n 10 "$access_log" >> "$RESULTFILE"
  else
    echo "Nginx 访问日志不存在" >> "$RESULTFILE"
  fi
  if [[ -f "$error_log" ]]; then
    echo "最近的错误日志:" >> "$RESULTFILE"
    tail -n 10 "$error_log" >> "$RESULTFILE"
  else
    echo "Nginx 错误日志不存在" >> "$RESULTFILE"
  fi
}
# 数据库监控
#######################################
# Report whether MySQL is running and, if so, its connection count, client
# version and current process list.
# Globals: RESULTFILE (read)
#######################################
getMySQLStatus() {
  local unit active_unit="" connections version

  log_section "MySQL 状态"
  # The systemd unit name differs between packages, so each common name is
  # probed in turn.
  for unit in mysql mysqld mariadb; do
    if systemctl is-active "$unit" > /dev/null 2>&1; then
      active_unit=$unit
      break
    fi
  done
  if [[ -z "$active_unit" ]]; then
    echo "MySQL 服务状态: 未运行" >> "$RESULTFILE"
    return
  fi

  echo "MySQL 服务状态: 运行中" >> "$RESULTFILE"
  # Row 1 of the result is the column header; row 2 carries the value.
  connections=$(mysql -e "SHOW STATUS LIKE 'Threads_connected';" | awk 'NR==2 {print $2}')
  echo "当前连接数: $connections" >> "$RESULTFILE"
  # Banner formats: "... Ver 14.14 Distrib 5.7.33, for ..." and
  # "... Ver 8.0.33 for ...". Prefer the value after "Distrib" and fall
  # back to the value after "Ver".
  version=$(mysql -V | awk 'NR == 1 {
    ver = ""
    for (i = 1; i < NF; i++) {
      if ($i == "Distrib") { ver = $(i + 1); break }
      if ($i == "Ver" && ver == "") ver = $(i + 1)
    }
    gsub(/,/, "", ver)
    print ver
  }')
  echo "MySQL 版本: $version" >> "$RESULTFILE"
  log_section "最近的查询性能"
  mysql -e "SHOW FULL PROCESSLIST;" >> "$RESULTFILE"
}
#######################################
# Dump one MySQL database into BACKUP_DIR and log the outcome.
# Globals: BACKUP_DIR (read), RESULTFILE (read)
#######################################
backupMySQL() {
  # Credentials stay local so they are not left in the global scope.
  local db_name="your_mysql_database"
  local db_user="your_mysql_user"
  local db_password="your_mysql_password"
  local timestamp backup_file

  log_section "MySQL 备份"
  timestamp=$(date +'%Y%m%d_%H%M%S')
  backup_file="$BACKUP_DIR/${db_name}_backup_$timestamp.sql"
  # NOTE(review): -p<password> on the command line is visible in the process
  # list; consider a client option file (e.g. ~/.my.cnf) instead.
  if mysqldump -u "$db_user" -p"$db_password" "$db_name" > "$backup_file"; then
    echo "MySQL 备份成功: ${db_name}_backup_$timestamp.sql" >> "$RESULTFILE"
  else
    # The redirect creates the file even when the dump fails; remove it so
    # an empty or partial dump is not mistaken for a usable backup.
    rm -f -- "$backup_file"
    echo "MySQL 备份失败" >> "$RESULTFILE"
  fi
}
#######################################
# Report PostgreSQL service state, client tool availability, connection
# count and recent activity, then trigger a backup.
# Queries run only when psql is available; the backup runs only when
# pg_dump is available.
# Globals: RESULTFILE (read)
#######################################
getPostgreSQLStatus() {
  local db_name="your_postgres_database"
  local db_user="your_postgres_user"
  local timestamp connections
  local has_psql=0 has_pg_dump=0

  log_section "PostgreSQL 状态"
  if ! systemctl is-active postgresql > /dev/null 2>&1; then
    echo "PostgreSQL 服务状态: 未运行" >> "$RESULTFILE"
    return
  fi
  echo "PostgreSQL 服务状态: 运行中" >> "$RESULTFILE"
  timestamp=$(date +'%Y%m%d_%H%M%S')

  if command -v psql > /dev/null 2>&1; then
    has_psql=1
    echo "psql 可用" >> "$RESULTFILE"
  else
    echo "psql 未安装或不可用" >> "$RESULTFILE"
  fi
  if command -v pg_dump > /dev/null 2>&1; then
    has_pg_dump=1
    echo "pg_dump 可用" >> "$RESULTFILE"
  else
    echo "pg_dump 未安装或不可用" >> "$RESULTFILE"
  fi

  if (( has_psql )); then
    # -t prints tuples only; xargs trims the surrounding whitespace.
    connections=$(psql -U "$db_user" -d "$db_name" -c "SELECT COUNT(*) FROM pg_stat_activity;" -t | xargs)
    echo "当前连接数: $connections" >> "$RESULTFILE"
    printf '\n%s\n' "查询性能:" >> "$RESULTFILE"
    psql -U "$db_user" -d "$db_name" -c "SELECT * FROM pg_stat_activity ORDER BY state_change DESC LIMIT 5;" >> "$RESULTFILE"
  fi

  if (( has_pg_dump )); then
    backupPostgreSQL "$db_name" "$db_user" "$timestamp"
  fi
}
#######################################
# Dump one PostgreSQL database into BACKUP_DIR and log the outcome.
# Globals:   BACKUP_DIR (read), RESULTFILE (read)
# Arguments: $1 - database name
#            $2 - database user
#            $3 - timestamp used in the file name (default: current time)
#######################################
backupPostgreSQL() {
  local db_name="$1"
  local db_user="$2"
  local timestamp="${3:-$(date +'%Y%m%d_%H%M%S')}"
  local backup_file="$BACKUP_DIR/${db_name}_backup_$timestamp.sql"

  log_section "PostgreSQL 备份"
  if [[ -z "$db_name" || -z "$db_user" ]]; then
    # Without a database and a user there is nothing meaningful to dump.
    echo "PostgreSQL 备份失败" >> "$RESULTFILE"
    return 1
  fi
  if pg_dump -U "$db_user" "$db_name" > "$backup_file"; then
    echo "PostgreSQL 备份成功: ${db_name}_backup_$timestamp.sql" >> "$RESULTFILE"
  else
    # The redirect creates the file even when the dump fails; remove it so
    # an empty or partial dump is not mistaken for a usable backup.
    rm -f -- "$backup_file"
    echo "PostgreSQL 备份失败" >> "$RESULTFILE"
  fi
}
# 内存数据库监控
#######################################
# Report whether Redis is running and, if so, append `redis-cli info`.
# Globals: RESULTFILE (read)
#######################################
checkRedisStatus() {
  # Kept local so the captured dump does not linger in the global scope.
  local redis_info

  log_section "Redis 状态"
  if ! systemctl is-active redis > /dev/null 2>&1; then
    echo "Redis 服务状态: 未运行" >> "$RESULTFILE"
    return
  fi
  echo "Redis 服务状态: 运行中" >> "$RESULTFILE"
  redis_info=$(redis-cli info)
  echo "Redis 性能信息:" >> "$RESULTFILE"
  printf '%s\n' "$redis_info" >> "$RESULTFILE"
}
# 消息队列监控
#######################################
# Report whether RabbitMQ is running and, if so, list every queue with its
# message and consumer counts.
# Globals: RESULTFILE (read)
#######################################
getRabbitMQStatus() {
  log_section "RabbitMQ 状态"
  if ! systemctl is-active rabbitmq-server > /dev/null 2>&1; then
    echo "RabbitMQ 服务状态: 未运行" >> "$RESULTFILE"
    return
  fi
  echo "RabbitMQ 服务状态: 运行中" >> "$RESULTFILE"
  # NR>1 skips the first output line, which is not a queue row.
  rabbitmqctl list_queues name messages consumers \
    | awk 'NR>1 {print "队列名: " $1 ", 消息数: " $2 ", 消费者数: " $3}' >> "$RESULTFILE"
}
# 日志服务监控
#######################################
# Append the last 10 lines of the HAProxy log to the report.
# Globals:   RESULTFILE (read)
# Arguments: $1 - log path (default /var/log/haproxy.log)
#######################################
monitorHAProxyLogs() {
  # Local so the path does not overwrite a same-named global used elsewhere.
  local haproxy_log="${1:-/var/log/haproxy.log}"

  log_section "HAProxy 日志监控"
  if [[ -f "$haproxy_log" ]]; then
    echo "最近的 HAProxy 日志:" >> "$RESULTFILE"
    tail -n 10 "$haproxy_log" >> "$RESULTFILE"
  else
    echo "HAProxy 日志不存在" >> "$RESULTFILE"
  fi
}
# 软件监控
# Report the systemd state of the first time daemon found on PATH (ntpd is
# tried first, then chronyd), or note that neither is present.
# Globals: RESULTFILE (read)
getNTPStatus() {
  local daemon

  log_section "NTP 状态"
  for daemon in ntpd chronyd; do
    if command -v "$daemon" &> /dev/null; then
      echo "NTP服务状态: $(systemctl is-active "$daemon")" >> "$RESULTFILE"
      return
    fi
  done
  echo "NTP服务未安装或未配置" >> "$RESULTFILE"
}
# Append the output of `java -version` (stdout and stderr) to the report,
# or note that Java is not installed.
# Globals: RESULTFILE (read)
getJDKStatus() {
  log_section "JDK 状态"
  if ! command -v java > /dev/null 2>&1; then
    echo "Java 未安装" >> "$RESULTFILE"
    return
  fi
  java -version >> "$RESULTFILE" 2>&1
}
# Append the output of `mvn -v` (stdout and stderr) to the report, or note
# that Maven is not installed.
# Globals: RESULTFILE (read)
getMavenStatus() {
  log_section "Maven 状态"
  if ! command -v mvn > /dev/null 2>&1; then
    echo "Maven 未安装" >> "$RESULTFILE"
    return
  fi
  mvn -v >> "$RESULTFILE" 2>&1
}
#######################################
# Report whether Prometheus is running and whether its local /metrics
# endpoint answers with HTTP 200.
# Globals: RESULTFILE (read)
#######################################
checkPrometheus() {
  local prometheus_url="http://localhost:9090/metrics"
  local http_code

  log_section "Prometheus 状态"
  if ! systemctl is-active prometheus > /dev/null 2>&1; then
    echo "Prometheus 服务状态: 未运行" >> "$RESULTFILE"
    return
  fi
  echo "Prometheus 服务状态: 运行中" >> "$RESULTFILE"
  # --max-time bounds the probe so an unresponsive endpoint cannot stall
  # the rest of the inspection.
  http_code=$(curl -s -o /dev/null --max-time 10 -w "%{http_code}" "$prometheus_url")
  if [[ "$http_code" == "200" ]]; then
    echo "Prometheus 指标收集正常" >> "$RESULTFILE"
  else
    echo "Prometheus 指标收集异常" >> "$RESULTFILE"
  fi
}
#######################################
# Report whether Grafana is running and whether its local web UI answers
# with HTTP 200.
# Globals: RESULTFILE (read)
#######################################
checkGrafana() {
  local grafana_url="http://localhost:3000"
  local http_code

  log_section "Grafana 状态"
  if ! systemctl is-active grafana-server > /dev/null 2>&1; then
    echo "Grafana 服务状态: 未运行" >> "$RESULTFILE"
    return
  fi
  echo "Grafana 服务状态: 运行中" >> "$RESULTFILE"
  # -L follows redirects so that a redirect from the base URL (e.g. to a
  # login page) is judged by the page it leads to, not by the 3xx itself.
  # --max-time bounds the probe so it cannot stall the inspection.
  http_code=$(curl -s -L -o /dev/null --max-time 10 -w "%{http_code}" "$grafana_url")
  if [[ "$http_code" == "200" ]]; then
    echo "Grafana 仪表盘可用" >> "$RESULTFILE"
  else
    echo "Grafana 仪表盘不可用" >> "$RESULTFILE"
  fi
}
# CI/CD 监控
# Report whether the gitlab and jenkins systemd units are active.
# Globals: RESULTFILE (read)
getCIStatus() {
  local entry unit label

  log_section "CI/CD 状态"
  # Each entry is "<systemd unit>:<label printed in the report>".
  for entry in "gitlab:GitLab" "jenkins:Jenkins"; do
    unit=${entry%%:*}
    label=${entry#*:}
    if systemctl is-active "$unit" &> /dev/null; then
      echo "$label 服务状态: 运行中" >> "$RESULTFILE"
    else
      echo "$label 服务状态: 未运行" >> "$RESULTFILE"
    fi
  done
}
# 容器监控
#######################################
# Append Docker containers, images, resource usage, the last 10 log lines
# of every running container and the network list to the report.
# Globals: RESULTFILE (read)
#######################################
dockerInspection() {
  local container
  local -a containers=()

  log_section "Docker 状态"
  if ! command -v docker > /dev/null 2>&1; then
    echo "Docker 未安装" >> "$RESULTFILE"
    return
  fi

  {
    echo "当前运行的 Docker 容器:"
    docker ps
    printf '\n%s\n' "所有 Docker 容器:"
    docker ps -a
    printf '\n%s\n' "当前 Docker 镜像:"
    docker images
    printf '\n%s\n' "Docker 容器资源使用情况:"
    docker stats --no-stream
    printf '\n%s\n' "Docker 容器日志:"
    # One container id per line, read into an array.
    mapfile -t containers < <(docker ps -q)
    for container in "${containers[@]}"; do
      printf '\n%s\n' "容器 $container 的日志:"
      # 2>&1 keeps whatever `docker logs` writes to stderr next to this
      # container's header.
      docker logs --tail 10 "$container" 2>&1
    done
    printf '\n%s\n' "当前 Docker 网络配置:"
    docker network ls
  } >> "$RESULTFILE"
}
# 容器编排监控
#######################################
# Append Kubernetes cluster info, node list and pod list to the report.
# Globals: RESULTFILE (read)
#######################################
checkKubernetesStatus() {
  local cluster_status
  # Bounds every API call so an unreachable API server cannot stall the
  # inspection.
  local -r timeout_flag="--request-timeout=10s"

  log_section "Kubernetes 状态"
  if ! command -v kubectl > /dev/null 2>&1; then
    echo "kubectl 未安装" >> "$RESULTFILE"
    return
  fi

  echo "Kubernetes 集群状态:" >> "$RESULTFILE"
  cluster_status=$(kubectl cluster-info "$timeout_flag")
  printf '%s\n' "$cluster_status" >> "$RESULTFILE"
  printf '\n%s\n' "节点状态:" >> "$RESULTFILE"
  kubectl get nodes "$timeout_flag" >> "$RESULTFILE"
  printf '\n%s\n' "Pod 状态:" >> "$RESULTFILE"
  kubectl get pods --all-namespaces "$timeout_flag" >> "$RESULTFILE"
}
# Main flow: run every check, then mail the finished report.
#
# The report is emptied first. The group's own stdout/stderr is then opened
# in append mode: each check appends to the same file, and output written
# through a non-append descriptor would start at offset 0 and overwrite
# what the checks had already appended.
: > "$RESULTFILE"
{
  getSystemStatus
  getCpuStatus
  getMemStatus
  getDiskStatus
  getNetworkStatus
  checkNginxConfig
  monitorNginxLogs
  getMySQLStatus
  backupMySQL
  getPostgreSQLStatus
  checkRedisStatus
  getRabbitMQStatus
  # This check is defined in this file but was never invoked before.
  monitorHAProxyLogs
  getNTPStatus
  getJDKStatus
  getMavenStatus
  checkPrometheus
  checkGrafana
  getCIStatus
  dockerInspection
  checkKubernetesStatus
} >> "$RESULTFILE" 2>&1

# Send the report to the configured mailbox; claim delivery only when the
# mail command actually succeeded.
if ! command -v mail > /dev/null 2>&1; then
  echo "检查结果已保存到:$RESULTFILE,但未找到 mail 命令,邮件未发送" >&2
elif mail -s "巡检结果 - $IPADDR" "$EMAIL" < "$RESULTFILE"; then
  echo "检查结果已保存到:$RESULTFILE,且已发送到 $EMAIL"
else
  echo "检查结果已保存到:$RESULTFILE,但邮件发送失败" >&2
fi
(未修改)
package main
import (
	"bytes"
	"fmt"
	"mime"
	"net/mail"
	"net/smtp"
	"os"
	"os/exec"
	"strings"
	"time"
)
// Mail settings used by sendEmail.
const (
	// emailSender is the mailbox the report is sent from.
	emailSender = "your_email@gmail.com"
	// emailPassword is the password of the sending mailbox.
	emailPassword = "your_email_password"
	// emailReceiver is the mailbox that receives the report.
	emailReceiver = "example@mail.com"
)
// main runs the host inspection: it requires root, collects each status
// section into one report, writes the report under ./log and mails it.
// Every failure is printed and ends the process with exit status 1.
func main() {
	if os.Geteuid() != 0 {
		fmt.Println("请以 root 用户执行此脚本")
		os.Exit(1)
	}

	ipAddr, err := getIP()
	if err != nil {
		fmt.Println("获取IP地址失败:", err)
		os.Exit(1)
	}

	osName, osVersion, err := getOSInfo()
	if err != nil {
		fmt.Println("获取操作系统信息失败:", err)
		os.Exit(1)
	}

	// "20060102" is Go's reference-time layout for YYYYMMDD; an empty
	// layout would format to an empty string and drop the date from the
	// file name.
	resultFile := fmt.Sprintf("./log/HostDailyCheck-%s-%s.txt", ipAddr, time.Now().Format("20060102"))
	for _, dir := range []string{"./log", "./backup"} {
		if err := os.MkdirAll(dir, os.ModePerm); err != nil {
			fmt.Println("创建目录失败:", err)
			os.Exit(1)
		}
	}

	var result bytes.Buffer
	fmt.Fprintf(&result, "IP 地址: %s\n", ipAddr)
	fmt.Fprintf(&result, "%s 版本: %s\n", osName, osVersion)
	result.WriteString(getSystemStatus())
	result.WriteString(getCPUStatus())
	result.WriteString(getMemStatus())
	result.WriteString(getDiskStatus())
	result.WriteString(getNetworkStatus())
	result.WriteString(checkNginxConfig())
	result.WriteString(monitorNginxLogs())
	result.WriteString(getMySQLStatus())
	// Backup and other status checks can be added similarly

	if err := os.WriteFile(resultFile, result.Bytes(), 0644); err != nil {
		fmt.Println("写入结果文件失败:", err)
		os.Exit(1)
	}

	if err := sendEmail("巡检结果 - "+ipAddr, result.String()); err != nil {
		fmt.Println("发送邮件失败:", err)
		os.Exit(1)
	}
	fmt.Println("检查结果已发送到:", emailReceiver)
}
// getIP returns the host's primary IPv4 address as reported by ifconfig.
// The first non-loopback address wins; a loopback address is returned only
// when nothing else is configured. An error is returned when ifconfig
// fails or prints no IPv4 address.
func getIP() (string, error) {
	output, err := exec.Command("ifconfig").Output()
	if err != nil {
		return "", err
	}

	loopback := ""
	for _, line := range strings.Split(string(output), "\n") {
		fields := strings.Fields(line)
		// Guard the index: a line needs both the "inet" keyword and a value.
		if len(fields) < 2 || fields[0] != "inet" {
			continue
		}
		// Legacy net-tools prints "inet addr:1.2.3.4".
		addr := strings.TrimPrefix(fields[1], "addr:")
		if addr == "" {
			continue
		}
		if !strings.HasPrefix(addr, "127.") {
			return addr, nil
		}
		if loopback == "" {
			loopback = addr
		}
	}
	if loopback != "" {
		return loopback, nil
	}
	return "", fmt.Errorf("无法找到 IP 地址")
}
// getOSInfo reads the ID and VERSION_ID keys from /etc/os-release and
// returns them as (name, version). Surrounding quotes are removed.
// An error is returned when the file cannot be read or has no ID. A
// missing VERSION_ID is reported with a placeholder text rather than an
// error.
func getOSInfo() (string, string, error) {
	// Read the file directly; there is no need to spawn `cat` for it.
	data, err := os.ReadFile("/etc/os-release")
	if err != nil {
		return "", "", err
	}

	var osName, osVersion string
	for _, line := range strings.Split(string(data), "\n") {
		line = strings.TrimSpace(line)
		switch {
		case strings.HasPrefix(line, "ID="):
			// TrimPrefix keeps the whole value even if it contains '='.
			osName = strings.Trim(strings.TrimPrefix(line, "ID="), "\"'")
		case strings.HasPrefix(line, "VERSION_ID="):
			osVersion = strings.Trim(strings.TrimPrefix(line, "VERSION_ID="), "\"'")
		}
	}

	if osName == "" {
		return "", "", fmt.Errorf("无法获取操作系统信息")
	}
	if osVersion == "" {
		osVersion = "未能获取版本信息"
	}
	return osName, osVersion, nil
}
// getSystemStatus returns the system section of the report: OS name,
// kernel release, host name and the current time in RFC 1123 format.
func getSystemStatus() string {
	var section bytes.Buffer
	section.WriteString("系统状态:\n")
	fmt.Fprintf(&section, "系统: %s\n", execOutput("uname", "-o"))
	fmt.Fprintf(&section, "内核: %s\n", execOutput("uname", "-r"))
	fmt.Fprintf(&section, "主机名: %s\n", execOutput("hostname"))
	fmt.Fprintf(&section, "当前时间: %s\n", time.Now().Format(time.RFC1123))
	return section.String()
}
// getCPUStatus returns the CPU section of the report, built from the
// output of `lscpu`.
func getCPUStatus() string {
	cpuInfo := execOutput("lscpu")
	return fmt.Sprintf("CPU 状态:\n%s\n", cpuInfo)
}
// getMemStatus returns the memory section of the report, built from the
// output of `free -h`.
func getMemStatus() string {
	memInfo := execOutput("free", "-h")
	return fmt.Sprintf("内存状态:\n%s\n", memInfo)
}
// getDiskStatus returns the disk section of the report, built from the
// output of `df -h`.
func getDiskStatus() string {
	diskInfo := execOutput("df", "-h")
	return fmt.Sprintf("磁盘状态:\n%s\n", diskInfo)
}
// getNetworkStatus returns the network section of the report, built from
// the output of `ip addr`.
func getNetworkStatus() string {
	netInfo := execOutput("ip", "addr")
	return fmt.Sprintf("网络状态:\n%s\n", netInfo)
}
// checkNginxConfig returns the nginx section of the report: the combined
// stdout and stderr of `nginx -t`, or a note that nginx is not installed.
func checkNginxConfig() string {
	var result bytes.Buffer
	result.WriteString("Nginx 配置检查:\n")

	if _, err := exec.LookPath("nginx"); err != nil {
		result.WriteString("Nginx 未安装\n")
		return result.String()
	}

	// CombinedOutput captures stderr as well as stdout, so whatever the
	// test run prints ends up in the report.
	output, err := exec.Command("nginx", "-t").CombinedOutput()
	result.Write(output)
	if err != nil {
		result.WriteString(fmt.Sprintf("执行命令失败: %s\n", err))
	}
	return result.String()
}
// monitorNginxLogs returns the nginx log section of the report: the last
// 10 lines of the access log and of the error log, or a note for each log
// file that cannot be stat'ed.
func monitorNginxLogs() string {
	var section bytes.Buffer
	section.WriteString("Nginx 日志监控:\n")

	logs := []struct {
		path    string
		found   string
		missing string
	}{
		{"/var/log/nginx/access.log", "最近的访问日志:\n", "Nginx 访问日志不存在\n"},
		{"/var/log/nginx/error.log", "最近的错误日志:\n", "Nginx 错误日志不存在\n"},
	}
	for _, entry := range logs {
		if _, statErr := os.Stat(entry.path); statErr != nil {
			section.WriteString(entry.missing)
			continue
		}
		section.WriteString(entry.found)
		section.WriteString(execOutput("tail", "-n", "10", entry.path))
	}
	return section.String()
}
// getMySQLStatus returns the MySQL section of the report. When
// `systemctl is-active mysql` succeeds it includes the raw output of the
// Threads_connected query, `mysql -V` and the full process list; otherwise
// it states that the service is not running.
func getMySQLStatus() string {
	var section bytes.Buffer
	section.WriteString("MySQL 状态:\n")

	if runErr := exec.Command("systemctl", "is-active", "mysql").Run(); runErr != nil {
		section.WriteString("MySQL 服务状态: 未运行\n")
		return section.String()
	}

	section.WriteString("MySQL 服务状态: 运行中\n")
	section.WriteString(execOutput("mysql", "-e", "SHOW STATUS LIKE 'Threads_connected';"))
	section.WriteString(execOutput("mysql", "-V"))
	section.WriteString("最近的查询性能:\n")
	section.WriteString(execOutput("mysql", "-e", "SHOW FULL PROCESSLIST;"))
	return section.String()
}
// execOutput runs command with args and returns its standard output.
// When the command cannot be started or exits with an error, a one-line
// failure message containing the error is returned instead.
func execOutput(command string, args ...string) string {
	stdout, runErr := exec.Command(command, args...).Output()
	if runErr == nil {
		return string(stdout)
	}
	return fmt.Sprintf("执行命令失败: %s\n", runErr)
}
// sendEmail sends body as a UTF-8 plain-text message with the given
// subject from emailSender to emailReceiver through smtp.gmail.com:587,
// authenticating with emailSender and emailPassword. It returns the error
// reported by smtp.SendMail, if any.
func sendEmail(subject, body string) error {
	const smtpHost = "smtp.gmail.com"

	from := mail.Address{Address: emailSender}
	to := mail.Address{Address: emailReceiver}

	headers := "From: " + from.String() + "\r\n" +
		"To: " + to.String() + "\r\n" +
		// Non-ASCII subjects must be sent as RFC 2047 encoded words.
		"Subject: " + mime.QEncoding.Encode("utf-8", subject) + "\r\n" +
		"Date: " + time.Now().Format(time.RFC1123Z) + "\r\n" +
		"MIME-Version: 1.0\r\n" +
		// Declare the charset so the non-ASCII report text is decoded as UTF-8.
		"Content-Type: text/plain; charset=UTF-8\r\n" +
		"Content-Transfer-Encoding: 8bit\r\n" +
		"\r\n"

	auth := smtp.PlainAuth("", emailSender, emailPassword, smtpHost)
	return smtp.SendMail(smtpHost+":587", auth, emailSender, []string{emailReceiver}, []byte(headers+body))
}