运维脚本备忘录

(240918,更新脚本)

#!/bin/bash
# Debian/RHEL/CentOS Check Script

# Refuse to run unless the effective user is root (uid 0).
# The command substitution is quoted so an empty result cannot break the
# test, and the diagnostic goes to stderr rather than stdout.
if [ "$(id -u)" -ne 0 ]; then
  echo "请以 root 用户执行此脚本" >&2
  exit 1
fi

# Determine the host IPv4 address (interpolated into the report file name and
# the mail subject). Use the first "inet" entry printed by ifconfig, accepting
# the legacy "inet addr:1.2.3.4" layout, and fall back to iproute2 when
# ifconfig is missing or prints nothing.
IPADDR=$(ifconfig 2>/dev/null | awk '/inet / { sub(/^addr:/, "", $2); print $2; exit }')
if [ -z "$IPADDR" ]; then
  IPADDR=$(ip -4 -o addr show scope global 2>/dev/null \
    | awk '{ split($4, cidr, "/"); print cidr[1]; exit }')
fi
# Never leave the value empty: it becomes part of a file name.
IPADDR=${IPADDR:-unknown}

# Detect the distribution from the ID= field of the /etc/*release files.
# Only the first match is kept: several release files (or duplicate ID= lines)
# would otherwise produce a multi-line value that never matches the case arms.
# Both double and single quotes around the value are stripped.
OS_TYPE=$(grep -h '^ID=' /etc/*release 2>/dev/null | head -n 1 | cut -d'=' -f2 | tr -d "\"'")
case "$OS_TYPE" in
  debian)
    OS_NAME="Debian"
    ;;
  centos|rhel)
    OS_NAME="RHEL/CentOS"
    ;;
  *)
    # Any other distribution is rejected.
    echo "不支持的操作系统: $OS_TYPE"
    exit 1
    ;;
esac

# Print the OS version taken from VERSION_ID in /etc/os-release, or a notice
# when that file does not exist.
if [ ! -f /etc/os-release ]; then
  echo "/etc/os-release 文件不存在,无法获取版本信息。"
else
  osVersion=$(awk -F'=' '/^VERSION_ID=/ { print $2 }' /etc/os-release | tr -d '"')
  echo "$OS_NAME 版本: ${osVersion:-未能获取版本信息}"
fi

# Settings: backup directory, mail recipient and the per-day report file.
BACKUP_DIR="./backup"
EMAIL="example@mail.com"  # address that receives the report
RESULTFILE="./log/HostDailyCheck-${IPADDR}-$(date +%Y%m%d).txt"
# Create the report's parent directory and the backup directory.
mkdir -p "${RESULTFILE%/*}" "$BACKUP_DIR"

# Append a section banner to the report file.
# Globals:   RESULTFILE (read) - report file the banner is appended to
# Arguments: $1 - section title
# Outputs:   a blank line followed by the banner line, appended to RESULTFILE
function log_section {
  # The title is passed as a printf argument so it is expanded (the previous
  # escaped "\$1" printed a literal "$1") and never interpreted as a format.
  printf '\n--------------------- %s ---------------------\n' "$1" >> "$RESULTFILE"
}

# Host monitoring: system, CPU, memory, disk and network status.
# getSystemStatus appends the OS name, kernel release, host name and the
# current time to the report file.
function getSystemStatus {
  log_section "系统状态"
  {
    echo "系统: $(uname -o)"
    echo "内核: $(uname -r)"
    echo "主机名: $(hostname)"
    echo "当前时间: $(date +'%F %T')"
  } >> "$RESULTFILE"
}

# Append the CPU section banner and the lscpu report to the report file.
function getCpuStatus {
  log_section "CPU 状态"
  lscpu 1>> "${RESULTFILE}"
}

# Append the memory section banner and the `free -h` output to the report file.
function getMemStatus {
  log_section "内存状态"
  free -h 1>> "${RESULTFILE}"
}

# Append the disk section banner and the `df -h` output to the report file.
function getDiskStatus {
  log_section "磁盘状态"
  df -h 1>> "${RESULTFILE}"
}

# Append the network section banner and the `ip addr` output to the report file.
function getNetworkStatus {
  log_section "网络状态"
  ip addr 1>> "${RESULTFILE}"
}

# Web server monitoring.
# checkNginxConfig runs `nginx -t` (stdout and stderr both captured) when
# nginx is on PATH, otherwise it records that Nginx is not installed.
function checkNginxConfig {
  log_section "Nginx 配置检查"

  if ! command -v nginx &> /dev/null; then
    echo "Nginx 未安装" >> "$RESULTFILE"
    return
  fi

  nginx -t >> "$RESULTFILE" 2>&1
  echo "Nginx 配置检查结果已记录" >> "$RESULTFILE"
}

# Append the last 10 lines of the Nginx access and error logs to the report.
# Globals:   RESULTFILE (read)
# Arguments: $1 - access log path (default: /var/log/nginx/access.log)
#            $2 - error log path  (default: /var/log/nginx/error.log)
# Outputs:   log excerpts, or a "does not exist" notice per log, appended to
#            RESULTFILE
function monitorNginxLogs {
  # Declared local so the paths do not leak into (or clobber) global state.
  local ACCESS_LOG="${1:-/var/log/nginx/access.log}"
  local ERROR_LOG="${2:-/var/log/nginx/error.log}"

  log_section "Nginx 日志监控"

  if [ -f "$ACCESS_LOG" ]; then
    echo "最近的访问日志:" >> "$RESULTFILE"
    tail -n 10 -- "$ACCESS_LOG" >> "$RESULTFILE"
  else
    echo "Nginx 访问日志不存在" >> "$RESULTFILE"
  fi

  if [ -f "$ERROR_LOG" ]; then
    echo "最近的错误日志:" >> "$RESULTFILE"
    tail -n 10 -- "$ERROR_LOG" >> "$RESULTFILE"
  else
    echo "Nginx 错误日志不存在" >> "$RESULTFILE"
  fi
}

# Database monitoring.
# getMySQLStatus records the MySQL/MariaDB service state and, when a service
# is active, the connection count, client version and full process list.
# Globals:   RESULTFILE (read)
# Outputs:   status lines appended to RESULTFILE
function getMySQLStatus {
  local unit active_unit="" connections version

  log_section "MySQL 状态"

  # The unit is named "mysql" on Debian and "mysqld" or "mariadb" on
  # RHEL/CentOS, so probe all three.
  for unit in mysql mysqld mariadb; do
    if systemctl is-active "$unit" > /dev/null 2>&1; then
      active_unit=$unit
      break
    fi
  done

  if [ -z "$active_unit" ]; then
    echo "MySQL 服务状态: 未运行" >> "$RESULTFILE"
    return
  fi
  echo "MySQL 服务状态: 运行中" >> "$RESULTFILE"

  # Row 2 of the result holds "Threads_connected <value>".
  connections=$(mysql -e "SHOW STATUS LIKE 'Threads_connected';" | awk 'NR==2 {print $2}')
  echo "当前连接数: $connections" >> "$RESULTFILE"

  # `mysql -V` prints either "... Ver 14.14 Distrib 5.7.x, for ..." or
  # "... Ver 8.0.x for ...": prefer the token after "Distrib", otherwise the
  # token after "Ver", instead of relying on a fixed field position.
  version=$(mysql -V | awk '{
    for (i = 1; i < NF; i++) {
      if ($i == "Ver") v = $(i + 1)
      if ($i == "Distrib") { v = $(i + 1); break }
    }
    print v
    exit
  }' | tr -d ',')
  echo "MySQL 版本: $version" >> "$RESULTFILE"

  log_section "最近的查询性能"
  mysql -e "SHOW FULL PROCESSLIST;" >> "$RESULTFILE"
}

# Dump one MySQL database into BACKUP_DIR and record the outcome.
# Globals:   BACKUP_DIR (read), RESULTFILE (read)
# Outputs:   <db>_backup_<timestamp>.sql in BACKUP_DIR on success; a
#            success/failure line appended to RESULTFILE
# TODO(review): the database name and credentials below are placeholders.
function backupMySQL {
  local DB_NAME="your_mysql_database"
  local DB_USER="your_mysql_user"
  local DB_PASSWORD="your_mysql_password"
  local TIMESTAMP dump_file cnf_file status

  log_section "MySQL 备份"

  TIMESTAMP=$(date +'%Y%m%d_%H%M%S')
  dump_file="$BACKUP_DIR/${DB_NAME}_backup_$TIMESTAMP.sql"

  # Hand the credentials over in a private option file instead of argv, where
  # "-p<password>" would be visible to other users through ps.
  if ! cnf_file=$(mktemp); then
    echo "MySQL 备份失败" >> "$RESULTFILE"
    return
  fi
  # NOTE: values containing a double quote or backslash would need escaping.
  printf '[client]\nuser="%s"\npassword="%s"\n' "$DB_USER" "$DB_PASSWORD" > "$cnf_file"

  # --defaults-extra-file has to be the first option on the command line.
  mysqldump --defaults-extra-file="$cnf_file" "$DB_NAME" > "$dump_file"
  status=$?
  rm -f -- "$cnf_file"

  if [ "$status" -eq 0 ]; then
    echo "MySQL 备份成功: ${DB_NAME}_backup_$TIMESTAMP.sql" >> "$RESULTFILE"
  else
    # Do not leave an empty or truncated dump that looks like a valid backup.
    rm -f -- "$dump_file"
    echo "MySQL 备份失败" >> "$RESULTFILE"
  fi
}

# Record PostgreSQL service state, client tool availability, connection count
# and recent activity, then trigger a backup.
# Globals:   RESULTFILE (read)
# Outputs:   status lines appended to RESULTFILE
# TODO(review): the database name and user below are placeholders.
function getPostgreSQLStatus {
  local DB_NAME="your_postgres_database"
  local DB_USER="your_postgres_user"
  local TIMESTAMP connections

  log_section "PostgreSQL 状态"

  if ! systemctl is-active postgresql > /dev/null 2>&1; then
    echo "PostgreSQL 服务状态: 未运行" >> "$RESULTFILE"
    return
  fi
  echo "PostgreSQL 服务状态: 运行中" >> "$RESULTFILE"

  TIMESTAMP=$(date +'%Y%m%d_%H%M%S')

  # Only query the server when psql is actually available.
  if command -v psql &> /dev/null; then
    echo "psql 可用" >> "$RESULTFILE"

    # -t prints tuples only; xargs trims the surrounding whitespace.
    connections=$(psql -U "$DB_USER" -d "$DB_NAME" -c "SELECT COUNT(*) FROM pg_stat_activity;" -t | xargs)
    echo "当前连接数: $connections" >> "$RESULTFILE"

    echo -e "\n查询性能:" >> "$RESULTFILE"
    psql -U "$DB_USER" -d "$DB_NAME" -c "SELECT * FROM pg_stat_activity ORDER BY state_change DESC LIMIT 5;" >> "$RESULTFILE"
  else
    echo "psql 未安装或不可用" >> "$RESULTFILE"
  fi

  # Only attempt the backup when pg_dump is actually available.
  if command -v pg_dump &> /dev/null; then
    echo "pg_dump 可用" >> "$RESULTFILE"
    backupPostgreSQL "$DB_NAME" "$DB_USER" "$TIMESTAMP"
  else
    echo "pg_dump 未安装或不可用" >> "$RESULTFILE"
  fi
}

# Dump one PostgreSQL database into BACKUP_DIR and record the outcome.
# Globals:   BACKUP_DIR (read), RESULTFILE (read)
# Arguments: $1 - database name
#            $2 - database user
#            $3 - timestamp used in the dump file name
# Outputs:   <db>_backup_<timestamp>.sql in BACKUP_DIR on success; a
#            success/failure line appended to RESULTFILE
function backupPostgreSQL {
  # Read the real positional parameters (the previous escaped "\$1" stored the
  # literal text "$1"), and keep them local to this function.
  local DB_NAME="$1"
  local DB_USER="$2"
  local TIMESTAMP="$3"
  local dump_file="$BACKUP_DIR/${DB_NAME}_backup_$TIMESTAMP.sql"

  log_section "PostgreSQL 备份"

  if pg_dump -U "$DB_USER" "$DB_NAME" > "$dump_file"; then
    echo "PostgreSQL 备份成功: ${DB_NAME}_backup_$TIMESTAMP.sql" >> "$RESULTFILE"
  else
    # Do not leave an empty or truncated dump that looks like a valid backup.
    rm -f -- "$dump_file"
    echo "PostgreSQL 备份失败" >> "$RESULTFILE"
  fi
}

# In-memory database monitoring.
# checkRedisStatus records whether the redis unit is active and, if so, the
# output of `redis-cli info` (also kept in the global REDIS_INFO).
function checkRedisStatus {
  log_section "Redis 状态"

  if ! systemctl is-active redis >> /dev/null 2>&1; then
    echo "Redis 服务状态: 未运行" >> "$RESULTFILE"
    return
  fi

  {
    echo "Redis 服务状态: 运行中"
    REDIS_INFO=$(redis-cli info)
    echo "Redis 性能信息:"
    echo "$REDIS_INFO"
  } >> "$RESULTFILE"
}

# Message queue monitoring.
# getRabbitMQStatus records whether rabbitmq-server is active and, if so, one
# line per queue with its name, message count and consumer count.
# Globals:   RESULTFILE (read)
# Outputs:   status lines appended to RESULTFILE
function getRabbitMQStatus {
  log_section "RabbitMQ 状态"

  if systemctl is-active rabbitmq-server >> /dev/null 2>&1; then
    echo "RabbitMQ 服务状态: 运行中" >> "$RESULTFILE"

    # NR>1 skips the first output line. The awk fields are written unescaped:
    # inside single quotes "\$1" reaches awk verbatim and is a syntax error.
    rabbitmqctl list_queues name messages consumers \
      | awk 'NR>1 {print "队列名: "$1", 消息数: "$2", 消费者数: "$3}' >> "$RESULTFILE"
  else
    echo "RabbitMQ 服务状态: 未运行" >> "$RESULTFILE"
  fi
}

# Log service monitoring.
# monitorHAProxyLogs appends the last 10 lines of the HAProxy log to the
# report, or a notice when the log file does not exist.
# Globals:   RESULTFILE (read)
# Arguments: $1 - log path (default: /var/log/haproxy.log)
function monitorHAProxyLogs {
  # Declared local so the path does not leak into (or clobber) global state.
  local ACCESS_LOG="${1:-/var/log/haproxy.log}"

  log_section "HAProxy 日志监控"

  if [ -f "$ACCESS_LOG" ]; then
    echo "最近的 HAProxy 日志:" >> "$RESULTFILE"
    tail -n 10 -- "$ACCESS_LOG" >> "$RESULTFILE"
  else
    echo "HAProxy 日志不存在" >> "$RESULTFILE"
  fi
}

# Software monitoring.
# getNTPStatus reports `systemctl is-active` for the first time daemon found
# on PATH (ntpd, then chronyd), or notes that none is installed.
function getNTPStatus {
  local daemon

  log_section "NTP 状态"

  for daemon in ntpd chronyd; do
    if command -v "$daemon" &> /dev/null; then
      echo "NTP服务状态: $(systemctl is-active "$daemon")" >> "$RESULTFILE"
      return
    fi
  done

  echo "NTP服务未安装或未配置" >> "$RESULTFILE"
}

# Record `java -version` (stdout and stderr) when java is on PATH, otherwise
# note that Java is not installed.
function getJDKStatus {
  log_section "JDK 状态"

  if ! command -v java &> /dev/null; then
    echo "Java 未安装" >> "$RESULTFILE"
    return
  fi

  java -version >> "$RESULTFILE" 2>&1
}

# Record `mvn -v` (stdout and stderr) when mvn is on PATH, otherwise note
# that Maven is not installed.
function getMavenStatus {
  log_section "Maven 状态"

  if ! command -v mvn &> /dev/null; then
    echo "Maven 未安装" >> "$RESULTFILE"
    return
  fi

  mvn -v >> "$RESULTFILE" 2>&1
}

# Record whether the prometheus unit is active and whether its local
# /metrics endpoint answers with an HTTP status containing "200".
function checkPrometheus {
  log_section "Prometheus 状态"

  if ! systemctl is-active prometheus >> /dev/null 2>&1; then
    echo "Prometheus 服务状态: 未运行" >> "$RESULTFILE"
    return
  fi
  echo "Prometheus 服务状态: 运行中" >> "$RESULTFILE"

  PROMETHEUS_URL="http://localhost:9090/metrics"
  # curl prints only the status code (-w) because the body goes to /dev/null.
  case "$(curl -s -o /dev/null -w "%{http_code}" "$PROMETHEUS_URL")" in
    *200*)
      echo "Prometheus 指标收集正常" >> "$RESULTFILE"
      ;;
    *)
      echo "Prometheus 指标收集异常" >> "$RESULTFILE"
      ;;
  esac
}

# Record whether the grafana-server unit is active and whether the local
# Grafana instance answers its health endpoint with HTTP 200.
# Globals:   RESULTFILE (read)
# Outputs:   status lines appended to RESULTFILE
function checkGrafana {
  local GRAFANA_URL http_code

  log_section "Grafana 状态"

  if ! systemctl is-active grafana-server >> /dev/null 2>&1; then
    echo "Grafana 服务状态: 未运行" >> "$RESULTFILE"
    return
  fi
  echo "Grafana 服务状态: 运行中" >> "$RESULTFILE"

  # Probe /api/health rather than "/": the root URL normally answers with a
  # redirect to the login page, which the 200 check reported as unavailable.
  GRAFANA_URL="http://localhost:3000/api/health"
  # --max-time keeps a hung instance from stalling the whole inspection.
  http_code=$(curl -s -o /dev/null --max-time 10 -w "%{http_code}" "$GRAFANA_URL")

  if [ "$http_code" = "200" ]; then
    echo "Grafana 仪表盘可用" >> "$RESULTFILE"
  else
    echo "Grafana 仪表盘不可用" >> "$RESULTFILE"
  fi
}

# CI/CD monitoring.
# getCIStatus records whether the gitlab and jenkins units are active.
function getCIStatus {
  local entry unit label

  log_section "CI/CD 状态"

  # Each entry is "<systemd unit>:<display name>".
  for entry in "gitlab:GitLab" "jenkins:Jenkins"; do
    unit=${entry%%:*}
    label=${entry#*:}
    if systemctl is-active "$unit" >> /dev/null 2>&1; then
      echo "$label 服务状态: 运行中" >> "$RESULTFILE"
    else
      echo "$label 服务状态: 未运行" >> "$RESULTFILE"
    fi
  done
}

# Container monitoring.
# dockerInspection records running and all containers, images, a one-shot
# resource snapshot, the last 10 log lines of each running container and the
# network list; or notes that Docker is not installed.
function dockerInspection {
  log_section "Docker 状态"

  if ! command -v docker &> /dev/null; then
    echo "Docker 未安装" >> "$RESULTFILE"
    return
  fi

  {
    echo "当前运行的 Docker 容器:"
    docker ps

    echo -e "\n所有 Docker 容器:"
    docker ps -a

    echo -e "\n当前 Docker 镜像:"
    docker images

    echo -e "\nDocker 容器资源使用情况:"
    docker stats --no-stream

    echo -e "\nDocker 容器日志:"
    # IDs of running containers; left unquoted below so each ID is one word.
    CONTAINERS=$(docker ps -q)
    for CONTAINER in $CONTAINERS; do
      echo -e "\n容器 $CONTAINER 的日志:"
      docker logs "$CONTAINER" --tail 10
    done

    echo -e "\n当前 Docker 网络配置:"
    docker network ls
  } >> "$RESULTFILE"
}

# Container orchestration monitoring.
# checkKubernetesStatus records cluster info, node status and pods in all
# namespaces via kubectl; or notes that kubectl is not installed.
function checkKubernetesStatus {
  log_section "Kubernetes 状态"

  if ! command -v kubectl &> /dev/null; then
    echo "kubectl 未安装" >> "$RESULTFILE"
    return
  fi

  {
    echo "Kubernetes 集群状态:"
    CLUSTER_STATUS=$(kubectl cluster-info)
    echo "$CLUSTER_STATUS"

    echo -e "\n节点状态:"
    kubectl get nodes

    echo -e "\nPod 状态:"
    kubectl get pods --all-namespaces
  } >> "$RESULTFILE"
}

# Main flow: run every check and collect all output in the report file.
# The report is truncated once up front and the group is then opened in
# append mode. The check functions append with ">>"; a non-append descriptor
# on the group would write stray stdout/stderr (e.g. command errors) at its
# own offset and overwrite report text that was already appended.
: > "$RESULTFILE"
{
  getSystemStatus
  getCpuStatus
  getMemStatus
  getDiskStatus
  getNetworkStatus
  checkNginxConfig
  monitorNginxLogs
  getMySQLStatus
  backupMySQL
  getPostgreSQLStatus
  checkRedisStatus
  getRabbitMQStatus
  # Defined above but previously never invoked.
  monitorHAProxyLogs
  getNTPStatus
  getJDKStatus
  getMavenStatus
  checkPrometheus
  checkGrafana
  getCIStatus
  dockerInspection
  checkKubernetesStatus
} >> "$RESULTFILE" 2>&1

# Mail the report to the configured recipient and report what actually
# happened: the "sent" message is printed only when the mail command exists
# and exits successfully.
if command -v mail > /dev/null 2>&1 \
  && mail -s "巡检结果 - $IPADDR" "$EMAIL" < "$RESULTFILE"; then
  echo "检查结果已保存到:$RESULTFILE,且已发送到 $EMAIL"
else
  echo "检查结果已保存到:$RESULTFILE"
  echo "邮件发送失败: 未能发送到 $EMAIL" >&2
fi

(未修改)

package main

import (
	"bytes"
	"fmt"
	"net/mail"
	"net/smtp"
	"os"
	"os/exec"
	"strings"
	"time"
)

// Mail settings used by sendEmail.
// NOTE(review): these are hard-coded placeholder credentials; consider
// loading them from the environment or a protected file instead.
const (
	emailSender   = "your_email@gmail.com" // sender mailbox; also used as the SMTP login name
	emailPassword = "your_email_password"    // sender mailbox password
	emailReceiver = "example@mail.com"        // recipient mailbox
)

// main runs the host inspection: it requires root, gathers the individual
// status sections, writes them to ./log/HostDailyCheck-<ip>-<date>.txt and
// mails the same text to emailReceiver. Any failure ends the process with a
// non-zero exit status.
func main() {
	if os.Geteuid() != 0 {
		fmt.Println("请以 root 用户执行此脚本")
		os.Exit(1)
	}

	ipAddr, err := getIP()
	if err != nil {
		fmt.Println("获取IP地址失败:", err)
		os.Exit(1)
	}

	osName, osVersion, err := getOSInfo()
	if err != nil {
		fmt.Println("获取操作系统信息失败:", err)
		os.Exit(1)
	}

	// "20060102" is Go's reference layout for YYYYMMDD; an empty layout
	// formats to an empty string and left the date out of the file name.
	resultFile := fmt.Sprintf("./log/HostDailyCheck-%s-%s.txt", ipAddr, time.Now().Format("20060102"))
	for _, dir := range []string{"./log", "./backup"} {
		// 0755 instead of 0777: the directories need not be world-writable.
		if err := os.MkdirAll(dir, 0755); err != nil {
			fmt.Println("创建目录失败:", err)
			os.Exit(1)
		}
	}

	var result bytes.Buffer
	result.WriteString(fmt.Sprintf("IP 地址: %s\n", ipAddr))
	result.WriteString(fmt.Sprintf("%s 版本: %s\n", osName, osVersion))

	result.WriteString(getSystemStatus())
	result.WriteString(getCPUStatus())
	result.WriteString(getMemStatus())
	result.WriteString(getDiskStatus())
	result.WriteString(getNetworkStatus())
	result.WriteString(checkNginxConfig())
	result.WriteString(monitorNginxLogs())
	result.WriteString(getMySQLStatus())
	// Backup and other status checks can be added similarly

	err = os.WriteFile(resultFile, result.Bytes(), 0644)
	if err != nil {
		fmt.Println("写入结果文件失败:", err)
		os.Exit(1)
	}

	err = sendEmail("巡检结果 - "+ipAddr, result.String())
	if err != nil {
		// The report file has been written at this point; only delivery failed.
		fmt.Println("发送邮件失败:", err)
		os.Exit(1)
	}
	fmt.Println("检查结果已发送到:", emailReceiver)
}

// getIP returns an IPv4 address parsed from the output of ifconfig.
// It accepts both "inet 1.2.3.4" and the legacy "inet addr:1.2.3.4" layouts
// and prefers the first non-loopback address; a 127.x address is returned
// only when nothing else is found. An error is returned when ifconfig fails
// or prints no IPv4 address.
func getIP() (string, error) {
	output, err := exec.Command("ifconfig").Output()
	if err != nil {
		return "", err
	}

	loopback := ""
	for _, line := range strings.Split(string(output), "\n") {
		fields := strings.Fields(line)
		// Guard the index: a line may hold "inet" without an address field.
		if len(fields) < 2 || fields[0] != "inet" {
			continue
		}
		addr := strings.TrimPrefix(fields[1], "addr:")
		if addr == "" {
			continue
		}
		if strings.HasPrefix(addr, "127.") {
			if loopback == "" {
				loopback = addr
			}
			continue
		}
		return addr, nil
	}

	if loopback != "" {
		return loopback, nil
	}
	return "", fmt.Errorf("无法找到 IP 地址")
}

// getOSInfo reads /etc/os-release and returns the values of its ID and
// VERSION_ID fields with surrounding quotes removed. It returns an error
// when the file cannot be read or either field is missing or empty.
func getOSInfo() (string, string, error) {
	// Read the file directly instead of spawning `cat`.
	data, err := os.ReadFile("/etc/os-release")
	if err != nil {
		return "", "", err
	}

	var osName, osVersion string
	for _, line := range strings.Split(string(data), "\n") {
		// Split on the first '=' only so a value containing '=' stays intact.
		kv := strings.SplitN(strings.TrimSpace(line), "=", 2)
		if len(kv) != 2 {
			continue
		}
		value := strings.Trim(kv[1], "\"'")
		switch kv[0] {
		case "ID":
			osName = value
		case "VERSION_ID":
			osVersion = value
		}
	}

	if osName == "" || osVersion == "" {
		return "", "", fmt.Errorf("无法获取操作系统信息")
	}

	return osName, osVersion, nil
}

// getSystemStatus returns the "system status" section: OS name, kernel
// release, host name and the current time, one item per line.
func getSystemStatus() string {
	var result bytes.Buffer
	result.WriteString("系统状态:\n")
	// Command output ends with its own newline; trim it so each item takes
	// exactly one line instead of being followed by a blank line.
	result.WriteString(fmt.Sprintf("系统: %s\n", strings.TrimSpace(execOutput("uname", "-o"))))
	result.WriteString(fmt.Sprintf("内核: %s\n", strings.TrimSpace(execOutput("uname", "-r"))))
	result.WriteString(fmt.Sprintf("主机名: %s\n", strings.TrimSpace(execOutput("hostname"))))
	result.WriteString(fmt.Sprintf("当前时间: %s\n", time.Now().Format(time.RFC1123)))
	return result.String()
}

// getCPUStatus returns the CPU section: a heading followed by the lscpu
// output and a trailing newline.
func getCPUStatus() string {
	report := execOutput("lscpu")
	return fmt.Sprintf("%s\n%s\n", "CPU 状态:", report)
}

// getMemStatus returns the memory section: a heading followed by the
// `free -h` output and a trailing newline.
func getMemStatus() string {
	report := execOutput("free", "-h")
	return fmt.Sprintf("%s\n%s\n", "内存状态:", report)
}

// getDiskStatus returns the disk section: a heading followed by the `df -h`
// output and a trailing newline.
func getDiskStatus() string {
	report := execOutput("df", "-h")
	return fmt.Sprintf("%s\n%s\n", "磁盘状态:", report)
}

// getNetworkStatus returns the network section: a heading followed by the
// `ip addr` output and a trailing newline.
func getNetworkStatus() string {
	report := execOutput("ip", "addr")
	return fmt.Sprintf("%s\n%s\n", "网络状态:", report)
}

func checkNginxConfig() string {
	var result bytes.Buffer
	result.WriteString("Nginx 配置检查:\n")
	if _, err := exec.LookPath("nginx"); err == nil {
		result.WriteString(execOutput("nginx", "-t"))
	} else {
		result.WriteString("Nginx 未安装\n")
	}
	return result.String()
}

// monitorNginxLogs returns the Nginx log section: for the access log and
// then the error log, either a heading plus the last 10 lines of the file or
// a notice that the file does not exist.
func monitorNginxLogs() string {
	logs := []struct {
		path    string
		found   string
		missing string
	}{
		{"/var/log/nginx/access.log", "最近的访问日志:\n", "Nginx 访问日志不存在\n"},
		{"/var/log/nginx/error.log", "最近的错误日志:\n", "Nginx 错误日志不存在\n"},
	}

	var report bytes.Buffer
	report.WriteString("Nginx 日志监控:\n")

	for _, entry := range logs {
		if _, statErr := os.Stat(entry.path); statErr != nil {
			report.WriteString(entry.missing)
			continue
		}
		report.WriteString(entry.found)
		report.WriteString(execOutput("tail", "-n", "10", entry.path))
	}

	return report.String()
}

// getMySQLStatus returns the MySQL section. When `systemctl is-active mysql`
// succeeds it includes the Threads_connected status, the client version and
// the full process list; otherwise it reports that the service is not
// running.
func getMySQLStatus() string {
	var report bytes.Buffer
	report.WriteString("MySQL 状态:\n")

	if runErr := exec.Command("systemctl", "is-active", "mysql").Run(); runErr != nil {
		report.WriteString("MySQL 服务状态: 未运行\n")
		return report.String()
	}

	report.WriteString("MySQL 服务状态: 运行中\n")
	report.WriteString(execOutput("mysql", "-e", "SHOW STATUS LIKE 'Threads_connected';"))
	report.WriteString(execOutput("mysql", "-V"))
	report.WriteString("最近的查询性能:\n")
	report.WriteString(execOutput("mysql", "-e", "SHOW FULL PROCESSLIST;"))

	return report.String()
}

// execOutput runs command with args and returns its standard output as a
// string. When the command cannot be started or exits with an error, it
// returns a "执行命令失败" message describing the error instead.
func execOutput(command string, args ...string) string {
	stdout, runErr := exec.Command(command, args...).Output()
	if runErr == nil {
		return string(stdout)
	}
	return fmt.Sprintf("执行命令失败: %s\n", runErr)
}

// sendEmail sends body as a UTF-8 plain-text message with the given subject
// from emailSender to emailReceiver through smtp.gmail.com:587, logging in
// with PLAIN auth as the sender. It returns an error when either address
// fails to parse or when the SMTP exchange fails.
func sendEmail(subject, body string) error {
	from, err := mail.ParseAddress(emailSender)
	if err != nil {
		return fmt.Errorf("发件人地址无效 %q: %w", emailSender, err)
	}
	to, err := mail.ParseAddress(emailReceiver)
	if err != nil {
		return fmt.Errorf("收件人地址无效 %q: %w", emailReceiver, err)
	}

	// Drop CR/LF from the subject so it cannot inject additional headers.
	subject = strings.NewReplacer("\r", " ", "\n", " ").Replace(subject)
	// Mail bodies use CRLF line endings; normalise whatever the report contains.
	body = strings.ReplaceAll(strings.ReplaceAll(body, "\r\n", "\n"), "\n", "\r\n")

	// NOTE(review): the subject is sent as raw UTF-8; RFC 2047 encoding would
	// need the mime package, which this file does not import.
	msg := []byte("From: " + from.String() + "\r\n" +
		"To: " + to.String() + "\r\n" +
		"Subject: " + subject + "\r\n" +
		"MIME-Version: 1.0\r\n" +
		"Content-Type: text/plain; charset=UTF-8\r\n" +
		"Content-Transfer-Encoding: 8bit\r\n" +
		"\r\n" +
		body)

	auth := smtp.PlainAuth("", from.Address, emailPassword, "smtp.gmail.com")
	return smtp.SendMail("smtp.gmail.com:587", auth, from.Address, []string{to.Address}, msg)
}
posted @ 2024-07-15 15:28  Mugetsukun  阅读(2)  评论(0)  编辑  收藏  举报