386 lines
11 KiB
Python
386 lines
11 KiB
Python
#!/usr/bin/env python3
|
|
"""
Monitoring and alerting setup script.

Automatically installs and configures the Prometheus + Grafana monitoring stack.
"""
|
|
import os
|
|
import sys
|
|
import subprocess
|
|
import json
|
|
import time
|
|
from pathlib import Path
|
|
|
|
|
|
class MonitoringSetup:
    """Generate the monitoring stack configuration and optionally start it.

    The class writes a Docker Compose file (Prometheus, Grafana, Node
    Exporter, AlertManager, Redis and its exporter), an AlertManager
    configuration and the Grafana provisioning files into ``monitoring_dir``,
    then can bring the stack up through Docker Compose.

    All generated files are written as UTF-8 because the templates contain
    non-ASCII text.
    """

    def __init__(self, monitoring_dir='monitoring'):
        """Set up the paths used by the other methods.

        Args:
            monitoring_dir: Directory that receives the generated files and
                is used as the working directory for Docker Compose.
                Defaults to ``'monitoring'`` (relative to the current
                working directory), which is what the script always used.
        """
        self.monitoring_dir = Path(monitoring_dir)
        self.docker_compose_file = self.monitoring_dir / 'docker-compose.yml'
        # Directory the hand-maintained prometheus.yml, alert_rules.yml and
        # grafana_dashboard.json are read from by copy_monitoring_files().
        self.source_dir = Path('monitoring')
        # Base command used to invoke Compose. check_docker() replaces it
        # with ['docker', 'compose'] when only the v2 plugin is available.
        self.compose_cmd = ['docker-compose']

    @staticmethod
    def _probe_version(cmd):
        """Run a version command and return its stripped stdout.

        Returns ``None`` when the executable cannot be started or exits with
        a non-zero status, so callers can tell "missing" from "present".
        """
        try:
            result = subprocess.run(cmd, capture_output=True, text=True)
        except OSError:
            return None
        if result.returncode != 0:
            return None
        return result.stdout.strip()

    @staticmethod
    def _write_config(path, content):
        """Write ``content`` to ``path`` as UTF-8, creating parent directories."""
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text(content, encoding='utf-8')

    def check_docker(self):
        """Check that Docker and Docker Compose are usable.

        Tries the standalone ``docker-compose`` binary first and falls back
        to the ``docker compose`` plugin. The flavour that works is stored in
        ``self.compose_cmd`` so ``start_monitoring`` invokes the same one.

        Returns:
            True when both Docker and a Compose flavour respond successfully,
            False otherwise (after printing installation hints).
        """
        print("🔍 检查 Docker 环境...")

        docker_version = self._probe_version(['docker', '--version'])
        if docker_version is None:
            print("❌ Docker 未安装")
            print("\n请安装 Docker: https://docs.docker.com/get-docker/")
            return False
        print(f"✅ Docker: {docker_version}")

        candidates = (
            (['docker-compose'], ['--version']),
            (['docker', 'compose'], ['version']),
        )
        for base_cmd, version_args in candidates:
            # A missing plugin makes `docker compose version` exit non-zero
            # without raising, so the exit status has to be checked as well.
            compose_version = self._probe_version(base_cmd + version_args)
            if compose_version is not None:
                self.compose_cmd = base_cmd
                print(f"✅ Docker Compose: {compose_version}")
                return True

        print("❌ Docker Compose 未安装")
        print("\n请安装 Docker Compose")
        return False

    def create_docker_compose(self):
        """Write ``docker-compose.yml`` into the monitoring directory."""
        print("\n📝 创建 Docker Compose 配置...")

        compose_content = """version: '3.8'

services:
  # Prometheus 监控
  prometheus:
    image: prom/prometheus:latest
    container_name: kamaxitong-prometheus
    restart: unless-stopped
    ports:
      - "9090:9090"
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
      - ./alert_rules.yml:/etc/prometheus/alert_rules.yml
      - prometheus_data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/etc/prometheus/console_libraries'
      - '--web.console.templates=/etc/prometheus/consoles'
      - '--storage.tsdb.retention.time=200h'
      - '--web.enable-lifecycle'
    networks:
      - monitoring

  # Grafana 仪表板
  grafana:
    image: grafana/grafana:latest
    container_name: kamaxitong-grafana
    restart: unless-stopped
    ports:
      - "3000:3000"
    volumes:
      - grafana_data:/var/lib/grafana
      - ./grafana/provisioning:/etc/grafana/provisioning
    environment:
      - GF_SECURITY_ADMIN_USER=admin
      - GF_SECURITY_ADMIN_PASSWORD=admin123
      - GF_USERS_ALLOW_SIGN_UP=false
    networks:
      - monitoring

  # Node Exporter 系统监控
  node-exporter:
    image: prom/node-exporter:latest
    container_name: kamaxitong-node-exporter
    restart: unless-stopped
    ports:
      - "9100:9100"
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    command:
      - '--path.procfs=/host/proc'
      - '--path.rootfs=/rootfs'
      - '--path.sysfs=/host/sys'
      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
    networks:
      - monitoring

  # AlertManager 告警管理
  alertmanager:
    image: prom/alertmanager:latest
    container_name: kamaxitong-alertmanager
    restart: unless-stopped
    ports:
      - "9093:9093"
    volumes:
      - ./alertmanager.yml:/etc/alertmanager/alertmanager.yml
      - alertmanager_data:/alertmanager
    command:
      - '--config.file=/etc/alertmanager/alertmanager.yml'
      - '--storage.path=/alertmanager'
    networks:
      - monitoring

  # Redis Exporter
  redis-exporter:
    image: oliver006/redis_exporter:latest
    container_name: kamaxitong-redis-exporter
    restart: unless-stopped
    ports:
      - "9121:9121"
    environment:
      - REDIS_ADDR=redis://redis:6379
    networks:
      - monitoring
    depends_on:
      - redis

  # Redis 数据库
  redis:
    image: redis:alpine
    container_name: kamaxitong-redis
    restart: unless-stopped
    ports:
      - "6379:6379"
    volumes:
      - redis_data:/data
    networks:
      - monitoring

volumes:
  prometheus_data:
  grafana_data:
  alertmanager_data:
  redis_data:

networks:
  monitoring:
    driver: bridge
"""

        self._write_config(self.docker_compose_file, compose_content)

        print(f"✅ Docker Compose 配置已创建: {self.docker_compose_file}")

    def create_alertmanager_config(self):
        """Write ``alertmanager.yml`` into the monitoring directory.

        The SMTP settings and recipient addresses in the template are
        placeholders that must be edited before e-mail alerts can work.
        """
        print("\n📝 创建 AlertManager 配置...")

        alertmanager_config = self.monitoring_dir / 'alertmanager.yml'

        # The e-mail subject goes under `headers` and the plain-text body
        # under `text`; `email_configs` has no `subject` or `body` keys.
        config_content = """global:
  smtp_smarthost: 'localhost:587'
  smtp_from: 'alerts@yourcompany.com'
  smtp_auth_username: 'alerts@yourcompany.com'
  smtp_auth_password: 'your-password'

route:
  group_by: ['alertname']
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 1h
  receiver: 'web.hook'
  routes:
    - match:
        severity: critical
      receiver: 'critical-alerts'
    - match:
        severity: warning
      receiver: 'warning-alerts'

receivers:
  - name: 'web.hook'
    webhook_configs:
      - url: 'http://localhost:5001/alert'
        send_resolved: true

  - name: 'critical-alerts'
    email_configs:
      - to: 'admin@yourcompany.com'
        headers:
          Subject: '【严重告警】KaMiXiTong 系统告警'
        text: |
          {{ range .Alerts }}
          告警: {{ .Annotations.summary }}
          描述: {{ .Annotations.description }}
          时间: {{ .StartsAt }}
          级别: {{ .Labels.severity }}
          {{ end }}

  - name: 'warning-alerts'
    email_configs:
      - to: 'admin@yourcompany.com'
        headers:
          Subject: '【警告】KaMiXiTong 系统告警'
        text: |
          {{ range .Alerts }}
          告警: {{ .Annotations.summary }}
          描述: {{ .Annotations.description }}
          时间: {{ .StartsAt }}
          级别: {{ .Labels.severity }}
          {{ end }}

inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'dev', 'instance']
"""

        # _write_config creates the directory, so this method no longer
        # depends on create_docker_compose() having run first.
        self._write_config(alertmanager_config, config_content)

        print(f"✅ AlertManager 配置已创建: {alertmanager_config}")

    def create_grafana_provisioning(self):
        """Write the Grafana data source and dashboard provider definitions.

        Grafana reads provisioning files from the ``datasources/`` and
        ``dashboards/`` subdirectories of its provisioning directory, so each
        file is placed in the matching subdirectory.
        """
        print("\n📝 创建 Grafana 配置...")

        provisioning_dir = self.monitoring_dir / 'grafana' / 'provisioning'

        # Data source definition.
        datasource_config = provisioning_dir / 'datasources' / 'datasources.yml'
        datasource_content = """apiVersion: 1

datasources:
  - name: Prometheus
    type: prometheus
    access: proxy
    url: http://prometheus:9090
    isDefault: true
"""
        self._write_config(datasource_config, datasource_content)

        # Dashboard provider definition. `options.path` is the in-container
        # directory that copy_monitoring_files() fills with the dashboard JSON.
        dashboard_config = provisioning_dir / 'dashboards' / 'dashboards.yml'
        dashboard_content = """apiVersion: 1

providers:
  - name: 'KaMiXiTong'
    orgId: 1
    folder: 'KaMiXiTong'
    type: file
    disableDeletion: false
    editable: true
    options:
      path: /etc/grafana/provisioning/dashboards
"""
        self._write_config(dashboard_config, dashboard_content)

        print("✅ Grafana 配置已创建")

    def copy_monitoring_files(self):
        """Copy the existing Prometheus and Grafana files into place.

        Reads ``prometheus.yml``, ``alert_rules.yml`` and
        ``grafana_dashboard.json`` from ``self.source_dir``. A missing source
        is reported and skipped instead of being ignored silently, because
        the Compose file bind-mounts the first two.
        """
        print("\n📁 复制监控配置文件...")

        grafana_dashboard_dir = (
            self.monitoring_dir / 'grafana' / 'provisioning' / 'dashboards'
        )
        grafana_dashboard_dir.mkdir(parents=True, exist_ok=True)

        copies = (
            (self.source_dir / 'prometheus.yml',
             self.monitoring_dir / 'prometheus.yml'),
            (self.source_dir / 'alert_rules.yml',
             self.monitoring_dir / 'alert_rules.yml'),
            (self.source_dir / 'grafana_dashboard.json',
             grafana_dashboard_dir / 'kamaxitong_dashboard.json'),
        )
        for src, dst in copies:
            if not src.is_file():
                print(f"⚠️ 未找到配置文件，已跳过: {src}")
                continue
            # With the default directories source and destination are the
            # same file; rewriting it in place is pointless and would
            # truncate it if the write were interrupted.
            if dst.exists() and src.samefile(dst):
                continue
            # Copy bytes so the result does not depend on the locale encoding.
            dst.write_bytes(src.read_bytes())

        print("✅ 配置文件已复制")

    def start_monitoring(self):
        """Start the stack with ``<compose> up -d`` in the monitoring directory.

        The command runs with ``cwd=self.monitoring_dir`` so the working
        directory of this process is left unchanged.

        Returns:
            True when Compose exits with status 0, False when it fails or
            cannot be launched at all.
        """
        print("\n🚀 启动监控服务...")

        print("启动 Docker Compose...")
        try:
            result = subprocess.run(
                [*self.compose_cmd, 'up', '-d'],
                cwd=self.monitoring_dir,
                capture_output=True,
                text=True,
            )
        except OSError as exc:
            # Raised when the executable or the working directory is missing.
            print(f"❌ 启动失败: {exc}")
            return False

        if result.returncode != 0:
            print(f"❌ 启动失败: {result.stderr}")
            return False

        print("✅ 监控服务已启动")
        return True

    def show_access_info(self):
        """Print the service URLs, default credentials and common commands."""
        # Show the Compose flavour that is actually in use on this host.
        compose = ' '.join(self.compose_cmd)

        print("\n" + "=" * 60)
        print("✅ 监控服务配置完成!")
        print("=" * 60)
        print("\n📊 访问地址:")
        print(" Grafana 仪表板: http://localhost:3000")
        print(" 用户名: admin")
        print(" 密码: admin123")
        print("\n Prometheus: http://localhost:9090")
        print(" AlertManager: http://localhost:9093")
        print(" Node Exporter: http://localhost:9100")
        print("\n📋 常用命令:")
        print(f" 查看服务状态: {compose} ps")
        print(f" 查看日志: {compose} logs -f")
        print(f" 停止服务: {compose} down")
        print(f" 重启服务: {compose} restart")
        print("\n⚠️ 注意:")
        print(" - 首次启动 Grafana 需要导入仪表板")
        print(" - 定期备份 Grafana 数据")
        print(" - 配置邮件告警需要修改 alertmanager.yml")

    def run(self):
        """Run the interactive setup flow.

        Checks the Docker environment, writes all configuration files and
        asks whether to start the stack. Exits the process with status 1 when
        Docker is unavailable or the stack fails to start.
        """
        print("=" * 60)
        print("🔧 KaMiXiTong 监控告警配置工具")
        print("=" * 60)

        # Nothing below is useful without Docker.
        if not self.check_docker():
            sys.exit(1)

        # Generate every configuration file.
        self.create_docker_compose()
        self.create_alertmanager_config()
        self.create_grafana_provisioning()
        self.copy_monitoring_files()

        # Ask whether to start the stack now.
        print("\n是否启动监控服务? (y/N)")
        try:
            answer = input().strip().lower()
        except EOFError:
            # No interactive stdin (pipe, CI): fall back to the default "no".
            answer = ''

        if answer != 'y':
            print("\n📋 手动启动命令:")
            print(f" cd {self.monitoring_dir}")
            print(f" {' '.join(self.compose_cmd)} up -d")
            return

        if not self.start_monitoring():
            print("❌ 启动失败，请检查错误信息")
            sys.exit(1)

        time.sleep(5)  # give the containers a moment to come up
        self.show_access_info()
|
|
|
|
|
|
def main():
    """Script entry point: create a MonitoringSetup and run its setup flow."""
    MonitoringSetup().run()
|
|
|
|
|
|
# Run the setup flow only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|