第一次提交
This commit is contained in:
385
scripts/setup_monitoring.py
Normal file
385
scripts/setup_monitoring.py
Normal file
@@ -0,0 +1,385 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
监控告警配置脚本
|
||||
自动安装和配置 Prometheus + Grafana 监控栈
|
||||
"""
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
import json
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
class MonitoringSetup:
    """Generate and optionally start a Docker-based monitoring stack.

    The class writes a Docker Compose file (Prometheus, Grafana, Node
    Exporter, Alertmanager, Redis and a Redis exporter), an Alertmanager
    configuration and Grafana provisioning files into ``monitoring_dir``,
    then optionally runs ``compose up -d`` in that directory.
    """

    def __init__(self):
        # Directory (relative to the current working directory) that
        # receives every generated file.
        self.monitoring_dir = Path('monitoring')
        # Location of the generated Compose file.
        self.docker_compose_file = self.monitoring_dir / 'docker-compose.yml'
        # Compose invocation found by check_docker(): ['docker-compose'] or
        # ['docker', 'compose']; None until a detection has succeeded.
        self.compose_cmd = None

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _probe(command):
        """Run *command* and return its stripped stdout.

        Returns None when the executable cannot be launched or exits with a
        non-zero status, so callers can treat both cases as "not available".
        """
        try:
            result = subprocess.run(command, capture_output=True, text=True)
        except OSError:
            return None
        if result.returncode != 0:
            return None
        return result.stdout.strip()

    def _detect_compose_command(self):
        """Return ``(command, version_text)`` for the first usable Compose.

        The standalone ``docker-compose`` binary is tried first, then the
        ``docker compose`` plugin. Returns ``(None, None)`` if neither works.
        """
        candidates = (
            (['docker-compose'], ['--version']),
            (['docker', 'compose'], ['version']),
        )
        for command, version_args in candidates:
            version_text = self._probe(command + version_args)
            if version_text is not None:
                return command, version_text
        return None, None

    def _compose_display(self):
        """Return the Compose command as a string for user-facing hints."""
        command = getattr(self, 'compose_cmd', None) or ['docker-compose']
        return ' '.join(command)

    @staticmethod
    def _write_text(path, content):
        """Write *content* to *path* as UTF-8, creating parent directories.

        The generated files contain non-ASCII text, so the encoding is
        pinned instead of relying on the platform default.
        """
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text(content, encoding='utf-8')

    @staticmethod
    def _copy_file(src, dst):
        """Copy *src* to *dst* byte-for-byte.

        Returns False (after printing a warning) when *src* is not a regular
        file. A copy onto itself is skipped and reported as success.
        """
        if not src.is_file():
            print(f"⚠️ 未找到 {src},已跳过")
            return False
        if dst.exists() and src.resolve() == dst.resolve():
            # Source and destination are the same file: nothing to do.
            return True
        dst.parent.mkdir(parents=True, exist_ok=True)
        dst.write_bytes(src.read_bytes())
        return True

    # ------------------------------------------------------------------
    # Public steps
    # ------------------------------------------------------------------

    def check_docker(self):
        """Check that Docker and a Compose implementation are usable.

        On success the detected Compose invocation is stored in
        ``self.compose_cmd``.

        Returns:
            True when both tools respond successfully, otherwise False.
        """
        print("🔍 检查 Docker 环境...")

        docker_version = self._probe(['docker', '--version'])
        if docker_version is None:
            print("❌ Docker 未安装")
            print("\n请安装 Docker: https://docs.docker.com/get-docker/")
            return False
        print(f"✅ Docker: {docker_version}")

        compose_cmd, compose_version = self._detect_compose_command()
        if compose_cmd is None:
            print("❌ Docker Compose 未安装")
            print("\n请安装 Docker Compose")
            return False
        self.compose_cmd = compose_cmd
        print(f"✅ Docker Compose: {compose_version}")

        return True

    def create_docker_compose(self):
        """Write the Docker Compose file describing the monitoring stack."""
        print("\n📝 创建 Docker Compose 配置...")

        # NOTE(security): the Grafana admin password below is a fixed,
        # well-known default; change it before exposing port 3000.
        compose_content = """version: '3.8'

services:
  # Prometheus 监控
  prometheus:
    image: prom/prometheus:latest
    container_name: kamaxitong-prometheus
    restart: unless-stopped
    ports:
      - "9090:9090"
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
      - ./alert_rules.yml:/etc/prometheus/alert_rules.yml
      - prometheus_data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/etc/prometheus/console_libraries'
      - '--web.console.templates=/etc/prometheus/consoles'
      - '--storage.tsdb.retention.time=200h'
      - '--web.enable-lifecycle'
    networks:
      - monitoring

  # Grafana 仪表板
  grafana:
    image: grafana/grafana:latest
    container_name: kamaxitong-grafana
    restart: unless-stopped
    ports:
      - "3000:3000"
    volumes:
      - grafana_data:/var/lib/grafana
      - ./grafana/provisioning:/etc/grafana/provisioning
    environment:
      - GF_SECURITY_ADMIN_USER=admin
      - GF_SECURITY_ADMIN_PASSWORD=admin123
      - GF_USERS_ALLOW_SIGN_UP=false
    networks:
      - monitoring

  # Node Exporter 系统监控
  node-exporter:
    image: prom/node-exporter:latest
    container_name: kamaxitong-node-exporter
    restart: unless-stopped
    ports:
      - "9100:9100"
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    command:
      - '--path.procfs=/host/proc'
      - '--path.rootfs=/rootfs'
      - '--path.sysfs=/host/sys'
      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
    networks:
      - monitoring

  # AlertManager 告警管理
  alertmanager:
    image: prom/alertmanager:latest
    container_name: kamaxitong-alertmanager
    restart: unless-stopped
    ports:
      - "9093:9093"
    volumes:
      - ./alertmanager.yml:/etc/alertmanager/alertmanager.yml
      - alertmanager_data:/alertmanager
    command:
      - '--config.file=/etc/alertmanager/alertmanager.yml'
      - '--storage.path=/alertmanager'
    networks:
      - monitoring

  # Redis Exporter
  redis-exporter:
    image: oliver006/redis_exporter:latest
    container_name: kamaxitong-redis-exporter
    restart: unless-stopped
    ports:
      - "9121:9121"
    environment:
      - REDIS_ADDR=redis://redis:6379
    networks:
      - monitoring
    depends_on:
      - redis

  # Redis 数据库
  redis:
    image: redis:alpine
    container_name: kamaxitong-redis
    restart: unless-stopped
    ports:
      - "6379:6379"
    volumes:
      - redis_data:/data
    networks:
      - monitoring

volumes:
  prometheus_data:
  grafana_data:
  alertmanager_data:
  redis_data:

networks:
  monitoring:
    driver: bridge
"""

        self._write_text(self.docker_compose_file, compose_content)

        print(f"✅ Docker Compose 配置已创建: {self.docker_compose_file}")

    def create_alertmanager_config(self):
        """Write ``alertmanager.yml`` with webhook and e-mail receivers.

        E-mail receivers set the subject through ``headers`` and the message
        through ``text``; Alertmanager's ``email_configs`` has no ``subject``
        or ``body`` keys and rejects configurations containing them.
        """
        print("\n📝 创建 AlertManager 配置...")

        alertmanager_config = self.monitoring_dir / 'alertmanager.yml'

        config_content = """global:
  smtp_smarthost: 'localhost:587'
  smtp_from: 'alerts@yourcompany.com'
  smtp_auth_username: 'alerts@yourcompany.com'
  smtp_auth_password: 'your-password'

route:
  group_by: ['alertname']
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 1h
  receiver: 'web.hook'
  routes:
    - match:
        severity: critical
      receiver: 'critical-alerts'
    - match:
        severity: warning
      receiver: 'warning-alerts'

receivers:
  - name: 'web.hook'
    webhook_configs:
      - url: 'http://localhost:5001/alert'
        send_resolved: true

  - name: 'critical-alerts'
    email_configs:
      - to: 'admin@yourcompany.com'
        headers:
          Subject: '【严重告警】KaMiXiTong 系统告警'
        text: |
          {{ range .Alerts }}
          告警: {{ .Annotations.summary }}
          描述: {{ .Annotations.description }}
          时间: {{ .StartsAt }}
          级别: {{ .Labels.severity }}
          {{ end }}

  - name: 'warning-alerts'
    email_configs:
      - to: 'admin@yourcompany.com'
        headers:
          Subject: '【警告】KaMiXiTong 系统告警'
        text: |
          {{ range .Alerts }}
          告警: {{ .Annotations.summary }}
          描述: {{ .Annotations.description }}
          时间: {{ .StartsAt }}
          级别: {{ .Labels.severity }}
          {{ end }}

inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'dev', 'instance']
"""

        self._write_text(alertmanager_config, config_content)

        print(f"✅ AlertManager 配置已创建: {alertmanager_config}")

    def create_grafana_provisioning(self):
        """Write Grafana data-source and dashboard-provider provisioning.

        Grafana only scans the ``datasources/`` and ``dashboards/``
        sub-directories of its provisioning directory, so each file is
        written into the matching sub-directory.
        """
        print("\n📝 创建 Grafana 配置...")

        provisioning_dir = self.monitoring_dir / 'grafana' / 'provisioning'

        # Data source: Prometheus, reached by its Compose service name.
        datasource_content = """apiVersion: 1

datasources:
  - name: Prometheus
    type: prometheus
    access: proxy
    url: http://prometheus:9090
    isDefault: true
"""
        self._write_text(
            provisioning_dir / 'datasources' / 'datasources.yml',
            datasource_content,
        )

        # Dashboard provider: loads JSON dashboards from the mounted
        # provisioning/dashboards directory inside the container.
        dashboard_content = """apiVersion: 1

providers:
  - name: 'KaMiXiTong'
    orgId: 1
    folder: 'KaMiXiTong'
    type: file
    disableDeletion: false
    editable: true
    options:
      path: /etc/grafana/provisioning/dashboards
"""
        self._write_text(
            provisioning_dir / 'dashboards' / 'dashboards.yml',
            dashboard_content,
        )

        print("✅ Grafana 配置已创建")

    def copy_monitoring_files(self):
        """Copy the Prometheus, alert-rule and dashboard files into place.

        Sources are looked up under ``monitoring/`` relative to the current
        working directory. A missing source produces a warning instead of
        being skipped silently, because the Compose file bind-mounts
        ``prometheus.yml`` and ``alert_rules.yml``.
        """
        print("\n📁 复制监控配置文件...")

        grafana_dashboard_dir = (
            self.monitoring_dir / 'grafana' / 'provisioning' / 'dashboards'
        )
        grafana_dashboard_dir.mkdir(parents=True, exist_ok=True)

        copy_plan = (
            # Prometheus configuration
            (Path('monitoring/prometheus.yml'),
             self.monitoring_dir / 'prometheus.yml'),
            # Alert rules
            (Path('monitoring/alert_rules.yml'),
             self.monitoring_dir / 'alert_rules.yml'),
            # Grafana dashboard
            (Path('monitoring/grafana_dashboard.json'),
             grafana_dashboard_dir / 'kamaxitong_dashboard.json'),
        )
        for source, destination in copy_plan:
            self._copy_file(source, destination)

        print("✅ 配置文件已复制")

    def start_monitoring(self):
        """Start the stack with ``compose up -d`` inside ``monitoring_dir``.

        The Compose command detected by :meth:`check_docker` is used; if no
        detection has run yet it is performed here. The child process is
        given ``cwd=monitoring_dir`` so this process's working directory is
        left unchanged.

        Returns:
            True when Compose exits with status 0, otherwise False.
        """
        print("\n🚀 启动监控服务...")

        compose_cmd = getattr(self, 'compose_cmd', None)
        if compose_cmd is None:
            compose_cmd, _ = self._detect_compose_command()
        if compose_cmd is None:
            print("❌ 启动失败: Docker Compose 未安装")
            return False
        self.compose_cmd = compose_cmd

        print("启动 Docker Compose...")
        result = subprocess.run(
            compose_cmd + ['up', '-d'],
            cwd=self.monitoring_dir,
            capture_output=True,
            text=True,
        )

        if result.returncode != 0:
            print(f"❌ 启动失败: {result.stderr}")
            return False

        print("✅ 监控服务已启动")
        return True

    def show_access_info(self):
        """Print service URLs, Grafana credentials and common commands."""
        compose = self._compose_display()

        print("\n" + "=" * 60)
        print("✅ 监控服务配置完成!")
        print("=" * 60)
        print("\n📊 访问地址:")
        print(" Grafana 仪表板: http://localhost:3000")
        print(" 用户名: admin")
        print(" 密码: admin123")
        print("\n Prometheus: http://localhost:9090")
        print(" AlertManager: http://localhost:9093")
        print(" Node Exporter: http://localhost:9100")
        print("\n📋 常用命令:")
        print(f" 查看服务状态: {compose} ps")
        print(f" 查看日志: {compose} logs -f")
        print(f" 停止服务: {compose} down")
        print(f" 重启服务: {compose} restart")
        print("\n⚠️ 注意:")
        print(" - 首次启动 Grafana 需要导入仪表板")
        print(" - 定期备份 Grafana 数据")
        print(" - 配置邮件告警需要修改 alertmanager.yml")

    def run(self):
        """Run the full interactive setup flow.

        Exits the process with status 1 when Docker/Compose is unavailable
        or when the user asked to start the stack and the start failed.
        """
        print("=" * 60)
        print("🔧 KaMiXiTong 监控告警配置工具")
        print("=" * 60)

        # Check Docker
        if not self.check_docker():
            sys.exit(1)

        # Generate configuration
        self.create_docker_compose()
        self.create_alertmanager_config()
        self.create_grafana_provisioning()
        self.copy_monitoring_files()

        # Ask whether to start the services
        print("\n是否启动监控服务? (y/N)")
        try:
            answer = input().strip().lower()
        except EOFError:
            # Non-interactive stdin: fall back to the default answer "N".
            answer = ''

        if answer != 'y':
            print("\n📋 手动启动命令:")
            print(f" cd {self.monitoring_dir}")
            print(f" {self._compose_display()} up -d")
            return

        if not self.start_monitoring():
            print("❌ 启动失败,请检查错误信息")
            sys.exit(1)

        time.sleep(5)  # Give the containers a moment to come up.
        self.show_access_info()
|
||||
|
||||
|
||||
def main():
    """Script entry point: build a MonitoringSetup and run its setup flow."""
    MonitoringSetup().run()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run the setup flow only when executed as a script, not on import.
    main()
|
||||
Reference in New Issue
Block a user