feat(monitoring): complete monitoring pipeline and fix presentation flag persistence
- Add a superadmin monitoring dashboard with a protected route, a menu entry, and a monitoring data client.
- Add a monitoring overview API endpoint and improve log serialization/aggregation for dashboard use.
- Extend listener health/log handling with robust status/event/timestamp normalization and screenshot payload extraction.
- Improve screenshot persistence and retrieval (timestamp-aware uploads, latest-screenshot endpoint fallback).
- Fix page_progress and auto_progress persistence/serialization across create, update, and detached-occurrence flows.
- Align technical and project docs to reflect the implemented monitoring and the no-version-bump backend changes.
- Add a documentation sync log entry and include a minor compose env indentation cleanup.
This commit is contained in:
@@ -1,14 +1,95 @@
|
||||
from flask import Blueprint, jsonify, request
|
||||
from server.database import Session
|
||||
from server.permissions import admin_or_higher
|
||||
from models.models import ClientLog, Client, LogLevel
|
||||
from server.permissions import admin_or_higher, superadmin_only
|
||||
from models.models import ClientLog, Client, ClientGroup, LogLevel
|
||||
from sqlalchemy import desc, func
|
||||
from datetime import datetime, timedelta, timezone
|
||||
import json
|
||||
import os
|
||||
import glob
|
||||
|
||||
from server.serializers import dict_to_camel_case
|
||||
|
||||
# Blueprint for the client-log endpoints; every route registered on it is
# served under the /api/client-logs URL prefix.
client_logs_bp = Blueprint("client_logs", __name__, url_prefix="/api/client-logs")
|
||||
|
||||
|
||||
def _grace_period_seconds():
    """Return the heartbeat grace period, in seconds, for the current environment.

    The ``ENV`` variable (default ``"production"``, compared case-insensitively)
    selects which setting is read: ``HEARTBEAT_GRACE_PERIOD_DEV`` (default 180)
    for ``development``/``dev``, otherwise ``HEARTBEAT_GRACE_PERIOD_PROD``
    (default 170).

    Raises:
        ValueError: If the selected variable is set to a non-integer value.
    """
    is_dev = os.environ.get("ENV", "production").lower() in ("development", "dev")
    if is_dev:
        variable, fallback = "HEARTBEAT_GRACE_PERIOD_DEV", "180"
    else:
        variable, fallback = "HEARTBEAT_GRACE_PERIOD_PROD", "170"
    return int(os.environ.get(variable, fallback))
|
||||
|
||||
|
||||
def _to_utc(dt):
    """Normalize a datetime to a timezone-aware UTC datetime.

    ``None`` is passed through. A naive datetime is labelled as UTC without
    shifting its clock fields; an aware datetime is converted to UTC.
    """
    if dt is None:
        return None
    is_naive = dt.tzinfo is None
    return dt.replace(tzinfo=timezone.utc) if is_naive else dt.astimezone(timezone.utc)
|
||||
|
||||
|
||||
def _is_client_alive(last_alive, is_active):
    """Tell whether a client's last heartbeat is still inside the grace period.

    Returns ``False`` straight away when there is no heartbeat timestamp or the
    client is not active. Otherwise the time elapsed since ``last_alive``
    (normalized to UTC) is compared with the configured grace period.
    """
    if not (last_alive and is_active):
        return False
    elapsed = datetime.now(timezone.utc) - _to_utc(last_alive)
    allowed = timedelta(seconds=_grace_period_seconds())
    return elapsed <= allowed
|
||||
|
||||
|
||||
def _safe_context(raw_context):
    """Decode a stored log context without ever raising.

    A falsy value yields an empty dict. Otherwise the value is parsed as JSON;
    if it cannot be parsed (wrong type or malformed JSON) the original value is
    returned wrapped as ``{"raw": raw_context}``.
    """
    if raw_context:
        try:
            return json.loads(raw_context)
        except (TypeError, json.JSONDecodeError):
            # Keep the unparseable payload visible instead of dropping it.
            return {"raw": raw_context}
    return {}
|
||||
|
||||
|
||||
def _serialize_log_entry(log, include_client_uuid=False):
    """Convert a log record into a plain dict suitable for a JSON response.

    Args:
        log: Record exposing ``id``, ``timestamp``, ``level``, ``message`` and
            ``context`` (plus ``client_uuid`` when requested). A falsy value
            yields ``None``.
        include_client_uuid: When true, add a ``client_uuid`` key to the result.

    Returns:
        A dict with ``id``, ISO-formatted ``timestamp`` (or ``None``), the
        ``level`` value (or ``None``), ``message`` and the decoded ``context``;
        ``None`` when ``log`` is falsy.
    """
    if not log:
        return None

    iso_timestamp = log.timestamp.isoformat() if log.timestamp else None
    level_value = log.level.value if log.level else None

    serialized = dict(
        id=log.id,
        timestamp=iso_timestamp,
        level=level_value,
        message=log.message,
        context=_safe_context(log.context),
    )
    if include_client_uuid:
        serialized["client_uuid"] = log.client_uuid
    return serialized
|
||||
|
||||
|
||||
def _determine_client_status(is_alive, process_status, screen_health_status, log_counts):
    """Collapse a client's health signals into one dashboard status string.

    Precedence, highest first:
        * ``"offline"``  - the client is not alive.
        * ``"critical"`` - process ``"crashed"``, screen ``"BLACK"``/``"FROZEN"``,
          or at least one ``ERROR`` log in ``log_counts``.
        * ``"warning"``  - process ``"starting"``/``"stopped"`` or at least one
          ``WARN`` log.
        * ``"healthy"``  - none of the above.
    """
    if not is_alive:
        return "offline"

    # ``or`` chains keep the lookups lazy: log_counts is only consulted when the
    # process/screen signals have not already decided the outcome.
    is_critical = (
        process_status == "crashed"
        or screen_health_status in ("BLACK", "FROZEN")
        or log_counts.get("ERROR", 0) > 0
    )
    if is_critical:
        return "critical"

    is_degraded = (
        process_status in ("starting", "stopped")
        or log_counts.get("WARN", 0) > 0
    )
    return "warning" if is_degraded else "healthy"
|
||||
|
||||
|
||||
def _infer_last_screenshot_ts(client_uuid):
    """Guess the time of a client's newest screenshot from file modification times.

    Looks in the ``screenshots`` directory next to this module's parent
    directory for ``<uuid>.jpg`` and any ``<uuid>_*.jpg`` files.

    Returns:
        The modification time of the most recently modified match as a
        timezone-aware UTC datetime, or ``None`` when nothing matches or the
        file system lookup fails.
    """
    base_dir = os.path.join(os.path.dirname(__file__), "..", "screenshots")

    primary = os.path.join(base_dir, f"{client_uuid}.jpg")
    matches = [primary] if os.path.exists(primary) else []
    matches += glob.glob(os.path.join(base_dir, f"{client_uuid}_*.jpg"))
    if not matches:
        return None

    try:
        most_recent = max(matches, key=os.path.getmtime)
        modified_at = os.path.getmtime(most_recent)
        return datetime.fromtimestamp(modified_at, timezone.utc)
    except Exception:
        # Best effort only: a file may vanish between listing and stat.
        return None
|
||||
|
||||
|
||||
@client_logs_bp.route("/test", methods=["GET"])
|
||||
def test_client_logs():
|
||||
"""Test endpoint to verify logging infrastructure (no auth required)"""
|
||||
@@ -107,22 +188,7 @@ def get_client_logs(uuid):
|
||||
# Format results
|
||||
result = []
|
||||
for log in logs:
|
||||
entry = {
|
||||
"id": log.id,
|
||||
"timestamp": log.timestamp.isoformat() if log.timestamp else None,
|
||||
"level": log.level.value if log.level else None,
|
||||
"message": log.message,
|
||||
"context": {}
|
||||
}
|
||||
|
||||
# Parse context JSON
|
||||
if log.context:
|
||||
try:
|
||||
entry["context"] = json.loads(log.context)
|
||||
except json.JSONDecodeError:
|
||||
entry["context"] = {"raw": log.context}
|
||||
|
||||
result.append(entry)
|
||||
result.append(_serialize_log_entry(log))
|
||||
|
||||
session.close()
|
||||
return jsonify({
|
||||
@@ -212,6 +278,141 @@ def get_logs_summary():
|
||||
return jsonify({"error": f"Server error: {str(e)}"}), 500
|
||||
|
||||
|
||||
@client_logs_bp.route("/monitoring-overview", methods=["GET"])
@superadmin_only
def get_monitoring_overview():
    """Return a dashboard-friendly monitoring overview for all clients.

    Query parameters:
        hours: Size of the log-count window in hours. Defaults to 24 and is
            clamped to the range 1..168.

    Returns:
        A JSON object (keys converted by ``dict_to_camel_case``) containing
        fleet-wide summary counters, the effective window, the heartbeat grace
        period and one entry per client with its status, process/screen health,
        per-level log counts and most recent log/error entries.
        Responds with 400 when ``hours`` is not an integer and with 500 when
        anything fails while building the overview.
    """
    # Reject malformed input up front so a client mistake is reported as 400
    # rather than being swallowed by the generic 500 handler below.
    try:
        requested_hours = int(request.args.get("hours", 24))
    except (TypeError, ValueError):
        return jsonify({"error": "Invalid 'hours' parameter: expected an integer"}), 400
    # Zero or negative windows would place ``since`` at or after "now".
    hours = max(1, min(requested_hours, 168))

    session = Session()
    try:
        since = datetime.now(timezone.utc) - timedelta(hours=hours)

        clients = (
            session.query(Client, ClientGroup.name.label("group_name"))
            .outerjoin(ClientGroup, Client.group_id == ClientGroup.id)
            .order_by(
                ClientGroup.name.asc(),
                Client.description.asc(),
                Client.hostname.asc(),
                Client.uuid.asc(),
            )
            .all()
        )

        # One grouped query yields the per-client, per-level counts in the window.
        log_stats = (
            session.query(
                ClientLog.client_uuid,
                ClientLog.level,
                func.count(ClientLog.id).label("count"),
            )
            .filter(ClientLog.timestamp >= since)
            .group_by(ClientLog.client_uuid, ClientLog.level)
            .all()
        )

        counts_by_client = {}
        for stat in log_stats:
            if stat.level is None:
                # Rows without a level cannot be bucketed; skipping them keeps
                # one odd row from failing the whole dashboard.
                continue
            client_counts = counts_by_client.setdefault(
                stat.client_uuid,
                {"ERROR": 0, "WARN": 0, "INFO": 0, "DEBUG": 0},
            )
            client_counts[stat.level.value] = stat.count

        clients_payload = []
        summary_counts = {
            "total_clients": 0,
            "online_clients": 0,
            "offline_clients": 0,
            "healthy_clients": 0,
            "warning_clients": 0,
            "critical_clients": 0,
            "error_logs": 0,
            "warn_logs": 0,
        }

        for client, group_name in clients:
            log_counts = counts_by_client.get(
                client.uuid,
                {"ERROR": 0, "WARN": 0, "INFO": 0, "DEBUG": 0},
            )
            is_alive = _is_client_alive(client.last_alive, client.is_active)
            process_status = client.process_status.value if client.process_status else None
            screen_health_status = (
                client.screen_health_status.value if client.screen_health_status else None
            )
            status = _determine_client_status(
                is_alive, process_status, screen_health_status, log_counts
            )

            # NOTE: these two lookups run once per client (not limited to the
            # ``since`` window); consider batching if the fleet grows large.
            latest_log = (
                session.query(ClientLog)
                .filter_by(client_uuid=client.uuid)
                .order_by(desc(ClientLog.timestamp))
                .first()
            )
            latest_error = (
                session.query(ClientLog)
                .filter_by(client_uuid=client.uuid, level=LogLevel.ERROR)
                .order_by(desc(ClientLog.timestamp))
                .first()
            )

            # Fall back to file modification times when no analysis timestamp is stored.
            screenshot_ts = client.last_screenshot_analyzed or _infer_last_screenshot_ts(client.uuid)

            clients_payload.append({
                "uuid": client.uuid,
                "hostname": client.hostname,
                "description": client.description,
                "ip": client.ip,
                "model": client.model,
                "group_id": client.group_id,
                "group_name": group_name,
                "registration_time": client.registration_time.isoformat() if client.registration_time else None,
                "last_alive": client.last_alive.isoformat() if client.last_alive else None,
                "is_alive": is_alive,
                "status": status,
                "current_event_id": client.current_event_id,
                "current_process": client.current_process,
                "process_status": process_status,
                "process_pid": client.process_pid,
                "screen_health_status": screen_health_status,
                "last_screenshot_analyzed": screenshot_ts.isoformat() if screenshot_ts else None,
                "last_screenshot_hash": client.last_screenshot_hash,
                "screenshot_url": f"/screenshots/{client.uuid}",
                # Key name is kept for API compatibility even though the
                # window follows the ``hours`` parameter.
                "log_counts_24h": {
                    "error": log_counts["ERROR"],
                    "warn": log_counts["WARN"],
                    "info": log_counts["INFO"],
                    "debug": log_counts["DEBUG"],
                },
                "latest_log": _serialize_log_entry(latest_log),
                "latest_error": _serialize_log_entry(latest_error),
            })

            summary_counts["total_clients"] += 1
            summary_counts["error_logs"] += log_counts["ERROR"]
            summary_counts["warn_logs"] += log_counts["WARN"]
            summary_counts["online_clients" if is_alive else "offline_clients"] += 1
            if status in ("healthy", "warning", "critical"):
                summary_counts[f"{status}_clients"] += 1

        payload = {
            "summary": summary_counts,
            "period_hours": hours,
            "grace_period_seconds": _grace_period_seconds(),
            "since": since.isoformat(),
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "clients": clients_payload,
        }
        return jsonify(dict_to_camel_case(payload))

    except Exception as e:
        return jsonify({"error": f"Server error: {str(e)}"}), 500
    finally:
        # Single release point for the session on every exit path.
        session.close()
|
||||
|
||||
|
||||
@client_logs_bp.route("/recent-errors", methods=["GET"])
|
||||
@admin_or_higher
|
||||
def get_recent_errors():
|
||||
@@ -235,14 +436,7 @@ def get_recent_errors():
|
||||
|
||||
result = []
|
||||
for log in logs:
|
||||
entry = {
|
||||
"id": log.id,
|
||||
"client_uuid": log.client_uuid,
|
||||
"timestamp": log.timestamp.isoformat() if log.timestamp else None,
|
||||
"message": log.message,
|
||||
"context": json.loads(log.context) if log.context else {}
|
||||
}
|
||||
result.append(entry)
|
||||
result.append(_serialize_log_entry(log, include_client_uuid=True))
|
||||
|
||||
session.close()
|
||||
return jsonify({
|
||||
|
||||
Reference in New Issue
Block a user