feat(monitoring): complete monitoring pipeline and fix presentation flag persistence

add superadmin monitoring dashboard with protected route, menu entry, and monitoring data client
add monitoring overview API endpoint and improve log serialization/aggregation for dashboard use
extend listener health/log handling with robust status/event/timestamp normalization and screenshot payload extraction
improve screenshot persistence and retrieval (timestamp-aware uploads, latest screenshot endpoint fallback)
fix page_progress and auto_progress persistence/serialization across create, update, and detached occurrence flows
align technical and project docs to reflect implemented monitoring and no-version-bump backend changes
add documentation sync log entry and include minor compose env indentation cleanup
This commit is contained in:
2026-03-24 11:18:33 +00:00
parent 3107d0f671
commit 9c330f984f
18 changed files with 2095 additions and 104 deletions

View File

@@ -1,14 +1,95 @@
from flask import Blueprint, jsonify, request
from server.database import Session
from server.permissions import admin_or_higher
from models.models import ClientLog, Client, LogLevel
from server.permissions import admin_or_higher, superadmin_only
from models.models import ClientLog, Client, ClientGroup, LogLevel
from sqlalchemy import desc, func
from datetime import datetime, timedelta, timezone
import json
import os
import glob
from server.serializers import dict_to_camel_case
client_logs_bp = Blueprint("client_logs", __name__, url_prefix="/api/client-logs")
def _grace_period_seconds():
    """Return the heartbeat grace period, in seconds, for the current environment.

    The ``ENV`` variable (default ``"production"``, compared case-insensitively)
    selects which setting is read: ``HEARTBEAT_GRACE_PERIOD_DEV`` (default 180)
    for ``development``/``dev``, otherwise ``HEARTBEAT_GRACE_PERIOD_PROD``
    (default 170).
    """
    is_dev = os.environ.get("ENV", "production").lower() in ("development", "dev")
    if is_dev:
        variable, fallback = "HEARTBEAT_GRACE_PERIOD_DEV", "180"
    else:
        variable, fallback = "HEARTBEAT_GRACE_PERIOD_PROD", "170"
    return int(os.environ.get(variable, fallback))
def _to_utc(dt):
    """Return ``dt`` as a timezone-aware UTC datetime.

    ``None`` is passed through unchanged. A naive datetime is tagged as UTC
    without shifting its wall-clock value; an aware datetime is converted
    to UTC.
    """
    if dt is None:
        return None
    is_naive = dt.tzinfo is None
    return dt.replace(tzinfo=timezone.utc) if is_naive else dt.astimezone(timezone.utc)
def _is_client_alive(last_alive, is_active):
    """Return True when an active client's last heartbeat is within the grace period.

    A client with no ``last_alive`` value, or one that is not active, is never
    considered alive. Otherwise the time elapsed since ``last_alive``
    (normalized to UTC) must not exceed the configured grace period.
    """
    if not (last_alive and is_active):
        return False
    silence = datetime.now(timezone.utc) - _to_utc(last_alive)
    allowed = timedelta(seconds=_grace_period_seconds())
    return silence <= allowed
def _safe_context(raw_context):
    """Decode a stored log context without ever raising.

    Returns an empty dict for a falsy value, the decoded JSON when parsing
    succeeds, and ``{"raw": raw_context}`` when the value is not valid JSON
    or is of a type ``json.loads`` rejects.
    """
    if raw_context:
        try:
            parsed = json.loads(raw_context)
        except (TypeError, json.JSONDecodeError):
            # Keep the undecodable payload visible instead of dropping it.
            parsed = {"raw": raw_context}
        return parsed
    return {}
def _serialize_log_entry(log, include_client_uuid=False):
    """Convert a log record into a JSON-serializable dict.

    Args:
        log: Record exposing ``id``, ``timestamp``, ``level``, ``message`` and
            ``context`` (plus ``client_uuid`` when requested). A falsy value
            yields ``None``.
        include_client_uuid: When true, add the record's ``client_uuid`` to
            the result.

    Returns:
        A dict with ``id``, ISO-formatted ``timestamp``, the ``level`` value,
        ``message`` and decoded ``context``; or ``None`` if ``log`` is falsy.
    """
    if not log:
        return None

    iso_timestamp = log.timestamp.isoformat() if log.timestamp else None
    level_value = log.level.value if log.level else None

    serialized = dict(
        id=log.id,
        timestamp=iso_timestamp,
        level=level_value,
        message=log.message,
        context=_safe_context(log.context),
    )
    if include_client_uuid:
        serialized["client_uuid"] = log.client_uuid
    return serialized
def _determine_client_status(is_alive, process_status, screen_health_status, log_counts):
    """Collapse a client's health signals into a single status string.

    Precedence, highest first:
        ``"offline"``  - the client is not alive.
        ``"critical"`` - process crashed, screen is ``BLACK``/``FROZEN``, or
                         at least one ``ERROR`` log was counted.
        ``"warning"``  - process is starting/stopped, or at least one ``WARN``
                         log was counted.
        ``"healthy"``  - none of the above.

    Args:
        is_alive: Whether the client is currently considered alive.
        process_status: Process status string, or ``None``.
        screen_health_status: Screen health string, or ``None``.
        log_counts: Mapping of log level name to count.
    """
    if not is_alive:
        return "offline"

    critical = (
        process_status == "crashed"
        or screen_health_status in ("BLACK", "FROZEN")
        or log_counts.get("ERROR", 0) > 0
    )
    if critical:
        return "critical"

    degraded = process_status in ("starting", "stopped") or log_counts.get("WARN", 0) > 0
    return "warning" if degraded else "healthy"
def _infer_last_screenshot_ts(client_uuid, screenshots_dir=None):
    """Infer the time of a client's newest screenshot from file modification times.

    Considers ``<client_uuid>.jpg`` and every ``<client_uuid>_*.jpg`` file in
    the screenshots directory and returns the most recent modification time.

    Args:
        client_uuid: Identifier used as the screenshot file-name prefix.
        screenshots_dir: Directory to search. Defaults to the ``screenshots``
            directory located one level above this module.

    Returns:
        A timezone-aware UTC ``datetime``, or ``None`` when no readable
        screenshot file exists.
    """
    if screenshots_dir is None:
        screenshots_dir = os.path.join(os.path.dirname(__file__), "..", "screenshots")

    # Escape both parts so glob metacharacters ("[", "*", "?") in the directory
    # or the identifier are matched literally instead of acting as wildcards.
    pattern = os.path.join(
        glob.escape(screenshots_dir),
        f"{glob.escape(str(client_uuid))}_*.jpg",
    )
    candidate_files = [os.path.join(screenshots_dir, f"{client_uuid}.jpg")]
    candidate_files.extend(glob.glob(pattern))

    newest_mtime = None
    for path in candidate_files:
        try:
            mtime = os.path.getmtime(path)
        except OSError:
            # Missing file, or one removed between listing and stat: skip just
            # this candidate rather than discarding the whole lookup.
            continue
        if newest_mtime is None or mtime > newest_mtime:
            newest_mtime = mtime

    if newest_mtime is None:
        return None
    try:
        return datetime.fromtimestamp(newest_mtime, timezone.utc)
    except (OverflowError, OSError, ValueError):
        # Best-effort: an unrepresentable mtime is treated as "unknown".
        return None
@client_logs_bp.route("/test", methods=["GET"])
def test_client_logs():
"""Test endpoint to verify logging infrastructure (no auth required)"""
@@ -107,22 +188,7 @@ def get_client_logs(uuid):
# Format results
result = []
for log in logs:
entry = {
"id": log.id,
"timestamp": log.timestamp.isoformat() if log.timestamp else None,
"level": log.level.value if log.level else None,
"message": log.message,
"context": {}
}
# Parse context JSON
if log.context:
try:
entry["context"] = json.loads(log.context)
except json.JSONDecodeError:
entry["context"] = {"raw": log.context}
result.append(entry)
result.append(_serialize_log_entry(log))
session.close()
return jsonify({
@@ -212,6 +278,141 @@ def get_logs_summary():
return jsonify({"error": f"Server error: {str(e)}"}), 500
@client_logs_bp.route("/monitoring-overview", methods=["GET"])
@superadmin_only
def get_monitoring_overview():
    """Return a dashboard-friendly monitoring overview for all clients.

    Query parameters:
        hours: Size of the log-count window in hours. Defaults to 24 and is
            clamped to the range 1-168.

    Returns:
        A JSON payload (keys converted by ``dict_to_camel_case``) containing
        aggregate ``summary`` counters, the effective window, and one entry
        per client with its status, log counts, latest log and latest error.
        Responds with 400 when ``hours`` is not an integer and with 500 on
        any other failure.
    """
    # Validate input before opening a session so a bad request is reported as
    # a client error and never holds a database connection.
    try:
        hours = int(request.args.get("hours", 24))
    except (TypeError, ValueError):
        return jsonify({"error": "Invalid 'hours' parameter: expected an integer"}), 400
    hours = max(1, min(hours, 168))

    level_names = ("ERROR", "WARN", "INFO", "DEBUG")

    session = Session()
    try:
        since = datetime.now(timezone.utc) - timedelta(hours=hours)

        clients = (
            session.query(Client, ClientGroup.name.label("group_name"))
            .outerjoin(ClientGroup, Client.group_id == ClientGroup.id)
            .order_by(
                ClientGroup.name.asc(),
                Client.description.asc(),
                Client.hostname.asc(),
                Client.uuid.asc(),
            )
            .all()
        )

        log_stats = (
            session.query(
                ClientLog.client_uuid,
                ClientLog.level,
                func.count(ClientLog.id).label("count"),
            )
            .filter(ClientLog.timestamp >= since)
            .group_by(ClientLog.client_uuid, ClientLog.level)
            .all()
        )

        counts_by_client = {}
        for stat in log_stats:
            if stat.level is None:
                # Rows without a level cannot be bucketed; skip them instead
                # of failing the whole overview.
                continue
            per_client = counts_by_client.setdefault(
                stat.client_uuid, dict.fromkeys(level_names, 0)
            )
            per_client[stat.level.value] = stat.count

        clients_payload = []
        summary_counts = {
            "total_clients": 0,
            "online_clients": 0,
            "offline_clients": 0,
            "healthy_clients": 0,
            "warning_clients": 0,
            "critical_clients": 0,
            "error_logs": 0,
            "warn_logs": 0,
        }

        for client, group_name in clients:
            log_counts = counts_by_client.get(client.uuid) or dict.fromkeys(level_names, 0)

            is_alive = _is_client_alive(client.last_alive, client.is_active)
            process_status = client.process_status.value if client.process_status else None
            screen_health_status = (
                client.screen_health_status.value if client.screen_health_status else None
            )
            status = _determine_client_status(
                is_alive, process_status, screen_health_status, log_counts
            )

            # NOTE(review): these two lookups run once per client (N+1 queries);
            # consider batching them if the number of clients grows.
            latest_log = (
                session.query(ClientLog)
                .filter_by(client_uuid=client.uuid)
                .order_by(desc(ClientLog.timestamp))
                .first()
            )
            latest_error = (
                session.query(ClientLog)
                .filter_by(client_uuid=client.uuid, level=LogLevel.ERROR)
                .order_by(desc(ClientLog.timestamp))
                .first()
            )

            # Fall back to file modification times when no analysis timestamp
            # has been recorded for the client.
            screenshot_ts = client.last_screenshot_analyzed or _infer_last_screenshot_ts(client.uuid)

            clients_payload.append({
                "uuid": client.uuid,
                "hostname": client.hostname,
                "description": client.description,
                "ip": client.ip,
                "model": client.model,
                "group_id": client.group_id,
                "group_name": group_name,
                "registration_time": client.registration_time.isoformat() if client.registration_time else None,
                "last_alive": client.last_alive.isoformat() if client.last_alive else None,
                "is_alive": is_alive,
                "status": status,
                "current_event_id": client.current_event_id,
                "current_process": client.current_process,
                "process_status": process_status,
                "process_pid": client.process_pid,
                "screen_health_status": screen_health_status,
                "last_screenshot_analyzed": screenshot_ts.isoformat() if screenshot_ts else None,
                "last_screenshot_hash": client.last_screenshot_hash,
                "screenshot_url": f"/screenshots/{client.uuid}",
                # The key name is kept fixed for API compatibility; the actual
                # window is reported in "period_hours".
                "log_counts_24h": {
                    "error": log_counts["ERROR"],
                    "warn": log_counts["WARN"],
                    "info": log_counts["INFO"],
                    "debug": log_counts["DEBUG"],
                },
                "latest_log": _serialize_log_entry(latest_log),
                "latest_error": _serialize_log_entry(latest_error),
            })

            summary_counts["total_clients"] += 1
            summary_counts["error_logs"] += log_counts["ERROR"]
            summary_counts["warn_logs"] += log_counts["WARN"]
            if is_alive:
                summary_counts["online_clients"] += 1
            else:
                summary_counts["offline_clients"] += 1
            if status == "healthy":
                summary_counts["healthy_clients"] += 1
            elif status == "warning":
                summary_counts["warning_clients"] += 1
            elif status == "critical":
                summary_counts["critical_clients"] += 1

        payload = {
            "summary": summary_counts,
            "period_hours": hours,
            "grace_period_seconds": _grace_period_seconds(),
            "since": since.isoformat(),
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "clients": clients_payload,
        }
        return jsonify(dict_to_camel_case(payload))
    except Exception as e:
        return jsonify({"error": f"Server error: {str(e)}"}), 500
    finally:
        # Single release point for the session on every exit path.
        session.close()
@client_logs_bp.route("/recent-errors", methods=["GET"])
@admin_or_higher
def get_recent_errors():
@@ -235,14 +436,7 @@ def get_recent_errors():
result = []
for log in logs:
entry = {
"id": log.id,
"client_uuid": log.client_uuid,
"timestamp": log.timestamp.isoformat() if log.timestamp else None,
"message": log.message,
"context": json.loads(log.context) if log.context else {}
}
result.append(entry)
result.append(_serialize_log_entry(log, include_client_uuid=True))
session.close()
return jsonify({