add superadmin monitoring dashboard with protected route, menu entry, and monitoring data client add monitoring overview API endpoint and improve log serialization/aggregation for dashboard use extend listener health/log handling with robust status/event/timestamp normalization and screenshot payload extraction improve screenshot persistence and retrieval (timestamp-aware uploads, latest screenshot endpoint fallback) fix page_progress and auto_progress persistence/serialization across create, update, and detached occurrence flows align technical and project docs to reflect implemented monitoring and no-version-bump backend changes add documentation sync log entry and include minor compose env indentation cleanup
450 lines
15 KiB
Python
450 lines
15 KiB
Python
from flask import Blueprint, jsonify, request
|
|
from server.database import Session
|
|
from server.permissions import admin_or_higher, superadmin_only
|
|
from models.models import ClientLog, Client, ClientGroup, LogLevel
|
|
from sqlalchemy import desc, func
|
|
from datetime import datetime, timedelta, timezone
|
|
import json
|
|
import os
|
|
import glob
|
|
|
|
from server.serializers import dict_to_camel_case
|
|
|
|
# Blueprint grouping all client-log and monitoring endpoints under /api/client-logs.
client_logs_bp = Blueprint("client_logs", __name__, url_prefix="/api/client-logs")
|
|
|
|
|
|
def _grace_period_seconds():
|
|
env = os.environ.get("ENV", "production").lower()
|
|
if env in ("development", "dev"):
|
|
return int(os.environ.get("HEARTBEAT_GRACE_PERIOD_DEV", "180"))
|
|
return int(os.environ.get("HEARTBEAT_GRACE_PERIOD_PROD", "170"))
|
|
|
|
|
|
def _to_utc(dt):
|
|
if dt is None:
|
|
return None
|
|
if dt.tzinfo is None:
|
|
return dt.replace(tzinfo=timezone.utc)
|
|
return dt.astimezone(timezone.utc)
|
|
|
|
|
|
def _is_client_alive(last_alive, is_active):
    """Return True when an active client heartbeated within the grace period."""
    if not is_active or not last_alive:
        return False
    heartbeat_age = datetime.now(timezone.utc) - _to_utc(last_alive)
    return heartbeat_age <= timedelta(seconds=_grace_period_seconds())
|
|
|
|
|
|
def _safe_context(raw_context):
|
|
if not raw_context:
|
|
return {}
|
|
try:
|
|
return json.loads(raw_context)
|
|
except (TypeError, json.JSONDecodeError):
|
|
return {"raw": raw_context}
|
|
|
|
|
|
def _serialize_log_entry(log, include_client_uuid=False):
    """Convert a ClientLog row to a JSON-safe dict; returns None for no log.

    The context column is parsed via _safe_context so malformed JSON never
    breaks serialization. client_uuid is only included on request, for
    cross-client listings.
    """
    if not log:
        return None

    payload = {
        "id": log.id,
        "timestamp": log.timestamp.isoformat() if log.timestamp else None,
        "level": log.level.value if log.level else None,
        "message": log.message,
        "context": _safe_context(log.context),
    }
    if include_client_uuid:
        payload["client_uuid"] = log.client_uuid
    return payload
|
|
|
|
|
|
def _determine_client_status(is_alive, process_status, screen_health_status, log_counts):
|
|
if not is_alive:
|
|
return "offline"
|
|
if process_status == "crashed" or screen_health_status in ("BLACK", "FROZEN"):
|
|
return "critical"
|
|
if log_counts.get("ERROR", 0) > 0:
|
|
return "critical"
|
|
if process_status in ("starting", "stopped") or log_counts.get("WARN", 0) > 0:
|
|
return "warning"
|
|
return "healthy"
|
|
|
|
|
|
def _infer_last_screenshot_ts(client_uuid):
|
|
screenshots_dir = os.path.join(os.path.dirname(__file__), "..", "screenshots")
|
|
|
|
candidate_files = []
|
|
latest_file = os.path.join(screenshots_dir, f"{client_uuid}.jpg")
|
|
if os.path.exists(latest_file):
|
|
candidate_files.append(latest_file)
|
|
|
|
candidate_files.extend(glob.glob(os.path.join(screenshots_dir, f"{client_uuid}_*.jpg")))
|
|
if not candidate_files:
|
|
return None
|
|
|
|
try:
|
|
newest_path = max(candidate_files, key=os.path.getmtime)
|
|
return datetime.fromtimestamp(os.path.getmtime(newest_path), timezone.utc)
|
|
except Exception:
|
|
return None
|
|
|
|
|
|
@client_logs_bp.route("/test", methods=["GET"])
def test_client_logs():
    """Test endpoint to verify logging infrastructure (no auth required).

    Returns the total log count, per-level counts (ERROR/WARN/INFO), and the
    5 most recent entries. Intentionally unauthenticated so it can serve as
    a quick smoke test of the DB + logging pipeline.
    """
    session = Session()
    try:
        # Count total logs
        total_logs = session.query(func.count(ClientLog.id)).scalar()

        # Count by level (DEBUG intentionally omitted from this quick check)
        error_count = session.query(func.count(ClientLog.id)).filter_by(level=LogLevel.ERROR).scalar()
        warn_count = session.query(func.count(ClientLog.id)).filter_by(level=LogLevel.WARN).scalar()
        info_count = session.query(func.count(ClientLog.id)).filter_by(level=LogLevel.INFO).scalar()

        # Get last 5 logs, newest first
        recent_logs = session.query(ClientLog).order_by(desc(ClientLog.timestamp)).limit(5).all()
        recent = [
            {
                "client_uuid": log.client_uuid,
                "level": log.level.value if log.level else None,
                "message": log.message,
                "timestamp": log.timestamp.isoformat() if log.timestamp else None,
            }
            for log in recent_logs
        ]

        return jsonify({
            "status": "ok",
            "infrastructure": "working",
            "total_logs": total_logs,
            "counts": {
                "ERROR": error_count,
                "WARN": warn_count,
                "INFO": info_count,
            },
            "recent_5": recent,
        })
    except Exception as e:
        return jsonify({"status": "error", "message": str(e)}), 500
    finally:
        # Guarantee the session is released on every path, including when
        # jsonify itself raises — the old per-branch close() could leak then.
        session.close()
|
|
|
|
|
|
@client_logs_bp.route("/<uuid>/logs", methods=["GET"])
@admin_or_higher
def get_client_logs(uuid):
    """
    Get logs for a specific client

    Query params:
    - level: ERROR, WARN, INFO, DEBUG (optional)
    - limit: number of entries (default 50, max 500)
    - since: ISO timestamp (optional)

    Example: /api/client-logs/abc-123/logs?level=ERROR&limit=100

    Returns 404 for an unknown client, 400 for invalid query parameters,
    500 on unexpected server errors.
    """
    session = Session()
    try:
        # Verify client exists before doing any log work
        client = session.query(Client).filter_by(uuid=uuid).first()
        if not client:
            return jsonify({"error": "Client not found"}), 404

        # Parse query parameters
        level_param = request.args.get('level')
        since_param = request.args.get('since')

        # A non-numeric limit is a client error (400), not a server error —
        # previously int() raised ValueError and surfaced as a 500.
        try:
            limit = min(int(request.args.get('limit', 50)), 500)
        except ValueError:
            return jsonify({"error": "Invalid limit: must be an integer"}), 400

        # Build query
        query = session.query(ClientLog).filter_by(client_uuid=uuid)

        # Filter by log level
        if level_param:
            try:
                query = query.filter_by(level=LogLevel[level_param.upper()])
            except KeyError:
                return jsonify({"error": f"Invalid level: {level_param}. Must be ERROR, WARN, INFO, or DEBUG"}), 400

        # Filter by timestamp
        if since_param:
            try:
                # Handle both with and without 'Z' suffix; naive input is
                # treated as UTC.
                since_dt = datetime.fromisoformat(since_param.replace('Z', '+00:00'))
                if since_dt.tzinfo is None:
                    since_dt = since_dt.replace(tzinfo=timezone.utc)
                query = query.filter(ClientLog.timestamp >= since_dt)
            except ValueError:
                return jsonify({"error": "Invalid timestamp format. Use ISO 8601"}), 400

        # Execute query, newest first
        logs = query.order_by(desc(ClientLog.timestamp)).limit(limit).all()
        result = [_serialize_log_entry(log) for log in logs]

        return jsonify({
            "client_uuid": uuid,
            "logs": result,
            "count": len(result),
            "limit": limit,
        })

    except Exception as e:
        return jsonify({"error": f"Server error: {str(e)}"}), 500
    finally:
        # Single guaranteed cleanup point instead of a close() in every branch.
        session.close()
|
|
|
|
|
|
@client_logs_bp.route("/summary", methods=["GET"])
@admin_or_higher
def get_logs_summary():
    """
    Get summary of errors/warnings across all clients in last 24 hours
    Returns count of ERROR, WARN, INFO logs per client

    Query params:
    - hours: look-back window (default 24, max 168 = 1 week)

    Example response:
    {
      "summary": {
        "client-uuid-1": {"ERROR": 5, "WARN": 12, "INFO": 45},
        "client-uuid-2": {"ERROR": 0, "WARN": 3, "INFO": 20}
      },
      "period_hours": 24,
      "timestamp": "2026-03-09T21:00:00Z"
    }
    """
    session = Session()
    try:
        # A non-numeric hours value is a client error (400) — previously
        # int() raised ValueError and surfaced as a 500.
        try:
            hours = min(int(request.args.get('hours', 24)), 168)
        except ValueError:
            return jsonify({"error": "Invalid hours: must be an integer"}), 400
        since = datetime.now(timezone.utc) - timedelta(hours=hours)

        # Query log counts grouped by client and level
        stats = session.query(
            ClientLog.client_uuid,
            ClientLog.level,
            func.count(ClientLog.id).label('count')
        ).filter(
            ClientLog.timestamp >= since
        ).group_by(
            ClientLog.client_uuid,
            ClientLog.level
        ).all()

        # Build summary dictionary; all levels initialized to 0 per client
        summary = {}
        for stat in stats:
            counts = summary.setdefault(
                stat.client_uuid,
                {"ERROR": 0, "WARN": 0, "INFO": 0, "DEBUG": 0},
            )
            counts[stat.level.value] = stat.count

        # Get client info for enrichment
        clients = session.query(Client.uuid, Client.hostname, Client.description).all()
        client_info = {c.uuid: {"hostname": c.hostname, "description": c.description} for c in clients}

        # Enrich summary with client info
        enriched_summary = {
            uuid: {"counts": counts, "info": client_info.get(uuid, {})}
            for uuid, counts in summary.items()
        }

        return jsonify({
            "summary": enriched_summary,
            "period_hours": hours,
            "since": since.isoformat(),
            "timestamp": datetime.now(timezone.utc).isoformat()
        })

    except Exception as e:
        return jsonify({"error": f"Server error: {str(e)}"}), 500
    finally:
        # Guaranteed cleanup instead of a close() call in every branch.
        session.close()
|
|
|
|
|
|
@client_logs_bp.route("/monitoring-overview", methods=["GET"])
@superadmin_only
def get_monitoring_overview():
    """Return a dashboard-friendly monitoring overview for all clients.

    Query params:
    - hours: look-back window for log aggregation (default 24, max 168)

    Response (camelCased by dict_to_camel_case): a fleet-wide summary block,
    the reporting window, the heartbeat grace period, and one entry per
    client with identity, group, liveness/status, process and screen-health
    fields, screenshot info, per-level log counts, and the latest log /
    latest error entries. Superadmin only.
    """
    session = Session()
    try:
        # NOTE(review): a non-numeric "hours" raises ValueError here and is
        # reported as a 500 by the broad except below.
        hours = min(int(request.args.get("hours", 24)), 168)
        since = datetime.now(timezone.utc) - timedelta(hours=hours)

        # All clients with their (optional) group name, in a stable
        # dashboard ordering: group, description, hostname, uuid.
        clients = (
            session.query(Client, ClientGroup.name.label("group_name"))
            .outerjoin(ClientGroup, Client.group_id == ClientGroup.id)
            .order_by(ClientGroup.name.asc(), Client.description.asc(), Client.hostname.asc(), Client.uuid.asc())
            .all()
        )

        # Per-client, per-level log counts within the window, in one query.
        log_stats = (
            session.query(
                ClientLog.client_uuid,
                ClientLog.level,
                func.count(ClientLog.id).label("count"),
            )
            .filter(ClientLog.timestamp >= since)
            .group_by(ClientLog.client_uuid, ClientLog.level)
            .all()
        )

        # Pivot the grouped rows into {uuid: {level: count}} with all four
        # levels defaulted to 0.
        counts_by_client = {}
        for stat in log_stats:
            if stat.client_uuid not in counts_by_client:
                counts_by_client[stat.client_uuid] = {
                    "ERROR": 0,
                    "WARN": 0,
                    "INFO": 0,
                    "DEBUG": 0,
                }
            counts_by_client[stat.client_uuid][stat.level.value] = stat.count

        clients_payload = []
        # Fleet-wide aggregates, accumulated while iterating clients below.
        summary_counts = {
            "total_clients": 0,
            "online_clients": 0,
            "offline_clients": 0,
            "healthy_clients": 0,
            "warning_clients": 0,
            "critical_clients": 0,
            "error_logs": 0,
            "warn_logs": 0,
        }

        for client, group_name in clients:
            log_counts = counts_by_client.get(
                client.uuid,
                {"ERROR": 0, "WARN": 0, "INFO": 0, "DEBUG": 0},
            )
            is_alive = _is_client_alive(client.last_alive, client.is_active)
            process_status = client.process_status.value if client.process_status else None
            screen_health_status = client.screen_health_status.value if client.screen_health_status else None
            status = _determine_client_status(is_alive, process_status, screen_health_status, log_counts)

            # NOTE(review): these two per-client queries make this endpoint
            # O(2n) extra round-trips; acceptable for small fleets.
            latest_log = (
                session.query(ClientLog)
                .filter_by(client_uuid=client.uuid)
                .order_by(desc(ClientLog.timestamp))
                .first()
            )
            latest_error = (
                session.query(ClientLog)
                .filter_by(client_uuid=client.uuid, level=LogLevel.ERROR)
                .order_by(desc(ClientLog.timestamp))
                .first()
            )

            # Prefer the analyzed timestamp stored on the client; fall back
            # to the newest screenshot file's mtime on disk.
            screenshot_ts = client.last_screenshot_analyzed or _infer_last_screenshot_ts(client.uuid)

            clients_payload.append({
                "uuid": client.uuid,
                "hostname": client.hostname,
                "description": client.description,
                "ip": client.ip,
                "model": client.model,
                "group_id": client.group_id,
                "group_name": group_name,
                "registration_time": client.registration_time.isoformat() if client.registration_time else None,
                "last_alive": client.last_alive.isoformat() if client.last_alive else None,
                "is_alive": is_alive,
                "status": status,
                "current_event_id": client.current_event_id,
                "current_process": client.current_process,
                "process_status": process_status,
                "process_pid": client.process_pid,
                "screen_health_status": screen_health_status,
                "last_screenshot_analyzed": screenshot_ts.isoformat() if screenshot_ts else None,
                "last_screenshot_hash": client.last_screenshot_hash,
                "screenshot_url": f"/screenshots/{client.uuid}",
                "log_counts_24h": {
                    "error": log_counts["ERROR"],
                    "warn": log_counts["WARN"],
                    "info": log_counts["INFO"],
                    "debug": log_counts["DEBUG"],
                },
                "latest_log": _serialize_log_entry(latest_log),
                "latest_error": _serialize_log_entry(latest_error),
            })

            # Accumulate fleet-wide totals for the summary block.
            summary_counts["total_clients"] += 1
            summary_counts["error_logs"] += log_counts["ERROR"]
            summary_counts["warn_logs"] += log_counts["WARN"]
            if is_alive:
                summary_counts["online_clients"] += 1
            else:
                summary_counts["offline_clients"] += 1
            if status == "healthy":
                summary_counts["healthy_clients"] += 1
            elif status == "warning":
                summary_counts["warning_clients"] += 1
            elif status == "critical":
                summary_counts["critical_clients"] += 1

        payload = {
            "summary": summary_counts,
            "period_hours": hours,
            "grace_period_seconds": _grace_period_seconds(),
            "since": since.isoformat(),
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "clients": clients_payload,
        }
        session.close()
        # Keys are converted to camelCase for the frontend dashboard.
        return jsonify(dict_to_camel_case(payload))

    except Exception as e:
        session.close()
        return jsonify({"error": f"Server error: {str(e)}"}), 500
|
|
|
|
|
|
@client_logs_bp.route("/recent-errors", methods=["GET"])
@admin_or_higher
def get_recent_errors():
    """
    Get recent ERROR logs across all clients

    Query params:
    - limit: number of entries (default 20, max 100)

    Useful for system-wide error monitoring. Returns 400 for an invalid
    limit, 500 on unexpected server errors.
    """
    session = Session()
    try:
        # A non-numeric limit is a client error (400) — previously int()
        # raised ValueError and surfaced as a 500.
        try:
            limit = min(int(request.args.get('limit', 20)), 100)
        except ValueError:
            return jsonify({"error": "Invalid limit: must be an integer"}), 400

        # Get recent errors from all clients, newest first
        logs = session.query(ClientLog).filter_by(
            level=LogLevel.ERROR
        ).order_by(
            desc(ClientLog.timestamp)
        ).limit(limit).all()

        # Include client_uuid so consumers can attribute each error.
        result = [_serialize_log_entry(log, include_client_uuid=True) for log in logs]

        return jsonify({
            "errors": result,
            "count": len(result)
        })

    except Exception as e:
        return jsonify({"error": f"Server error: {str(e)}"}), 500
    finally:
        # Guaranteed cleanup instead of a close() call in every branch.
        session.close()
|