from flask import Blueprint, jsonify, request
from server.database import Session
from server.permissions import admin_or_higher, superadmin_only
from models.models import ClientLog, Client, ClientGroup, LogLevel
from sqlalchemy import desc, func
from datetime import datetime, timedelta, timezone
import json
import os
import glob
from server.serializers import dict_to_camel_case

client_logs_bp = Blueprint("client_logs", __name__, url_prefix="/api/client-logs")

# How long (seconds) a received priority screenshot is considered "fresh".
PRIORITY_SCREENSHOT_TTL_SECONDS = int(os.environ.get("PRIORITY_SCREENSHOT_TTL_SECONDS", "120"))


def _grace_period_seconds():
    """Return the heartbeat grace period in seconds, depending on ENV.

    Development gets a slightly longer default (180s) than production (170s);
    both are overridable via environment variables.
    """
    env = os.environ.get("ENV", "production").lower()
    if env in ("development", "dev"):
        return int(os.environ.get("HEARTBEAT_GRACE_PERIOD_DEV", "180"))
    return int(os.environ.get("HEARTBEAT_GRACE_PERIOD_PROD", "170"))


def _to_utc(dt):
    """Normalize a datetime to UTC; naive datetimes are assumed to be UTC."""
    if dt is None:
        return None
    if dt.tzinfo is None:
        return dt.replace(tzinfo=timezone.utc)
    return dt.astimezone(timezone.utc)


def _is_client_alive(last_alive, is_active):
    """A client is alive if active and its last heartbeat is within the grace period."""
    if not last_alive or not is_active:
        return False
    return (datetime.now(timezone.utc) - _to_utc(last_alive)) <= timedelta(seconds=_grace_period_seconds())


def _safe_context(raw_context):
    """Parse a JSON context string; fall back to wrapping the raw value on failure."""
    if not raw_context:
        return {}
    try:
        return json.loads(raw_context)
    except (TypeError, json.JSONDecodeError):
        return {"raw": raw_context}


def _serialize_log_entry(log, include_client_uuid=False):
    """Convert a ClientLog row to a JSON-safe dict (None-safe for missing logs)."""
    if not log:
        return None
    entry = {
        "id": log.id,
        "timestamp": log.timestamp.isoformat() if log.timestamp else None,
        "level": log.level.value if log.level else None,
        "message": log.message,
        "context": _safe_context(log.context),
    }
    if include_client_uuid:
        entry["client_uuid"] = log.client_uuid
    return entry


def _determine_client_status(is_alive, process_status, screen_health_status, log_counts):
    """Derive a coarse status label: offline > critical > warning > healthy."""
    if not is_alive:
        return "offline"
    if process_status == "crashed" or screen_health_status in ("BLACK", "FROZEN"):
        return "critical"
    if log_counts.get("ERROR", 0) > 0:
        return "critical"
    if process_status in ("starting", "stopped") or log_counts.get("WARN", 0) > 0:
        return "warning"
    return "healthy"


def _infer_last_screenshot_ts(client_uuid):
    """Best-effort: derive the newest screenshot time from files on disk.

    Looks at ``<uuid>.jpg`` and any ``<uuid>_*.jpg`` variants; returns a UTC
    datetime of the newest mtime, or None when nothing is found.
    """
    screenshots_dir = os.path.join(os.path.dirname(__file__), "..", "screenshots")
    candidate_files = []
    latest_file = os.path.join(screenshots_dir, f"{client_uuid}.jpg")
    if os.path.exists(latest_file):
        candidate_files.append(latest_file)
    candidate_files.extend(glob.glob(os.path.join(screenshots_dir, f"{client_uuid}_*.jpg")))
    if not candidate_files:
        return None
    try:
        newest_path = max(candidate_files, key=os.path.getmtime)
        return datetime.fromtimestamp(os.path.getmtime(newest_path), timezone.utc)
    except Exception:
        # Files may disappear between globbing and stat'ing; treat as "unknown".
        return None


def _load_screenshot_metadata(client_uuid):
    """Load ``<uuid>_meta.json`` from the screenshots directory; {} on any failure."""
    screenshots_dir = os.path.join(os.path.dirname(__file__), "..", "screenshots")
    metadata_path = os.path.join(screenshots_dir, f"{client_uuid}_meta.json")
    if not os.path.exists(metadata_path):
        return {}
    try:
        with open(metadata_path, "r", encoding="utf-8") as metadata_file:
            data = json.load(metadata_file)
        return data if isinstance(data, dict) else {}
    except Exception:
        return {}


def _is_priority_screenshot_active(priority_received_at):
    """True while a priority screenshot is within its TTL window.

    ``priority_received_at`` is an ISO-8601 string (possibly with a 'Z' suffix);
    unparseable or missing values count as inactive.
    """
    if not priority_received_at:
        return False
    try:
        normalized = str(priority_received_at).replace("Z", "+00:00")
        parsed = datetime.fromisoformat(normalized)
        parsed_utc = _to_utc(parsed)
    except Exception:
        return False
    return (datetime.now(timezone.utc) - parsed_utc) <= timedelta(seconds=PRIORITY_SCREENSHOT_TTL_SECONDS)


@client_logs_bp.route("/test", methods=["GET"])
def test_client_logs():
    """Test endpoint to verify logging infrastructure (no auth required)"""
    session = Session()
    try:
        # Count total logs
        total_logs = session.query(func.count(ClientLog.id)).scalar()
        # Count by level
        error_count = session.query(func.count(ClientLog.id)).filter_by(level=LogLevel.ERROR).scalar()
        warn_count = session.query(func.count(ClientLog.id)).filter_by(level=LogLevel.WARN).scalar()
        info_count = session.query(func.count(ClientLog.id)).filter_by(level=LogLevel.INFO).scalar()
        # Get last 5 logs
        recent_logs = session.query(ClientLog).order_by(desc(ClientLog.timestamp)).limit(5).all()
        recent = []
        for log in recent_logs:
            recent.append({
                "client_uuid": log.client_uuid,
                "level": log.level.value if log.level else None,
                "message": log.message,
                "timestamp": log.timestamp.isoformat() if log.timestamp else None
            })
        return jsonify({
            "status": "ok",
            "infrastructure": "working",
            "total_logs": total_logs,
            "counts": {
                "ERROR": error_count,
                "WARN": warn_count,
                "INFO": info_count
            },
            "recent_5": recent
        })
    except Exception as e:
        return jsonify({"status": "error", "message": str(e)}), 500
    finally:
        # Guarantee the session is returned even on unexpected errors.
        session.close()


# FIX: route was registered as "//logs" with no <uuid> converter, so Flask could
# never supply the view's `uuid` argument. Restored the path parameter, which
# matches the documented example URL below.
@client_logs_bp.route("/<uuid>/logs", methods=["GET"])
@admin_or_higher
def get_client_logs(uuid):
    """
    Get logs for a specific client

    Query params:
    - level: ERROR, WARN, INFO, DEBUG (optional)
    - limit: number of entries (default 50, max 500)
    - since: ISO timestamp (optional)

    Example: /api/client-logs/abc-123/logs?level=ERROR&limit=100
    """
    session = Session()
    try:
        # Verify client exists
        client = session.query(Client).filter_by(uuid=uuid).first()
        if not client:
            return jsonify({"error": "Client not found"}), 404

        # Parse query parameters
        level_param = request.args.get('level')
        limit = min(int(request.args.get('limit', 50)), 500)
        since_param = request.args.get('since')

        # Build query
        query = session.query(ClientLog).filter_by(client_uuid=uuid)

        # Filter by log level
        if level_param:
            try:
                level_enum = LogLevel[level_param.upper()]
                query = query.filter_by(level=level_enum)
            except KeyError:
                return jsonify({"error": f"Invalid level: {level_param}. Must be ERROR, WARN, INFO, or DEBUG"}), 400

        # Filter by timestamp
        if since_param:
            try:
                # Handle both with and without 'Z' suffix
                since_str = since_param.replace('Z', '+00:00')
                since_dt = datetime.fromisoformat(since_str)
                if since_dt.tzinfo is None:
                    since_dt = since_dt.replace(tzinfo=timezone.utc)
                query = query.filter(ClientLog.timestamp >= since_dt)
            except ValueError:
                return jsonify({"error": "Invalid timestamp format. Use ISO 8601"}), 400

        # Execute query
        logs = query.order_by(desc(ClientLog.timestamp)).limit(limit).all()

        # Format results
        result = [_serialize_log_entry(log) for log in logs]

        return jsonify({
            "client_uuid": uuid,
            "logs": result,
            "count": len(result),
            "limit": limit
        })
    except Exception as e:
        return jsonify({"error": f"Server error: {str(e)}"}), 500
    finally:
        session.close()


@client_logs_bp.route("/summary", methods=["GET"])
@admin_or_higher
def get_logs_summary():
    """
    Get summary of errors/warnings across all clients in last 24 hours
    Returns count of ERROR, WARN, INFO logs per client

    Example response:
    {
        "summary": {
            "client-uuid-1": {"ERROR": 5, "WARN": 12, "INFO": 45},
            "client-uuid-2": {"ERROR": 0, "WARN": 3, "INFO": 20}
        },
        "period_hours": 24,
        "timestamp": "2026-03-09T21:00:00Z"
    }
    """
    session = Session()
    try:
        # Get hours parameter (default 24, max 168 = 1 week)
        hours = min(int(request.args.get('hours', 24)), 168)
        since = datetime.now(timezone.utc) - timedelta(hours=hours)

        # Query log counts grouped by client and level
        stats = session.query(
            ClientLog.client_uuid,
            ClientLog.level,
            func.count(ClientLog.id).label('count')
        ).filter(
            ClientLog.timestamp >= since
        ).group_by(
            ClientLog.client_uuid,
            ClientLog.level
        ).all()

        # Build summary dictionary
        summary = {}
        for stat in stats:
            uuid = stat.client_uuid
            if uuid not in summary:
                # Initialize all levels to 0
                summary[uuid] = {
                    "ERROR": 0,
                    "WARN": 0,
                    "INFO": 0,
                    "DEBUG": 0
                }
            summary[uuid][stat.level.value] = stat.count

        # Get client info for enrichment
        clients = session.query(Client.uuid, Client.hostname, Client.description).all()
        client_info = {c.uuid: {"hostname": c.hostname, "description": c.description} for c in clients}

        # Enrich summary with client info
        enriched_summary = {}
        for uuid, counts in summary.items():
            enriched_summary[uuid] = {
                "counts": counts,
                "info": client_info.get(uuid, {})
            }

        return jsonify({
            "summary": enriched_summary,
            "period_hours": hours,
            "since": since.isoformat(),
            "timestamp": datetime.now(timezone.utc).isoformat()
        })
    except Exception as e:
        return jsonify({"error": f"Server error: {str(e)}"}), 500
    finally:
        session.close()


@client_logs_bp.route("/monitoring-overview", methods=["GET"])
@superadmin_only
def get_monitoring_overview():
    """Return a dashboard-friendly monitoring overview for all clients."""
    session = Session()
    try:
        hours = min(int(request.args.get("hours", 24)), 168)
        since = datetime.now(timezone.utc) - timedelta(hours=hours)

        clients = (
            session.query(Client, ClientGroup.name.label("group_name"))
            .outerjoin(ClientGroup, Client.group_id == ClientGroup.id)
            .order_by(ClientGroup.name.asc(), Client.description.asc(), Client.hostname.asc(), Client.uuid.asc())
            .all()
        )

        log_stats = (
            session.query(
                ClientLog.client_uuid,
                ClientLog.level,
                func.count(ClientLog.id).label("count"),
            )
            .filter(ClientLog.timestamp >= since)
            .group_by(ClientLog.client_uuid, ClientLog.level)
            .all()
        )

        # Aggregate per-client counts keyed by level name.
        counts_by_client = {}
        for stat in log_stats:
            if stat.client_uuid not in counts_by_client:
                counts_by_client[stat.client_uuid] = {
                    "ERROR": 0,
                    "WARN": 0,
                    "INFO": 0,
                    "DEBUG": 0,
                }
            counts_by_client[stat.client_uuid][stat.level.value] = stat.count

        clients_payload = []
        summary_counts = {
            "total_clients": 0,
            "online_clients": 0,
            "offline_clients": 0,
            "healthy_clients": 0,
            "warning_clients": 0,
            "critical_clients": 0,
            "error_logs": 0,
            "warn_logs": 0,
            "active_priority_screenshots": 0,
        }

        for client, group_name in clients:
            log_counts = counts_by_client.get(
                client.uuid,
                {"ERROR": 0, "WARN": 0, "INFO": 0, "DEBUG": 0},
            )
            is_alive = _is_client_alive(client.last_alive, client.is_active)
            process_status = client.process_status.value if client.process_status else None
            screen_health_status = client.screen_health_status.value if client.screen_health_status else None
            status = _determine_client_status(is_alive, process_status, screen_health_status, log_counts)

            latest_log = (
                session.query(ClientLog)
                .filter_by(client_uuid=client.uuid)
                .order_by(desc(ClientLog.timestamp))
                .first()
            )
            latest_error = (
                session.query(ClientLog)
                .filter_by(client_uuid=client.uuid, level=LogLevel.ERROR)
                .order_by(desc(ClientLog.timestamp))
                .first()
            )

            # Prefer the DB's analyzed timestamp; fall back to file mtimes on disk.
            screenshot_ts = client.last_screenshot_analyzed or _infer_last_screenshot_ts(client.uuid)
            screenshot_meta = _load_screenshot_metadata(client.uuid)
            latest_screenshot_type = screenshot_meta.get("latest_screenshot_type") or "periodic"
            priority_screenshot_type = screenshot_meta.get("last_priority_screenshot_type")
            priority_screenshot_received_at = screenshot_meta.get("last_priority_received_at")
            has_active_priority = _is_priority_screenshot_active(priority_screenshot_received_at)
            screenshot_url = f"/screenshots/{client.uuid}/priority" if has_active_priority else f"/screenshots/{client.uuid}"

            clients_payload.append({
                "uuid": client.uuid,
                "hostname": client.hostname,
                "description": client.description,
                "ip": client.ip,
                "model": client.model,
                "group_id": client.group_id,
                "group_name": group_name,
                "registration_time": client.registration_time.isoformat() if client.registration_time else None,
                "last_alive": client.last_alive.isoformat() if client.last_alive else None,
                "is_alive": is_alive,
                "status": status,
                "current_event_id": client.current_event_id,
                "current_process": client.current_process,
                "process_status": process_status,
                "process_pid": client.process_pid,
                "screen_health_status": screen_health_status,
                "last_screenshot_analyzed": screenshot_ts.isoformat() if screenshot_ts else None,
                "last_screenshot_hash": client.last_screenshot_hash,
                "latest_screenshot_type": latest_screenshot_type,
                "priority_screenshot_type": priority_screenshot_type,
                "priority_screenshot_received_at": priority_screenshot_received_at,
                "has_active_priority_screenshot": has_active_priority,
                "screenshot_url": screenshot_url,
                "log_counts_24h": {
                    "error": log_counts["ERROR"],
                    "warn": log_counts["WARN"],
                    "info": log_counts["INFO"],
                    "debug": log_counts["DEBUG"],
                },
                "latest_log": _serialize_log_entry(latest_log),
                "latest_error": _serialize_log_entry(latest_error),
            })

            summary_counts["total_clients"] += 1
            summary_counts["error_logs"] += log_counts["ERROR"]
            summary_counts["warn_logs"] += log_counts["WARN"]
            if has_active_priority:
                summary_counts["active_priority_screenshots"] += 1
            if is_alive:
                summary_counts["online_clients"] += 1
            else:
                summary_counts["offline_clients"] += 1
            if status == "healthy":
                summary_counts["healthy_clients"] += 1
            elif status == "warning":
                summary_counts["warning_clients"] += 1
            elif status == "critical":
                summary_counts["critical_clients"] += 1

        payload = {
            "summary": summary_counts,
            "period_hours": hours,
            "grace_period_seconds": _grace_period_seconds(),
            "since": since.isoformat(),
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "clients": clients_payload,
        }
        return jsonify(dict_to_camel_case(payload))
    except Exception as e:
        return jsonify({"error": f"Server error: {str(e)}"}), 500
    finally:
        session.close()


@client_logs_bp.route("/recent-errors", methods=["GET"])
@admin_or_higher
def get_recent_errors():
    """
    Get recent ERROR logs across all clients

    Query params:
    - limit: number of entries (default 20, max 100)

    Useful for system-wide error monitoring
    """
    session = Session()
    try:
        limit = min(int(request.args.get('limit', 20)), 100)

        # Get recent errors from all clients
        logs = session.query(ClientLog).filter_by(
            level=LogLevel.ERROR
        ).order_by(
            desc(ClientLog.timestamp)
        ).limit(limit).all()

        result = [_serialize_log_entry(log, include_client_uuid=True) for log in logs]

        return jsonify({
            "errors": result,
            "count": len(result)
        })
    except Exception as e:
        return jsonify({"error": f"Server error: {str(e)}"}), 500
    finally:
        session.close()