# infoscreen/server/routes/client_logs.py

from flask import Blueprint, jsonify, request
from server.database import Session
from server.permissions import admin_or_higher, superadmin_only
from models.models import ClientLog, Client, ClientGroup, LogLevel
from sqlalchemy import desc, func
from datetime import datetime, timedelta, timezone
import json
import os
import glob

from server.serializers import dict_to_camel_case

# Blueprint that groups every endpoint in this module under /api/client-logs.
client_logs_bp = Blueprint("client_logs", __name__, url_prefix="/api/client-logs")
# How long (in seconds) a priority screenshot keeps counting as "active" after
# its recorded receive time; overridable through the environment variable of
# the same name, defaulting to 120.
PRIORITY_SCREENSHOT_TTL_SECONDS = int(os.environ.get("PRIORITY_SCREENSHOT_TTL_SECONDS", "120"))


def _grace_period_seconds():
    env = os.environ.get("ENV", "production").lower()
    if env in ("development", "dev"):
        return int(os.environ.get("HEARTBEAT_GRACE_PERIOD_DEV", "180"))
    return int(os.environ.get("HEARTBEAT_GRACE_PERIOD_PROD", "170"))


def _to_utc(dt):
    if dt is None:
        return None
    if dt.tzinfo is None:
        return dt.replace(tzinfo=timezone.utc)
    return dt.astimezone(timezone.utc)


def _is_client_alive(last_alive, is_active):
    if not last_alive or not is_active:
        return False
    return (datetime.now(timezone.utc) - _to_utc(last_alive)) <= timedelta(seconds=_grace_period_seconds())


def _safe_context(raw_context):
    if not raw_context:
        return {}
    try:
        return json.loads(raw_context)
    except (TypeError, json.JSONDecodeError):
        return {"raw": raw_context}


def _serialize_log_entry(log, include_client_uuid=False):
    if not log:
        return None

    entry = {
        "id": log.id,
        "timestamp": log.timestamp.isoformat() if log.timestamp else None,
        "level": log.level.value if log.level else None,
        "message": log.message,
        "context": _safe_context(log.context),
    }
    if include_client_uuid:
        entry["client_uuid"] = log.client_uuid
    return entry


def _determine_client_status(is_alive, process_status, screen_health_status, log_counts):
    if not is_alive:
        return "offline"
    if process_status == "crashed" or screen_health_status in ("BLACK", "FROZEN"):
        return "critical"
    if log_counts.get("ERROR", 0) > 0:
        return "critical"
    if process_status in ("starting", "stopped") or log_counts.get("WARN", 0) > 0:
        return "warning"
    return "healthy"


def _infer_last_screenshot_ts(client_uuid):
    screenshots_dir = os.path.join(os.path.dirname(__file__), "..", "screenshots")

    candidate_files = []
    latest_file = os.path.join(screenshots_dir, f"{client_uuid}.jpg")
    if os.path.exists(latest_file):
        candidate_files.append(latest_file)

    candidate_files.extend(glob.glob(os.path.join(screenshots_dir, f"{client_uuid}_*.jpg")))
    if not candidate_files:
        return None

    try:
        newest_path = max(candidate_files, key=os.path.getmtime)
        return datetime.fromtimestamp(os.path.getmtime(newest_path), timezone.utc)
    except Exception:
        return None


def _load_screenshot_metadata(client_uuid):
    screenshots_dir = os.path.join(os.path.dirname(__file__), "..", "screenshots")
    metadata_path = os.path.join(screenshots_dir, f"{client_uuid}_meta.json")
    if not os.path.exists(metadata_path):
        return {}

    try:
        with open(metadata_path, "r", encoding="utf-8") as metadata_file:
            data = json.load(metadata_file)
        return data if isinstance(data, dict) else {}
    except Exception:
        return {}


def _is_priority_screenshot_active(priority_received_at):
    if not priority_received_at:
        return False

    try:
        normalized = str(priority_received_at).replace("Z", "+00:00")
        parsed = datetime.fromisoformat(normalized)
        parsed_utc = _to_utc(parsed)
    except Exception:
        return False

    return (datetime.now(timezone.utc) - parsed_utc) <= timedelta(seconds=PRIORITY_SCREENSHOT_TTL_SECONDS)


@client_logs_bp.route("/test", methods=["GET"])
def test_client_logs():
    """Test endpoint to verify logging infrastructure (no auth required).

    Responds with the total number of stored logs, separate counts for the
    ERROR, WARN and INFO levels, and the five most recent log entries.
    Any failure is reported as a 500 response carrying the exception message.
    """
    # NOTE(review): this route has no auth decorator yet returns client UUIDs
    # and log messages; confirm that exposure is intended outside development.
    session = Session()
    try:
        # Count total logs (all levels, including ones not broken out below)
        total_logs = session.query(func.count(ClientLog.id)).scalar()

        # Count by level
        counts = {}
        for label, level in (("ERROR", LogLevel.ERROR), ("WARN", LogLevel.WARN), ("INFO", LogLevel.INFO)):
            counts[label] = session.query(func.count(ClientLog.id)).filter_by(level=level).scalar()

        # Get last 5 logs
        recent_logs = session.query(ClientLog).order_by(desc(ClientLog.timestamp)).limit(5).all()
        recent = [
            {
                "client_uuid": log.client_uuid,
                "level": log.level.value if log.level else None,
                "message": log.message,
                "timestamp": log.timestamp.isoformat() if log.timestamp else None,
            }
            for log in recent_logs
        ]

        return jsonify({
            "status": "ok",
            "infrastructure": "working",
            "total_logs": total_logs,
            "counts": counts,
            "recent_5": recent
        })
    except Exception as e:
        return jsonify({"status": "error", "message": str(e)}), 500
    finally:
        # Single cleanup point: runs on success, on handled errors, and on
        # anything that escapes the handler above.
        session.close()


@client_logs_bp.route("/<uuid>/logs", methods=["GET"])
@admin_or_higher
def get_client_logs(uuid):
    """
    Get logs for a specific client, newest first
    Query params:
      - level: ERROR, WARN, INFO, DEBUG (optional, case-insensitive)
      - limit: number of entries (default 50, max 500, must not be negative)
      - since: ISO timestamp (optional; a value without offset is taken as UTC)

    Responses:
      - 200: {"client_uuid", "logs", "count", "limit"}
      - 400: invalid level, limit or since value
      - 404: no client with the given uuid
      - 500: any unexpected error

    Example: /api/client-logs/abc-123/logs?level=ERROR&limit=100
    """
    session = Session()
    try:
        # Verify client exists
        if not session.query(Client).filter_by(uuid=uuid).first():
            return jsonify({"error": "Client not found"}), 404

        # Parse query parameters
        level_param = request.args.get('level')
        since_param = request.args.get('since')
        try:
            limit = int(request.args.get('limit', 50))
        except ValueError:
            limit = None
        # A negative LIMIT is rejected by some databases and means "no limit"
        # on others, so refuse it here instead of handing it to the query.
        if limit is None or limit < 0:
            return jsonify({"error": "Invalid limit. Must be a non-negative integer"}), 400
        limit = min(limit, 500)

        # Build query
        query = session.query(ClientLog).filter_by(client_uuid=uuid)

        # Filter by log level
        if level_param:
            try:
                level_enum = LogLevel[level_param.upper()]
            except KeyError:
                return jsonify({"error": f"Invalid level: {level_param}. Must be ERROR, WARN, INFO, or DEBUG"}), 400
            query = query.filter_by(level=level_enum)

        # Filter by timestamp
        if since_param:
            try:
                # Handle both with and without 'Z' suffix
                since_dt = datetime.fromisoformat(since_param.replace('Z', '+00:00'))
            except ValueError:
                return jsonify({"error": "Invalid timestamp format. Use ISO 8601"}), 400
            # Normalize to UTC so a value with another offset is compared as
            # the same instant the other endpoints' UTC cut-offs would use.
            query = query.filter(ClientLog.timestamp >= _to_utc(since_dt))

        # Execute query
        logs = query.order_by(desc(ClientLog.timestamp)).limit(limit).all()

        # Format results
        result = [_serialize_log_entry(log) for log in logs]

        return jsonify({
            "client_uuid": uuid,
            "logs": result,
            "count": len(result),
            "limit": limit
        })

    except Exception as e:
        return jsonify({"error": f"Server error: {str(e)}"}), 500
    finally:
        # Single cleanup point for every return path above.
        session.close()


@client_logs_bp.route("/summary", methods=["GET"])
@admin_or_higher
def get_logs_summary():
    """
    Get summary of log counts per level across all clients for a recent period
    Query params:
      - hours: size of the look-back window (default 24, max 168 = 1 week,
        must not be negative)

    Only clients that logged something within the window appear in the result.

    Example response:
    {
      "summary": {
        "client-uuid-1": {
          "counts": {"ERROR": 5, "WARN": 12, "INFO": 45, "DEBUG": 0},
          "info": {"hostname": "screen-01", "description": "Lobby"}
        }
      },
      "period_hours": 24,
      "since": "2026-03-08T21:00:00+00:00",
      "timestamp": "2026-03-09T21:00:00+00:00"
    }

    Responses: 200 on success, 400 for an invalid hours value, 500 otherwise.
    """
    session = Session()
    try:
        # Get hours parameter (default 24, max 168 = 1 week)
        try:
            hours = int(request.args.get('hours', 24))
        except ValueError:
            hours = None
        # A negative window would put the cut-off in the future.
        if hours is None or hours < 0:
            return jsonify({"error": "Invalid hours. Must be a non-negative integer"}), 400
        hours = min(hours, 168)
        since = datetime.now(timezone.utc) - timedelta(hours=hours)

        # Query log counts grouped by client and level
        stats = session.query(
            ClientLog.client_uuid,
            ClientLog.level,
            func.count(ClientLog.id).label('count')
        ).filter(
            ClientLog.timestamp >= since
        ).group_by(
            ClientLog.client_uuid,
            ClientLog.level
        ).all()

        # Build summary dictionary. Rows are unpacked positionally so the
        # "count" column cannot be confused with the tuple method of that name.
        summary = {}
        for client_uuid, level, count in stats:
            # Initialize all levels to 0 the first time a client is seen
            counts = summary.setdefault(client_uuid, {"ERROR": 0, "WARN": 0, "INFO": 0, "DEBUG": 0})
            # Logs without a level have no bucket to go into; skip them
            # rather than failing the whole summary.
            if level is not None:
                counts[level.value] = count

        # Get client info for enrichment
        clients = session.query(Client.uuid, Client.hostname, Client.description).all()
        client_info = {
            client_uuid: {"hostname": hostname, "description": description}
            for client_uuid, hostname, description in clients
        }

        # Enrich summary with client info (empty dict for unknown clients)
        enriched_summary = {
            client_uuid: {"counts": counts, "info": client_info.get(client_uuid, {})}
            for client_uuid, counts in summary.items()
        }

        return jsonify({
            "summary": enriched_summary,
            "period_hours": hours,
            "since": since.isoformat(),
            "timestamp": datetime.now(timezone.utc).isoformat()
        })

    except Exception as e:
        return jsonify({"error": f"Server error: {str(e)}"}), 500
    finally:
        # Single cleanup point for every return path above.
        session.close()


@client_logs_bp.route("/monitoring-overview", methods=["GET"])
@superadmin_only
def get_monitoring_overview():
    """Return a dashboard-friendly monitoring overview for all clients.

    Query params:
      - hours: look-back window for the per-level log counts (default 24,
        max 168, must not be negative)

    The payload holds fleet-wide summary counters plus one entry per client
    with identity, liveness, process and screen health, screenshot details,
    log counts for the window, the latest log and the latest ERROR log. The
    whole payload is passed through ``dict_to_camel_case`` before it is sent.

    Responses: 200 on success, 400 for an invalid hours value, 500 otherwise.
    """
    session = Session()
    try:
        try:
            hours = int(request.args.get("hours", 24))
        except ValueError:
            hours = None
        # A negative window would put the cut-off in the future.
        if hours is None or hours < 0:
            return jsonify({"error": "Invalid hours. Must be a non-negative integer"}), 400
        hours = min(hours, 168)
        since = datetime.now(timezone.utc) - timedelta(hours=hours)

        clients = (
            session.query(Client, ClientGroup.name.label("group_name"))
            .outerjoin(ClientGroup, Client.group_id == ClientGroup.id)
            .order_by(ClientGroup.name.asc(), Client.description.asc(), Client.hostname.asc(), Client.uuid.asc())
            .all()
        )

        log_stats = (
            session.query(
                ClientLog.client_uuid,
                ClientLog.level,
                func.count(ClientLog.id).label("count"),
            )
            .filter(ClientLog.timestamp >= since)
            .group_by(ClientLog.client_uuid, ClientLog.level)
            .all()
        )

        # Rows are unpacked positionally so the "count" column cannot be
        # confused with the tuple method of that name.
        counts_by_client = {}
        for client_uuid, level, count in log_stats:
            client_counts = counts_by_client.setdefault(
                client_uuid,
                {"ERROR": 0, "WARN": 0, "INFO": 0, "DEBUG": 0},
            )
            # Logs without a level have no bucket; skip them rather than
            # failing the whole overview.
            if level is not None:
                client_counts[level.value] = count

        clients_payload = []
        summary_counts = {
            "total_clients": 0,
            "online_clients": 0,
            "offline_clients": 0,
            "healthy_clients": 0,
            "warning_clients": 0,
            "critical_clients": 0,
            "error_logs": 0,
            "warn_logs": 0,
            "active_priority_screenshots": 0,
        }

        for client, group_name in clients:
            log_counts = counts_by_client.get(
                client.uuid,
                {"ERROR": 0, "WARN": 0, "INFO": 0, "DEBUG": 0},
            )
            is_alive = _is_client_alive(client.last_alive, client.is_active)
            process_status = client.process_status.value if client.process_status else None
            screen_health_status = client.screen_health_status.value if client.screen_health_status else None
            status = _determine_client_status(is_alive, process_status, screen_health_status, log_counts)

            # NOTE(review): these two lookups run once per client (N+1 query
            # pattern); consider a grouped query if the fleet grows large.
            latest_log = (
                session.query(ClientLog)
                .filter_by(client_uuid=client.uuid)
                .order_by(desc(ClientLog.timestamp))
                .first()
            )
            latest_error = (
                session.query(ClientLog)
                .filter_by(client_uuid=client.uuid, level=LogLevel.ERROR)
                .order_by(desc(ClientLog.timestamp))
                .first()
            )

            # Prefer the stored timestamp; fall back to file times on disk.
            screenshot_ts = client.last_screenshot_analyzed or _infer_last_screenshot_ts(client.uuid)
            screenshot_meta = _load_screenshot_metadata(client.uuid)
            latest_screenshot_type = screenshot_meta.get("latest_screenshot_type") or "periodic"
            priority_screenshot_type = screenshot_meta.get("last_priority_screenshot_type")
            priority_screenshot_received_at = screenshot_meta.get("last_priority_received_at")
            has_active_priority = _is_priority_screenshot_active(priority_screenshot_received_at)
            # While a priority screenshot is active, point at it instead of
            # the regular screenshot.
            if has_active_priority:
                screenshot_url = f"/screenshots/{client.uuid}/priority"
            else:
                screenshot_url = f"/screenshots/{client.uuid}"

            clients_payload.append({
                "uuid": client.uuid,
                "hostname": client.hostname,
                "description": client.description,
                "ip": client.ip,
                "model": client.model,
                "group_id": client.group_id,
                "group_name": group_name,
                "registration_time": client.registration_time.isoformat() if client.registration_time else None,
                "last_alive": client.last_alive.isoformat() if client.last_alive else None,
                "is_alive": is_alive,
                "status": status,
                "current_event_id": client.current_event_id,
                "current_process": client.current_process,
                "process_status": process_status,
                "process_pid": client.process_pid,
                "screen_health_status": screen_health_status,
                "last_screenshot_analyzed": screenshot_ts.isoformat() if screenshot_ts else None,
                "last_screenshot_hash": client.last_screenshot_hash,
                "latest_screenshot_type": latest_screenshot_type,
                "priority_screenshot_type": priority_screenshot_type,
                "priority_screenshot_received_at": priority_screenshot_received_at,
                "has_active_priority_screenshot": has_active_priority,
                "screenshot_url": screenshot_url,
                # Key name kept for existing consumers; the counts actually
                # cover the requested ``hours`` window, not always 24 hours.
                "log_counts_24h": {
                    "error": log_counts["ERROR"],
                    "warn": log_counts["WARN"],
                    "info": log_counts["INFO"],
                    "debug": log_counts["DEBUG"],
                },
                "latest_log": _serialize_log_entry(latest_log),
                "latest_error": _serialize_log_entry(latest_error),
            })

            summary_counts["total_clients"] += 1
            summary_counts["error_logs"] += log_counts["ERROR"]
            summary_counts["warn_logs"] += log_counts["WARN"]
            if has_active_priority:
                summary_counts["active_priority_screenshots"] += 1
            if is_alive:
                summary_counts["online_clients"] += 1
            else:
                summary_counts["offline_clients"] += 1
            # "offline" has its own counter above, so only the three health
            # states of online clients are tallied here.
            if status == "healthy":
                summary_counts["healthy_clients"] += 1
            elif status == "warning":
                summary_counts["warning_clients"] += 1
            elif status == "critical":
                summary_counts["critical_clients"] += 1

        payload = {
            "summary": summary_counts,
            "period_hours": hours,
            "grace_period_seconds": _grace_period_seconds(),
            "since": since.isoformat(),
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "clients": clients_payload,
        }
        return jsonify(dict_to_camel_case(payload))

    except Exception as e:
        return jsonify({"error": f"Server error: {str(e)}"}), 500
    finally:
        # Single cleanup point for every return path above.
        session.close()


@client_logs_bp.route("/recent-errors", methods=["GET"])
@admin_or_higher
def get_recent_errors():
    """
    Get recent ERROR logs across all clients, newest first
    Query params:
      - limit: number of entries (default 20, max 100, must not be negative)

    Responses:
      - 200: {"errors": [...], "count": <number of entries>}
      - 400: invalid limit value
      - 500: any unexpected error

    Useful for system-wide error monitoring
    """
    session = Session()
    try:
        try:
            limit = int(request.args.get('limit', 20))
        except ValueError:
            limit = None
        # A negative LIMIT is rejected by some databases and means "no limit"
        # on others, so refuse it here instead of handing it to the query.
        if limit is None or limit < 0:
            return jsonify({"error": "Invalid limit. Must be a non-negative integer"}), 400
        limit = min(limit, 100)

        # Get recent errors from all clients
        logs = session.query(ClientLog).filter_by(
            level=LogLevel.ERROR
        ).order_by(
            desc(ClientLog.timestamp)
        ).limit(limit).all()

        # Include the client uuid so each error can be attributed
        result = [_serialize_log_entry(log, include_client_uuid=True) for log in logs]

        return jsonify({
            "errors": result,
            "count": len(result)
        })

    except Exception as e:
        return jsonify({"error": f"Server error: {str(e)}"}), 500
    finally:
        # Single cleanup point for every return path above.
        session.close()