# simclient/simclient.py from logging.handlers import RotatingFileHandler import time import uuid import json import socket import hashlib import paho.mqtt.client as mqtt import ssl import os import shutil import re import platform import logging import subprocess from dotenv import load_dotenv import requests import base64 from datetime import datetime, timezone, timedelta import threading from urllib.parse import urlsplit, urlunsplit, unquote def _sd_notify(msg: str) -> None: """Send a sd_notify message to systemd via NOTIFY_SOCKET. Uses raw socket so no extra package (systemd-python) is required. Safe to call when not running under systemd (NOTIFY_SOCKET unset). """ sock_path = os.environ.get("NOTIFY_SOCKET") if not sock_path: return try: addr = sock_path.lstrip("@") # abstract namespace sockets start with '@' with socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM) as s: s.connect(addr) s.sendall(msg.encode()) except Exception: pass # never crash the app over a watchdog notification # ENV laden - support both container and native development env_paths = [ "/workspace/simclient/.env", # Container path os.path.join(os.path.dirname(__file__), ".env"), # Same directory os.path.join(os.path.expanduser("~"), "infoscreen-dev", ".env"), # Development path ] for env_path in env_paths: if os.path.exists(env_path): load_dotenv(env_path) break def _env_int(name, default): """Parse an int from environment variable, tolerating inline comments. 
Examples: - "10 # seconds" -> 10 - " 300ms" -> 300 - invalid or empty -> default """ raw = os.getenv(name) if raw is None or str(raw).strip() == "": return default try: # Remove inline comments sanitized = str(raw).split('#', 1)[0].strip() # Extract first integer occurrence m = re.search(r"-?\d+", sanitized) if m: return int(m.group(0)) except Exception: pass return default def _env_bool(name, default=False): raw = os.getenv(name) if raw is None: return default return str(raw).strip().lower() in ("1", "true", "yes", "on") def _env_host(name, default): """Parse a hostname/IP from env, stripping inline comments and whitespace. Example: "192.168.1.10 # comment" -> "192.168.1.10" """ raw = os.getenv(name) if raw is None: return default # Remove inline comments and extra spaces sanitized = str(raw).split('#', 1)[0].strip() # If any whitespace remains, take the first token as host if not sanitized: return default return sanitized.split()[0] def _env_str_clean(name, default=""): """Parse a generic string from env, removing inline comments and trimming. Returns the first whitespace-delimited token to avoid accidental comment tails. 
""" raw = os.getenv(name) if raw is None: return default sanitized = str(raw).split('#', 1)[0].strip() if not sanitized: return default return sanitized.split()[0] # Konfiguration aus ENV ENV = os.getenv("ENV", "development") HEARTBEAT_INTERVAL = _env_int("HEARTBEAT_INTERVAL", 5 if ENV == "development" else 60) SCREENSHOT_INTERVAL = _env_int("SCREENSHOT_INTERVAL", 30 if ENV == "development" else 300) LOG_LEVEL = os.getenv("LOG_LEVEL", "DEBUG" if ENV == "development" else "INFO") # Default to localhost in development, 'mqtt' (Docker compose service) otherwise MQTT_BROKER = _env_host("MQTT_BROKER", "localhost" if ENV == "development" else "mqtt") MQTT_PORT = _env_int("MQTT_PORT", 1883) DEBUG_MODE = _env_bool("DEBUG_MODE", ENV == "development") MQTT_USER = _env_str_clean("MQTT_USER", "") MQTT_PASSWORD_BROKER = _env_str_clean("MQTT_PASSWORD_BROKER", "") MQTT_USERNAME = _env_str_clean("MQTT_USERNAME", "") MQTT_PASSWORD = _env_str_clean("MQTT_PASSWORD", "") MQTT_TLS_CA_CERT = _env_str_clean("MQTT_TLS_CA_CERT", "") MQTT_TLS_CERT = _env_str_clean("MQTT_TLS_CERT", "") MQTT_TLS_KEY = _env_str_clean("MQTT_TLS_KEY", "") MQTT_TLS_INSECURE = _env_bool("MQTT_TLS_INSECURE", False) MQTT_TLS_ENABLED = _env_bool( "MQTT_TLS_ENABLED", bool(MQTT_TLS_CA_CERT or MQTT_TLS_CERT or MQTT_TLS_KEY), ) MQTT_BROKER_FALLBACKS = [] _fallbacks_raw = os.getenv("MQTT_BROKER_FALLBACKS", "") if _fallbacks_raw: for item in _fallbacks_raw.split(","): host = item.split('#', 1)[0].strip() if host: # Only take the first whitespace-delimited token MQTT_BROKER_FALLBACKS.append(host.split()[0]) # File server/API configuration # Defaults: use same host as MQTT broker, port 8000, http scheme FILE_SERVER_BASE_URL = _env_str_clean("FILE_SERVER_BASE_URL", "") _scheme_raw = _env_str_clean("FILE_SERVER_SCHEME", "http").lower() FILE_SERVER_SCHEME = _scheme_raw if _scheme_raw in ("http", "https") else "http" FILE_SERVER_HOST = _env_host("FILE_SERVER_HOST", MQTT_BROKER) FILE_SERVER_PORT = _env_int("FILE_SERVER_PORT", 
8000) # Logging-Konfiguration LOG_PATH = os.path.join(os.path.dirname(__file__), "simclient.log") os.makedirs(os.path.dirname(LOG_PATH), exist_ok=True) log_handlers = [] log_handlers.append(RotatingFileHandler( LOG_PATH, maxBytes=2*1024*1024, backupCount=5, encoding="utf-8")) if DEBUG_MODE: log_handlers.append(logging.StreamHandler()) logging.basicConfig( level=getattr(logging, LOG_LEVEL.upper(), logging.INFO), format="%(asctime)s.%(msecs)03dZ [%(levelname)s] %(message)s", datefmt="%Y-%m-%dT%H:%M:%S", handlers=log_handlers ) # Force all logging timestamps to UTC (affects %(asctime)s in all formatters). logging.Formatter.converter = time.gmtime # Setup monitoring logger (separate for health/crash events, local rotation only) MONITORING_LOG_PATH = os.path.join(os.path.dirname(__file__), "..", "logs", "monitoring.log") os.makedirs(os.path.dirname(MONITORING_LOG_PATH), exist_ok=True) monitoring_logger = logging.getLogger("monitoring") monitoring_logger.setLevel(getattr(logging, LOG_LEVEL.upper(), logging.INFO)) monitoring_handler = RotatingFileHandler(MONITORING_LOG_PATH, maxBytes=5*1024*1024, backupCount=5) monitoring_handler.setFormatter(logging.Formatter("%(asctime)s.%(msecs)03dZ [%(levelname)s] %(message)s", "%Y-%m-%dT%H:%M:%S")) monitoring_logger.addHandler(monitoring_handler) monitoring_logger.propagate = False # Don't duplicate to main logger logging.info(f"Monitoring logger initialized: {MONITORING_LOG_PATH}") # Health state file (written by display_manager, read by simclient) HEALTH_STATE_FILE = os.path.join(os.path.dirname(__file__), "current_process_health.json") CLIENT_SETTINGS_FILE = os.path.join(os.path.dirname(__file__), "config", "client_settings.json") # Screenshot IPC (written by display_manager, polled by simclient) SCREENSHOT_DIR = os.path.join(os.path.dirname(__file__), "screenshots") SCREENSHOT_META_FILE = os.path.join(SCREENSHOT_DIR, "meta.json") POWER_CONTROL_MODE = os.getenv("POWER_CONTROL_MODE", "local").strip().lower() POWER_INTENT_STATE_FILE 
def save_event_to_json(event_data):
    """Persist an event message to current_event.json next to this module.

    The payload is serialized unchanged, so ALL fields of the incoming event
    data are preserved, including scheduler-specific fields such as:
    - page_progress: Current page/slide progress tracking
    - auto_progress: Auto-progression state
    - And any other fields sent by the scheduler

    The file is written to a temporary sibling first and then swapped in with
    os.replace, so a failed or interrupted dump never leaves a truncated
    current_event.json behind (same pattern as the other state files here).

    Args:
        event_data: JSON-serializable event payload (list, dict, or scalar).

    Returns:
        None. Errors are logged and not raised.
    """
    try:
        json_path = os.path.join(os.path.dirname(__file__), "current_event.json")
        tmp_path = json_path + ".tmp"
        try:
            with open(tmp_path, "w", encoding="utf-8") as f:
                json.dump(event_data, f, ensure_ascii=False, indent=2)
            os.replace(tmp_path, json_path)
        except Exception:
            # Do not leave a stale temp file behind; the previous
            # current_event.json (if any) stays untouched.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)
            raise
        logging.info(f"Event message saved to {json_path}")

        # Log if scheduler-specific fields are present
        if isinstance(event_data, list):
            labelled = [(f"Event {idx}:", event) for idx, event in enumerate(event_data)]
        else:
            labelled = [("Event", event_data)]
        for label, event in labelled:
            if not isinstance(event, dict):
                continue
            for field in ("page_progress", "auto_progress"):
                if field in event:
                    logging.debug(f"{label} {field} = {event[field]}")
    except Exception as e:
        logging.error(f"Error saving event message: {e}")
Leeres Dictionary if not event_data: return True # 2. Explizite "null" oder "empty" Werte if isinstance(event_data, dict): # Event ist null/None if event_data.get("event") is None or event_data.get("event") == "null": return True # Event ist explizit als "empty" oder "none" markiert if str(event_data.get("event", "")).lower() in ["empty", "none", ""]: return True # Status zeigt an dass kein Event aktiv ist status = str(event_data.get("status", "")).lower() if status in ["inactive", "none", "empty", "cleared"]: return True # 3. String-basierte Events if isinstance(event_data, str) and event_data.lower() in ["null", "none", "empty", ""]: return True return False def _parse_utc_iso(value: str): """Parse ISO8601 timestamp with optional trailing Z into UTC-aware datetime.""" if not isinstance(value, str) or not value.strip(): raise ValueError("timestamp must be a non-empty string") normalized = value.strip() if normalized.endswith('Z'): normalized = normalized[:-1] + '+00:00' dt = datetime.fromisoformat(normalized) if dt.tzinfo is None: dt = dt.replace(tzinfo=timezone.utc) return dt.astimezone(timezone.utc) def validate_power_intent_payload(payload, expected_group_id=None): """Validate frozen TV power intent contract v1 payload. 
Returns tuple: (is_valid, result_dict, error_message) """ if not isinstance(payload, dict): return False, None, "payload must be an object" required_fields = ( "schema_version", "intent_id", "group_id", "desired_state", "reason", "issued_at", "expires_at", "poll_interval_sec", "active_event_ids", "event_window_start", "event_window_end" ) for field in required_fields: if field not in payload: return False, None, f"missing required field: {field}" if payload.get("schema_version") != "1.0": return False, None, f"unsupported schema_version: {payload.get('schema_version')}" desired_state = payload.get("desired_state") if desired_state not in ("on", "off"): return False, None, f"invalid desired_state: {desired_state}" reason = payload.get("reason") if reason not in ("active_event", "no_active_event"): return False, None, f"invalid reason: {reason}" intent_id = payload.get("intent_id") if not isinstance(intent_id, str) or not intent_id.strip(): return False, None, "intent_id must be a non-empty string" try: group_id = int(payload.get("group_id")) except Exception: return False, None, f"invalid group_id: {payload.get('group_id')}" if expected_group_id is not None: try: expected_group_id_int = int(expected_group_id) except Exception: expected_group_id_int = None if expected_group_id_int is not None and expected_group_id_int != group_id: return False, None, f"group_id mismatch: payload={group_id} expected={expected_group_id_int}" try: issued_at = _parse_utc_iso(payload.get("issued_at")) expires_at = _parse_utc_iso(payload.get("expires_at")) except Exception as e: return False, None, f"invalid timestamp: {e}" if expires_at <= issued_at: return False, None, "expires_at must be later than issued_at" if datetime.now(timezone.utc) > expires_at: return False, None, "intent expired" try: poll_interval_sec = int(payload.get("poll_interval_sec")) except Exception: return False, None, f"invalid poll_interval_sec: {payload.get('poll_interval_sec')}" if poll_interval_sec <= 0: return 
False, None, "poll_interval_sec must be > 0" active_event_ids = payload.get("active_event_ids") if not isinstance(active_event_ids, list): return False, None, "active_event_ids must be a list" normalized_event_ids = [] for item in active_event_ids: try: normalized_event_ids.append(int(item)) except Exception: return False, None, f"invalid active_event_id value: {item}" for field in ("event_window_start", "event_window_end"): value = payload.get(field) if value is not None: try: _parse_utc_iso(value) except Exception as e: return False, None, f"invalid {field}: {e}" normalized = { "schema_version": "1.0", "intent_id": intent_id.strip(), "group_id": group_id, "desired_state": desired_state, "reason": reason, "issued_at": payload.get("issued_at"), "expires_at": payload.get("expires_at"), "poll_interval_sec": poll_interval_sec, "active_event_ids": normalized_event_ids, "event_window_start": payload.get("event_window_start"), "event_window_end": payload.get("event_window_end"), } return True, normalized, None def write_power_intent_state(data): """Atomically write power intent state for display_manager consumption.""" try: tmp_path = POWER_INTENT_STATE_FILE + ".tmp" with open(tmp_path, "w", encoding="utf-8") as f: json.dump(data, f, ensure_ascii=False, indent=2) os.replace(tmp_path, POWER_INTENT_STATE_FILE) except Exception as e: logging.error(f"Error writing power intent state: {e}") def _atomic_write_json(path, data): os.makedirs(os.path.dirname(path), exist_ok=True) tmp_path = path + ".tmp" with open(tmp_path, "w", encoding="utf-8") as f: json.dump(data, f, ensure_ascii=False, indent=2) os.replace(tmp_path, path) def _read_json_or_default(path, default): try: with open(path, "r", encoding="utf-8") as f: return json.load(f) except Exception: return default def _extract_command_id(payload): if isinstance(payload, dict): value = payload.get("command_id") if isinstance(value, str) and value.strip(): return value.strip() return NIL_COMMAND_ID def _as_uuid_str(value): if 
not isinstance(value, str) or not value.strip(): raise ValueError("must be a non-empty string") return str(uuid.UUID(value.strip())) def _prune_processed_commands(commands): if not isinstance(commands, dict): return {} cutoff = datetime.now(timezone.utc) - timedelta(hours=max(1, COMMAND_DEDUPE_TTL_HOURS)) kept = {} sortable = [] for command_id, entry in commands.items(): if not isinstance(entry, dict): continue processed_at = entry.get("processed_at") if not processed_at: continue try: processed_dt = _parse_utc_iso(processed_at) except Exception: continue if processed_dt < cutoff: continue kept[command_id] = entry sortable.append((processed_dt, command_id)) max_entries = max(1, COMMAND_DEDUPE_MAX_ENTRIES) if len(sortable) > max_entries: sortable.sort(reverse=True) keep_ids = {cid for _, cid in sortable[:max_entries]} kept = {cid: entry for cid, entry in kept.items() if cid in keep_ids} return kept def load_processed_commands(): state = _read_json_or_default(PROCESSED_COMMANDS_FILE, {"commands": {}}) commands = state.get("commands") if isinstance(state, dict) else {} commands = _prune_processed_commands(commands) _atomic_write_json(PROCESSED_COMMANDS_FILE, {"commands": commands}) return commands def persist_processed_commands(commands): sanitized = _prune_processed_commands(commands) _atomic_write_json(PROCESSED_COMMANDS_FILE, {"commands": sanitized}) def load_last_command_state(): state = _read_json_or_default(LAST_COMMAND_STATE_FILE, {}) if isinstance(state, dict): return state return {} def write_last_command_state(data): _atomic_write_json(LAST_COMMAND_STATE_FILE, data) def validate_command_payload(payload, expected_client_uuid): if not isinstance(payload, dict): return False, None, "invalid_schema", "payload must be a JSON object" required = { "schema_version", "command_id", "client_uuid", "action", "issued_at", "expires_at", "requested_by", "reason", } payload_keys = set(payload.keys()) missing = sorted(required - payload_keys) if missing: return False, None, 
"missing_field", f"missing required fields: {', '.join(missing)}" extras = sorted(payload_keys - required) if extras: return False, None, "invalid_schema", f"unexpected fields: {', '.join(extras)}" if payload.get("schema_version") != "1.0": return False, None, "invalid_schema", "schema_version must be 1.0" try: command_id = _as_uuid_str(payload.get("command_id")) except Exception: return False, None, "invalid_schema", "command_id must be a valid UUID" try: client_uuid = _as_uuid_str(payload.get("client_uuid")) except Exception: return False, None, "invalid_schema", "client_uuid must be a valid UUID" try: expected_uuid = _as_uuid_str(expected_client_uuid) except Exception: expected_uuid = str(expected_client_uuid).strip() if client_uuid != expected_uuid: return False, None, "invalid_schema", "client_uuid does not match this client" action = payload.get("action") if action not in COMMAND_ACTIONS: return False, None, "invalid_schema", f"action must be one of {COMMAND_ACTIONS}" try: issued_at = _parse_utc_iso(payload.get("issued_at")) expires_at = _parse_utc_iso(payload.get("expires_at")) except Exception as e: return False, None, "invalid_schema", f"invalid timestamp: {e}" if expires_at <= issued_at: return False, None, "invalid_schema", "expires_at must be later than issued_at" if datetime.now(timezone.utc) > expires_at: return False, None, "stale_command", "command expired" requested_by = payload.get("requested_by") if requested_by is not None: if not isinstance(requested_by, int) or requested_by < 1: return False, None, "invalid_schema", "requested_by must be integer >= 1 or null" reason = payload.get("reason") if reason is not None: if not isinstance(reason, str): return False, None, "invalid_schema", "reason must be string or null" if len(reason) > 2000: return False, None, "invalid_schema", "reason exceeds max length 2000" normalized = { "schema_version": "1.0", "command_id": command_id, "client_uuid": client_uuid, "action": action, "issued_at": 
def _download_event_files(event):
    """Download every presentation file referenced by one event object."""
    if not isinstance(event, dict):
        return
    presentation = event.get("presentation")
    files = presentation.get("files") if isinstance(presentation, dict) else None
    if not isinstance(files, list):
        return
    for entry in files:
        file_url = entry.get("url") if isinstance(entry, dict) else None
        if file_url:
            download_presentation_file(file_url)


def on_message(client, userdata, msg, properties=None):
    """MQTT message callback: handle scheduler events and discovery ACKs.

    Topics starting with "infoscreen/events/":
        The payload is parsed as JSON. An empty event deletes the event
        file. Any other event is saved and its presentation files are
        downloaded. A payload that is not valid JSON either deletes the
        event file (when it is a textual "null"/"none"/"empty"/blank) or is
        saved wrapped in an error record.

    Topics ending with "/discovery_ack":
        Sets the module-level ``discovered`` flag.

    Malformed input (undecodable bytes, events that are not objects,
    "presentation" or "files" of the wrong type) is tolerated instead of
    raising out of the callback.

    Args:
        client: MQTT client instance (unused here).
        userdata: User data passed by the MQTT library (unused here).
        msg: Message object exposing ``topic`` (str) and ``payload`` (bytes).
        properties: Optional MQTT v5 properties (unused here).
    """
    global discovered
    # errors="replace": a payload that is not valid UTF-8 must not raise here
    payload_text = msg.payload.decode("utf-8", errors="replace")
    logging.info(f"Received: {msg.topic} {payload_text}")

    if msg.topic.startswith("infoscreen/events/"):
        logging.info(f"Event message from scheduler received: {payload_text}")
        try:
            event_data = json.loads(payload_text)
        except json.JSONDecodeError as e:
            logging.error(f"Invalid JSON in event message: {e}")
            if payload_text.strip().lower() in ["null", "none", "empty", ""]:
                logging.info("Empty event message received - deleting event file")
                delete_event_file()
            else:
                event_data = {"raw_message": payload_text, "error": "Invalid JSON format"}
                save_event_to_json(event_data)
        else:
            if is_empty_event(event_data):
                logging.info("No active event - deleting event file")
                delete_event_file()
            else:
                save_event_to_json(event_data)
                # The scheduler may send a single event object or a list of them
                events = event_data if isinstance(event_data, list) else [event_data]
                for event in events:
                    _download_event_files(event)

    if msg.topic.endswith("/discovery_ack"):
        discovered = True
        logging.info("Discovery ACK received. Starting heartbeat.")
try: if os.path.exists('/proc/device-tree/model'): with open('/proc/device-tree/model') as f: return f.read().strip() elif os.path.exists('/sys/class/dmi/id/product_name'): with open('/sys/class/dmi/id/product_name') as f: return f.read().strip() except Exception: pass return "unknown" SOFTWARE_VERSION = "1.0.0" # Optional: Anpassen bei neuen Releases def _detect_watchdog_enabled(): env_flag = os.getenv("WATCHDOG_ENABLED", "").strip().lower() if env_flag in ("1", "true", "yes", "on"): return True if os.path.exists("/dev/watchdog"): return True return False def _detect_boot_source(): try: with open("/proc/cmdline", "r", encoding="utf-8") as f: cmdline = f.read().strip() for token in cmdline.split(): if token.startswith("root="): return token.split("=", 1)[1] except Exception: pass return "unknown" def configure_mqtt_security(client): # Prefer broker-scoped auth vars when present, fallback to legacy vars. auth_username = MQTT_USER or MQTT_USERNAME auth_password = MQTT_PASSWORD_BROKER if MQTT_USER else MQTT_PASSWORD configured = { "username": bool(auth_username), "tls": False, "tls_insecure": False, } if auth_username: client.username_pw_set(auth_username, auth_password or None) configured["username"] = True logging.info("Configured MQTT username/password authentication") if not MQTT_TLS_ENABLED: return configured tls_kwargs = { "ca_certs": MQTT_TLS_CA_CERT or None, "certfile": MQTT_TLS_CERT or None, "keyfile": MQTT_TLS_KEY or None, "tls_version": ssl.PROTOCOL_TLS_CLIENT, } client.tls_set(**tls_kwargs) configured["tls"] = True if MQTT_TLS_INSECURE: client.tls_insecure_set(True) configured["tls_insecure"] = True logging.warning("MQTT TLS hostname verification disabled via MQTT_TLS_INSECURE") else: client.tls_insecure_set(False) logging.info( "Configured MQTT TLS: ca=%s client_cert=%s client_key=%s", bool(MQTT_TLS_CA_CERT), bool(MQTT_TLS_CERT), bool(MQTT_TLS_KEY), ) return configured def send_discovery(client, client_id, hardware_token, ip_addr): macs = 
def download_presentation_file(url):
    """Download a presentation file into the local "presentation" directory.

    The URL is first passed through resolve_file_url. The target filename is
    the percent-decoded last path segment of the resolved URL, reduced to its
    basename so an encoded "../" cannot place the file outside the
    presentation directory. If that leaves no usable name, "downloaded_file"
    is used. A file that already exists is not downloaded again.

    The content is written to a ".part" sibling and moved into place only
    after the write finished, so an interrupted download is never mistaken
    for a complete file by the exists-check on a later call.

    Args:
        url: File URL as received in the event payload.

    Returns:
        None. Errors are logged and not raised.
    """
    try:
        # Resolve URL to correct API host (same IP as MQTT broker by default)
        resolved_url = resolve_file_url(url)

        # Create the presentation directory if it doesn't exist
        presentation_dir = os.path.join(os.path.dirname(__file__), "presentation")
        os.makedirs(presentation_dir, exist_ok=True)

        # Decode first, then strip directories: "%2F" decodes to "/" and would
        # otherwise survive the split on the still-encoded path.
        decoded_name = unquote(urlsplit(resolved_url).path.split("/")[-1])
        filename = os.path.basename(decoded_name)
        if filename in ("", ".", ".."):
            filename = "downloaded_file"
        file_path = os.path.join(presentation_dir, filename)

        # Check if the file already exists
        if os.path.exists(file_path):
            logging.info(f"File already exists: {file_path}")
            return

        # Download the file
        logging.info(f"Downloading file from: {resolved_url}")
        response = requests.get(resolved_url, timeout=20)
        response.raise_for_status()  # Raise an error for bad responses

        # Save the file via a temp name so a partial write is never visible
        tmp_path = file_path + ".part"
        try:
            with open(tmp_path, "wb") as f:
                f.write(response.content)
            os.replace(tmp_path, file_path)
        except Exception:
            if os.path.exists(tmp_path):
                os.remove(tmp_path)
            raise
        logging.info(f"File downloaded successfully: {file_path}")
    except Exception as e:
        logging.error(f"Error downloading file: {e}")
def read_health_state():
    """Read the health state file written by display_manager.

    Returns:
        The parsed JSON content of HEALTH_STATE_FILE, or None when the file
        does not exist or cannot be read/parsed. Read/parse problems are
        logged at debug level; a missing file is not logged.
    """
    try:
        # Open directly instead of checking existence first: avoids the race
        # where the file disappears between the check and the open. The
        # explicit encoding matches read_power_state.
        with open(HEALTH_STATE_FILE, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        return None
    except Exception as e:
        logging.debug(f"Could not read health state file: {e}")
        return None
ensure_ascii=False, indent=2) logging.info(f"Client settings saved to {CLIENT_SETTINGS_FILE}") except Exception as e: logging.error(f"Error saving client settings: {e}") def delete_client_settings(): """Delete persisted client settings so defaults apply again.""" try: if os.path.exists(CLIENT_SETTINGS_FILE): os.remove(CLIENT_SETTINGS_FILE) logging.info(f"Client settings deleted: {CLIENT_SETTINGS_FILE}") except Exception as e: logging.error(f"Error deleting client settings: {e}") def publish_health_message(client, client_id, connection_state=None): """Publish health status to server via MQTT""" try: health = read_health_state() if not health: return # No active process payload = { "timestamp": datetime.now(timezone.utc).isoformat(), "expected_state": { "event_id": health.get("event_id") }, "actual_state": { "process": health.get("current_process"), "pid": health.get("process_pid"), "status": health.get("process_status") } } if connection_state is not None: last_disc = connection_state.get("last_disconnect") payload["broker_connection"] = { "broker_reachable": bool(connection_state.get("connected")), "reconnect_count": connection_state.get("reconnect_count", 0), "last_disconnect_at": ( datetime.fromtimestamp(last_disc, tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") if last_disc else None ), } topic = f"infoscreen/{client_id}/health" res = client.publish(topic, json.dumps(payload), qos=1) if res.rc == mqtt.MQTT_ERR_SUCCESS: logging.debug(f"Health message published: {health.get('current_process')} status={health.get('process_status')}") else: logging.debug(f"Health publish failed with code: {res.rc}") except Exception as e: logging.debug(f"Error publishing health: {e}") def publish_log_message(client, client_id, level: str, message: str, context: dict = None): """Publish log message to server via MQTT (only if level is ERROR or WARN, unless DEBUG_MODE)""" try: # Filter logs: only send ERROR/WARN to server, keep INFO/DEBUG local-only unless DEBUG_MODE if level.upper() 
== "INFO" and not DEBUG_MODE: return # Keep INFO logs local only in production if level.upper() == "DEBUG": return # DEBUG logs always local-only topic = f"infoscreen/{client_id}/logs/{level.lower()}" payload = { "timestamp": datetime.now(timezone.utc).isoformat(), "message": message, "context": context or {} } res = client.publish(topic, json.dumps(payload), qos=1) if res.rc == mqtt.MQTT_ERR_SUCCESS: monitoring_logger.log(getattr(logging, level.upper(), logging.INFO), f"[MQTT] {message}") else: logging.debug(f"Log publish failed ({level}) with code: {res.rc}") except Exception as e: logging.debug(f"Error publishing log: {e}") def _read_and_clear_meta(): """Read screenshots/meta.json and atomically clear the send_immediately flag. Returns the parsed dict (with the *original* send_immediately value) if the file exists and is valid JSON, else None. The flag is cleared on disk before returning so a crash between read and publish does not re-send on the next tick. """ try: if not os.path.exists(SCREENSHOT_META_FILE): return None with open(SCREENSHOT_META_FILE, 'r', encoding='utf-8') as f: meta = json.load(f) if meta.get('send_immediately'): # Write cleared copy atomically so the flag is gone before we return cleared = dict(meta) cleared['send_immediately'] = False tmp_path = SCREENSHOT_META_FILE + '.tmp' with open(tmp_path, 'w', encoding='utf-8') as f: json.dump(cleared, f) os.replace(tmp_path, SCREENSHOT_META_FILE) return meta # original dict; send_immediately is True if it was set except Exception as e: logging.debug(f"Could not read screenshot meta: {e}") return None def _build_dashboard_payload(client_id: str, screenshot_info: dict, health: dict, capture_type: str, trigger_meta: dict = None) -> dict: """Build the dashboard payload in one canonical place. Keeping payload assembly centralized avoids schema drift across call sites. 
""" published_at = datetime.now(timezone.utc).isoformat() screenshot_age_s = None if screenshot_info: try: ts = datetime.fromisoformat(screenshot_info["timestamp"]) screenshot_age_s = round((datetime.now(timezone.utc) - ts).total_seconds(), 1) except Exception: pass capture_meta = { "type": capture_type, "captured_at": (trigger_meta or {}).get("captured_at") or (screenshot_info or {}).get("timestamp"), "age_s": screenshot_age_s, "triggered": bool(trigger_meta and trigger_meta.get("send_immediately")), "send_immediately": bool(trigger_meta and trigger_meta.get("send_immediately")), } process_health_payload = None if health: process_health_payload = { "event_id": health.get("event_id"), "event_type": health.get("event_type"), "current_process": health.get("current_process"), "process_pid": health.get("process_pid"), "process_status": health.get("process_status"), "restart_count": health.get("restart_count", 0) } payload = { "message": { "client_id": client_id, "status": "alive", }, "content": { "screenshot": screenshot_info, }, "runtime": { "system_info": { "hostname": socket.gethostname(), "ip": get_ip(), "uptime": time.time(), }, "process_health": process_health_payload, }, "metadata": { "schema_version": "2.0", "producer": "simclient", "published_at": published_at, "capture": capture_meta, "transport": { "topic": f"infoscreen/{client_id}/dashboard", "qos": 0, "publisher": "simclient", }, }, } return payload def send_screenshot_heartbeat(client, client_id, capture_type: str = "periodic", trigger_meta: dict = None): """Send heartbeat with screenshot to server for dashboard monitoring""" try: screenshot_info = get_latest_screenshot() # Also read health state and include in heartbeat health = read_health_state() heartbeat_data = _build_dashboard_payload( client_id=client_id, screenshot_info=screenshot_info, health=health, capture_type=capture_type, trigger_meta=trigger_meta, ) # Send to dashboard monitoring topic dashboard_topic = f"infoscreen/{client_id}/dashboard" 
payload = json.dumps(heartbeat_data) res = client.publish(dashboard_topic, payload, qos=0) if res.rc == mqtt.MQTT_ERR_SUCCESS: age_str = f", age={heartbeat_data['metadata']['capture']['age_s']}s" if heartbeat_data['metadata']['capture']['age_s'] is not None else "" if screenshot_info: logging.info( f"Dashboard published: schema=2.0 type={capture_type}" f" screenshot={screenshot_info['filename']} ({screenshot_info['size']} bytes){age_str}" ) else: logging.info(f"Dashboard published: schema=2.0 type={capture_type} (no screenshot)") elif res.rc == mqtt.MQTT_ERR_NO_CONN: logging.warning("Dashboard heartbeat publish returned NO_CONN; will retry on next interval") else: logging.warning(f"Dashboard heartbeat publish failed with code: {res.rc}") except Exception as e: logging.error(f"Error sending screenshot heartbeat: {e}") def screenshot_service_thread(client, client_id): """Background thread for screenshot monitoring and transmission. Runs on a 1-second tick. A heartbeat is sent when either: - display_manager set send_immediately=True in screenshots/meta.json (event_start / event_stop triggered captures); fired within <=1 second, OR - the periodic SCREENSHOT_INTERVAL has elapsed since the last send. The interval timer resets on every send, so a triggered send pushes out the next periodic heartbeat rather than causing a double-send shortly after. 
""" logging.info(f"Screenshot service started with {SCREENSHOT_INTERVAL}s periodic interval") last_sent = 0.0 last_meta_type = None while True: try: time.sleep(1) now = time.time() meta = _read_and_clear_meta() triggered = bool(meta and meta.get('send_immediately')) interval_due = (now - last_sent) >= SCREENSHOT_INTERVAL if meta: current_type = meta.get('type', 'unknown') if current_type != last_meta_type: logging.debug(f"Meta.json detected: type={current_type}, send_immediately={meta.get('send_immediately')}, file={meta.get('file')}") last_meta_type = current_type if triggered or interval_due: capture_type = meta['type'] if (triggered and meta) else "periodic" if triggered: logging.info(f"Sending triggered screenshot: type={capture_type}") send_screenshot_heartbeat(client, client_id, capture_type, trigger_meta=meta) last_sent = now except Exception as e: logging.error(f"Screenshot service error: {e}") time.sleep(60) # Wait a minute before retrying def main(): global discovered print(f"[{datetime.now(timezone.utc).isoformat()}] simclient.py: program started") logging.info("Client starting - deleting old event file if present") delete_event_file() client_id = get_persistent_uuid() hardware_token = get_hardware_token() ip_addr = get_ip() # Persistenz für group_id (needed in on_connect) group_id_path = os.path.join(os.path.dirname(__file__), "config", "last_group_id.txt") current_group_id = load_last_group_id(group_id_path) event_topic = None power_intent_topic = None last_power_intent_id = None last_power_issued_at = None command_topic = f"infoscreen/{client_id}/commands" command_topic_alias = f"infoscreen/{client_id}/command" processed_commands = load_processed_commands() pending_recovery_command = load_last_command_state() # paho-mqtt v2: opt into latest callback API to avoid deprecation warnings. 
client_kwargs = {"protocol": mqtt.MQTTv311} try: # Use enum when available (paho-mqtt >= 2.0) if hasattr(mqtt, "CallbackAPIVersion"): client_kwargs["callback_api_version"] = mqtt.CallbackAPIVersion.VERSION2 except Exception: pass client = mqtt.Client(**client_kwargs) client.on_message = on_message configure_mqtt_security(client) # Enable automatic reconnection client.reconnect_delay_set(min_delay=1, max_delay=120) # Connection state tracking connection_state = { "connected": False, "last_disconnect": None, "reconnect_count": 0, "connect_count": 0, } # Optional: Enable MQTT debug logging in DEBUG_MODE if DEBUG_MODE: def on_log(client, userdata, level, buf): logging.debug(f"MQTT: {buf}") client.on_log = on_log # Define subscribe_event_topic BEFORE on_connect so it can be called from the callback def subscribe_event_topic(new_group_id): nonlocal event_topic, current_group_id # Check if group actually changed to handle cleanup group_changed = new_group_id != current_group_id if group_changed: if current_group_id is not None: logging.info(f"Group change from {current_group_id} to {new_group_id} - deleting old event file") delete_event_file() if event_topic: client.unsubscribe(event_topic) logging.info(f"Unsubscribed from event topic: {event_topic}") # Always ensure the event topic is subscribed new_event_topic = f"infoscreen/events/{new_group_id}" # Only subscribe if we don't already have this topic subscribed if event_topic != new_event_topic: if event_topic: client.unsubscribe(event_topic) logging.info(f"Unsubscribed from event topic: {event_topic}") event_topic = new_event_topic client.subscribe(event_topic) logging.info(f"Subscribing to event topic: {event_topic} for group_id: {new_group_id}") else: logging.info(f"Event topic already subscribed: {event_topic}") # Update current group_id and save it if group_changed: current_group_id = new_group_id save_last_group_id(group_id_path, new_group_id) def subscribe_power_intent_topic(new_group_id): nonlocal 
power_intent_topic if POWER_CONTROL_MODE not in ("hybrid", "mqtt"): return new_topic = f"infoscreen/groups/{new_group_id}/power/intent" if power_intent_topic == new_topic: logging.info(f"Power intent topic already subscribed: {power_intent_topic}") return if power_intent_topic: client.unsubscribe(power_intent_topic) logging.info(f"Unsubscribed from power intent topic: {power_intent_topic}") power_intent_topic = new_topic client.subscribe(power_intent_topic, qos=1) logging.info(f"Subscribed to power intent topic: {power_intent_topic}") # on_connect callback: Subscribe to all topics after connection is established def on_connect(client, userdata, flags, rc, properties=None): if rc == 0: connection_state["connected"] = True connection_state["last_disconnect"] = None connection_state["connect_count"] = connection_state.get("connect_count", 0) + 1 if connection_state["connect_count"] > 1: connection_state["reconnect_count"] = connection_state.get("reconnect_count", 0) + 1 # Check if this is a reconnection # paho-mqtt v2 provides ConnectFlags with attribute 'session_present' # Older versions may provide dict-like flags; default to False. 
is_reconnect = False try: if hasattr(flags, "session_present"): is_reconnect = bool(getattr(flags, "session_present")) elif isinstance(flags, dict): is_reconnect = bool(flags.get("session present", False)) except Exception: is_reconnect = False if is_reconnect: logging.info("MQTT reconnected successfully - resubscribing to all topics...") else: logging.info("MQTT connected successfully - subscribing to topics...") # Discovery-ACK-Topic abonnieren ack_topic = f"infoscreen/{client_id}/discovery_ack" client.subscribe(ack_topic) logging.info(f"Subscribed to: {ack_topic}") # Config topic client.subscribe(f"infoscreen/{client_id}/config") logging.info(f"Subscribed to: infoscreen/{client_id}/config") # group_id Topic abonnieren (retained) group_id_topic = f"infoscreen/{client_id}/group_id" client.subscribe(group_id_topic) logging.info(f"Subscribed to: {group_id_topic}") # Command topics (canonical + transitional) client.subscribe(command_topic, qos=1) client.subscribe(command_topic_alias, qos=1) logging.info(f"Subscribed to command topics: {command_topic}, {command_topic_alias}") # Wenn beim Start eine group_id vorhanden ist, sofort Event-Topic abonnieren # Reset event_topic so subscribe_event_topic always re-registers with the broker # (broker loses all subscriptions on reconnect, even if our local state still has it) if current_group_id: logging.info(f"Subscribing to event topic for saved group_id: {current_group_id}") nonlocal event_topic event_topic = None # force re-subscribe regardless of previous state subscribe_event_topic(current_group_id) nonlocal power_intent_topic power_intent_topic = None subscribe_power_intent_topic(current_group_id) # Send discovery message after reconnection to re-register with server if is_reconnect: logging.info("Sending discovery after reconnection to re-register with server") send_discovery(client, client_id, hardware_token, ip_addr) else: connection_state["connected"] = False logging.error(f"MQTT connection failed with code: {rc}") # 
on_disconnect callback (Paho v2 signature) def on_disconnect(client, userdata, disconnect_flags, rc, properties=None): connection_state["connected"] = False connection_state["last_disconnect"] = time.time() if rc == 0: logging.info("MQTT disconnected cleanly") else: logging.warning(f"MQTT disconnected unexpectedly with code: {rc}") logging.info("Automatic reconnection will be attempted...") client.on_connect = on_connect client.on_disconnect = on_disconnect # Robust MQTT connect with fallbacks and retries broker_candidates = [MQTT_BROKER] # Add environment-provided fallbacks broker_candidates.extend([b for b in MQTT_BROKER_FALLBACKS if b not in broker_candidates]) # Add common local fallbacks for alt in ("127.0.0.1", "localhost", "mqtt"): if alt not in broker_candidates: broker_candidates.append(alt) connect_ok = False last_error = None for attempt in range(1, 6): # up to 5 attempts for host in broker_candidates: try: logging.info(f"Connecting to MQTT broker {host}:{MQTT_PORT} (attempt {attempt}/5)...") client.connect(host, MQTT_PORT) connect_ok = True MQTT_HOST_USED = host # noqa: N816 local doc variable break except Exception as e: last_error = e logging.warning(f"MQTT connection to {host}:{MQTT_PORT} failed: {e}") if connect_ok: break backoff = min(5 * attempt, 20) logging.info(f"Retrying connection in {backoff}s...") time.sleep(backoff) if not connect_ok: logging.error(f"MQTT connection failed after multiple attempts: {last_error}") raise last_error # Start the network loop early to begin connection process client.loop_start() logging.info("MQTT network loop started - establishing connection...") # Wait for connection to complete and on_connect callback to fire logging.info("Waiting for initial connection and subscription setup...") connection_timeout = 30 # seconds start_wait = time.time() while not connection_state["connected"] and (time.time() - start_wait) < connection_timeout: time.sleep(0.5) if not connection_state["connected"]: logging.error(f"Failed to 
establish initial MQTT connection within {connection_timeout}s") raise Exception("MQTT connection timeout") logging.info("Initial connection established, subscription setup complete") # group_id message callback group_id_topic = f"infoscreen/{client_id}/group_id" def on_group_id_message(client, userdata, msg, properties=None): payload = msg.payload.decode().strip() new_group_id = None # Versuche, group_id aus JSON zu extrahieren, sonst als String verwenden try: data = json.loads(payload) if isinstance(data, dict) and "group_id" in data: new_group_id = str(data["group_id"]) else: new_group_id = str(data) except Exception: new_group_id = payload new_group_id = new_group_id.strip() if new_group_id: if new_group_id != current_group_id: logging.info(f"New group_id received: {new_group_id}") else: logging.info(f"group_id unchanged: {new_group_id}, ensuring event topic is subscribed") # Always call subscribe_event_topic to ensure subscription subscribe_event_topic(new_group_id) subscribe_power_intent_topic(new_group_id) else: logging.warning("Empty group_id received!") client.message_callback_add(group_id_topic, on_group_id_message) logging.info(f"Current group_id at start: {current_group_id if current_group_id else 'none'}") def mark_command_processed(command_id, status, error_code=None): processed_commands[command_id] = { "status": status, "error_code": error_code, "processed_at": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"), } persist_processed_commands(processed_commands) def execute_command_action(action): try: proc = subprocess.run( ["sudo", COMMAND_HELPER_PATH, action], timeout=max(1, COMMAND_EXEC_TIMEOUT_SEC), check=False, capture_output=True, text=True, ) except subprocess.TimeoutExpired: return False, "execution_timeout", f"command timed out after {COMMAND_EXEC_TIMEOUT_SEC}s" except PermissionError: return False, "permission_denied_local", "permission denied invoking command helper" except Exception as e: return False, "internal_error", f"internal 
execution error: {e}" if proc.returncode != 0: stderr = (proc.stderr or "").strip() if proc.returncode in (126, 127): return False, "permission_denied_local", stderr or "command helper unavailable" return False, "execution_failed", stderr or f"helper exited with code {proc.returncode}" return True, None, None def on_command_message(client, userdata, msg, properties=None): payload_text = msg.payload.decode().strip() received_at = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") expires_at = None if not payload_text: publish_command_ack( client, client_id, NIL_COMMAND_ID, "failed", error_code="invalid_schema", error_message="empty command payload", ) return try: payload = json.loads(payload_text) except json.JSONDecodeError as e: publish_command_ack( client, client_id, NIL_COMMAND_ID, "failed", error_code="invalid_schema", error_message=f"invalid JSON: {e}", ) return command_id_hint = _extract_command_id(payload) if isinstance(payload, dict): expires_at = payload.get("expires_at") is_valid, normalized, error_code, error_message = validate_command_payload(payload, client_id) if not is_valid: publish_command_ack( client, client_id, command_id_hint, "failed", error_code=error_code, error_message=error_message, expires_at=expires_at, ) return command_id = normalized["command_id"] expires_at = normalized["expires_at"] action = normalized["action"] if command_id in processed_commands: publish_command_ack( client, client_id, command_id, "failed", error_code="duplicate_command", error_message="command_id already processed", expires_at=expires_at, ) return publish_command_ack( client, client_id, command_id, "accepted", expires_at=expires_at, ) publish_command_ack( client, client_id, command_id, "execution_started", expires_at=expires_at, ) write_last_command_state({ "command_id": command_id, "action": action, "ack_status": "execution_started", "received_at": received_at, "execution_started_at": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"), "expires_at": 
expires_at, "source_topic": msg.topic, }) ok, exec_error_code, exec_error_message = execute_command_action(action) if ok: if command_requires_recovery_completion(action): if command_mock_reboot_immediate_complete_enabled(action): logging.info( "Mock reboot immediate completion enabled: command_id=%s action=%s", command_id, action, ) else: logging.info( "Command entered recovery completion path: command_id=%s action=%s", command_id, action, ) return logging.info( "Command continuing to immediate completion path: command_id=%s action=%s", command_id, action, ) publish_command_ack( client, client_id, command_id, "completed", expires_at=expires_at, ) mark_command_processed(command_id, "completed") write_last_command_state({ "command_id": command_id, "action": action, "ack_status": "completed", "completed_at": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"), "expires_at": expires_at, }) return publish_command_ack( client, client_id, command_id, "failed", error_code=exec_error_code, error_message=exec_error_message, expires_at=expires_at, ) mark_command_processed(command_id, "failed", error_code=exec_error_code) write_last_command_state({ "command_id": command_id, "action": action, "ack_status": "failed", "error_code": exec_error_code, "error_message": exec_error_message, "failed_at": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"), "expires_at": expires_at, }) client.message_callback_add(command_topic, on_command_message) client.message_callback_add(command_topic_alias, on_command_message) def on_power_intent_message(client, userdata, msg, properties=None): nonlocal last_power_intent_id, last_power_issued_at payload_text = msg.payload.decode().strip() received_at = datetime.now(timezone.utc).isoformat() # A retained null-message clears the topic and arrives as an empty payload. 
if not payload_text: logging.info("Power intent retained message cleared (empty payload)") write_power_intent_state({ "valid": False, "mode": POWER_CONTROL_MODE, "error": "retained_cleared", "received_at": received_at, "topic": msg.topic, }) return try: payload = json.loads(payload_text) except json.JSONDecodeError as e: logging.warning(f"Invalid power intent JSON: {e}") write_power_intent_state({ "valid": False, "mode": POWER_CONTROL_MODE, "error": f"invalid_json: {e}", "received_at": received_at, "topic": msg.topic, }) return is_valid, normalized, error = validate_power_intent_payload(payload, expected_group_id=current_group_id) if not is_valid: logging.warning(f"Rejected power intent: {error}") write_power_intent_state({ "valid": False, "mode": POWER_CONTROL_MODE, "error": error, "received_at": received_at, "topic": msg.topic, }) return try: issued_dt = _parse_utc_iso(normalized["issued_at"]) except Exception: issued_dt = None if last_power_issued_at and issued_dt and issued_dt < last_power_issued_at: logging.warning( f"Rejected out-of-order power intent {normalized['intent_id']} issued_at={normalized['issued_at']}" ) write_power_intent_state({ "valid": False, "mode": POWER_CONTROL_MODE, "error": "out_of_order_intent", "received_at": received_at, "topic": msg.topic, }) return duplicate_intent_id = normalized["intent_id"] == last_power_intent_id if issued_dt: last_power_issued_at = issued_dt last_power_intent_id = normalized["intent_id"] logging.info( "Power intent accepted: id=%s desired_state=%s reason=%s expires_at=%s duplicate=%s", normalized["intent_id"], normalized["desired_state"], normalized["reason"], normalized["expires_at"], duplicate_intent_id, ) write_power_intent_state({ "valid": True, "mode": POWER_CONTROL_MODE, "received_at": received_at, "topic": msg.topic, "duplicate_intent_id": duplicate_intent_id, "payload": normalized, }) config_topic = f"infoscreen/{client_id}/config" def on_config_message(client, userdata, msg, properties=None): payload = 
msg.payload.decode().strip() if not payload or payload.lower() in ("null", "none", "empty", "{}"): logging.info("Empty client config received - deleting persisted client settings") delete_client_settings() return try: config_data = json.loads(payload) except json.JSONDecodeError as e: logging.error(f"Invalid JSON in client config message: {e}") return if not isinstance(config_data, dict): logging.warning("Ignoring non-object client config payload") return save_client_settings(config_data) client.message_callback_add(config_topic, on_config_message) if POWER_CONTROL_MODE in ("hybrid", "mqtt"): if current_group_id: subscribe_power_intent_topic(current_group_id) else: logging.info("Power control mode active but no group_id yet; waiting for group assignment") def on_power_intent_dispatch(client, userdata, msg, properties=None): on_power_intent_message(client, userdata, msg, properties) # Register a generic callback so topic changes on group switch do not require re-registration. client.message_callback_add("infoscreen/groups/+/power/intent", on_power_intent_dispatch) logging.info(f"Power control mode active: {POWER_CONTROL_MODE}") else: logging.info(f"Power control mode is local; MQTT power intents disabled") # Discovery-Phase: Sende Discovery bis ACK empfangen # The loop is already started, just wait and send discovery messages discovery_attempts = 0 max_discovery_attempts = 20 while not discovered and discovery_attempts < max_discovery_attempts: if connection_state["connected"]: send_discovery(client, client_id, hardware_token, ip_addr) discovery_attempts += 1 # Wait for ACK, checking every second for _ in range(int(HEARTBEAT_INTERVAL)): if discovered: break time.sleep(1) else: logging.info("Waiting for MQTT connection before sending discovery...") time.sleep(2) if discovered: break if not discovered: logging.warning(f"Discovery ACK not received after {max_discovery_attempts} attempts - continuing anyway") # Start screenshot service in background thread 
screenshot_thread = threading.Thread( target=screenshot_service_thread, args=(client, client_id), daemon=True ) screenshot_thread.start() logging.info("Screenshot service thread started") power_state_thread = threading.Thread( target=power_state_service_thread, args=(client, client_id), daemon=True, ) power_state_thread.start() logging.info("Power state service thread started") # Heartbeat-Loop with connection state monitoring last_heartbeat = 0 logging.info("Entering heartbeat loop (network loop already running in background thread)") _sd_notify("READY=1") # tell systemd the process is fully initialised while True: try: current_time = time.time() # Check connection state and log warnings if disconnected if not connection_state["connected"]: if connection_state["last_disconnect"]: disconnect_duration = current_time - connection_state["last_disconnect"] logging.warning(f"MQTT disconnected for {disconnect_duration:.1f}s - waiting for reconnection...") else: logging.warning("MQTT not connected - waiting for connection...") # Send heartbeat only when connected if current_time - last_heartbeat >= HEARTBEAT_INTERVAL: if client.is_connected(): result = client.publish(f"infoscreen/{client_id}/heartbeat", "alive", qos=0) if result.rc == mqtt.MQTT_ERR_SUCCESS: logging.info("Heartbeat sent.") # Also send health and screenshot heartbeats publish_health_message(client, client_id, connection_state) if ( isinstance(pending_recovery_command, dict) and pending_recovery_command.get("ack_status") == "execution_started" and pending_recovery_command.get("action") == "reboot_host" and pending_recovery_command.get("command_id") ): recovered_command_id = pending_recovery_command.get("command_id") recovered_expires = pending_recovery_command.get("expires_at") publish_command_ack( client, client_id, recovered_command_id, "completed", expires_at=recovered_expires, ) mark_command_processed(recovered_command_id, "completed") write_last_command_state({ "command_id": recovered_command_id, 
"action": pending_recovery_command.get("action"), "ack_status": "recovered", "recovered_at": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"), "expires_at": recovered_expires, }) pending_recovery_command = None elif result.rc == mqtt.MQTT_ERR_NO_CONN: logging.debug("Heartbeat publish returned NO_CONN; retrying in 2s...") time.sleep(2) if client.is_connected(): retry = client.publish(f"infoscreen/{client_id}/heartbeat", "alive", qos=0) if retry.rc == mqtt.MQTT_ERR_SUCCESS: logging.info("Heartbeat sent after retry.") publish_health_message(client, client_id, connection_state) else: logging.warning(f"Heartbeat publish failed after retry with code: {retry.rc}") else: logging.warning("Skipping heartbeat retry - MQTT still not connected") else: logging.warning(f"Heartbeat publish failed with code: {result.rc}") else: logging.debug("Skipping heartbeat - MQTT not connected (is_connected=False)") last_heartbeat = current_time _sd_notify("WATCHDOG=1") # kick systemd watchdog each loop iteration time.sleep(5) except KeyboardInterrupt: logging.info("Shutting down gracefully...") client.loop_stop() client.disconnect() break except Exception as e: logging.error(f"Error in main loop: {e}") time.sleep(5) if __name__ == "__main__": main()