diff --git a/.env.template b/.env.template index 4871df9..71270a6 100644 --- a/.env.template +++ b/.env.template @@ -2,6 +2,9 @@ # Copy this file to .env and fill in values appropriate for your environment. # Environment +# IMPORTANT: CEC TV control is automatically DISABLED when ENV=development +# to avoid constantly switching the TV on/off during testing. +# Set to 'production' to enable automatic CEC TV control. ENV=development # development | production DEBUG_MODE=0 # 1 to enable debug mode LOG_LEVEL=INFO # DEBUG | INFO | WARNING | ERROR @@ -12,17 +15,27 @@ MQTT_BROKER= MQTT_PORT=1883 # Timing Configuration (seconds) -HEARTBEAT_INTERVAL=60 -SCREENSHOT_INTERVAL=180 -DISPLAY_CHECK_INTERVAL=15 +HEARTBEAT_INTERVAL=60 # Heartbeat frequency in seconds +SCREENSHOT_INTERVAL=180 # Screenshot capture frequency in seconds +DISPLAY_CHECK_INTERVAL=15 # Display Manager event check frequency in seconds # File/API Server (used to download presentation files) -# If event URLs use host 'server', the client rewrites them to this server. -# You can either set FILE_SERVER_BASE_URL (preferred) or FILE_SERVER_HOST+PORT+SCHEME. -FILE_SERVER_HOST= -FILE_SERVER_PORT=8000 -FILE_SERVER_SCHEME=http -# FILE_SERVER_BASE_URL=http://:8000 # optional, takes precedence +# By default, the client rewrites incoming file URLs that point to 'server' to this host. +# If not set, FILE_SERVER_HOST defaults to the same host as MQTT_BROKER. +# You can also set FILE_SERVER_BASE_URL (e.g., http://192.168.1.10:8000) to override entirely. 
+FILE_SERVER_HOST= # optional: e.g., 192.168.43.100 +FILE_SERVER_PORT=8000 # default port for API server +FILE_SERVER_SCHEME=http # http or https +# FILE_SERVER_BASE_URL= # optional: takes precedence over the above when set + +# HDMI-CEC TV Control (optional) +# Automatically turn TV on/off based on event scheduling +# NOTE: CEC is automatically DISABLED when ENV=development to avoid constantly switching TV on/off during testing +CEC_ENABLED=true # Enable automatic TV power control in production (true/false) +CEC_DEVICE=0 # Target CEC device (TV, 0, etc.) +CEC_TURN_OFF_DELAY=30 # Seconds to wait before turning off TV after last event ends +CEC_POWER_ON_WAIT=5 # Seconds to wait after power ON command (for TV to boot up) +CEC_POWER_OFF_WAIT=5 # Seconds to wait after power OFF command (increased for slower TVs) # Optional: MQTT authentication (if your broker requires username/password) #MQTT_USERNAME= diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index 7befffe..8902e18 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -33,6 +33,11 @@ The server now performs all PPTX → PDF conversion. The client only ever downlo ### MQTT Communication Patterns - **Discovery**: `infoscreen/discovery` → `infoscreen/{client_id}/discovery_ack` - **Heartbeat**: Regular `infoscreen/{client_id}/heartbeat` messages +### MQTT Reconnection & Heartbeat (Nov 2025) +- The client uses the Paho MQTT v2 callback API with `client.loop_start()` and `client.reconnect_delay_set()` to handle automatic reconnection. +- `on_connect` re-subscribes to all topics (`discovery_ack`, `config`, `group_id`, current group events) and re-sends discovery on reconnect to re-register with the server. +- Heartbeats are gated by `client.is_connected()` and retry once on `NO_CONN` (rc=4). Occasional rc=4 warnings are normal right after broker restarts or brief network stalls and are typically followed by a successful heartbeat. 
+- Do not treat a single rc=4 heartbeat warning as a failure. Investigate only if multiple consecutive heartbeats fail without recovery. - **Dashboard**: Screenshot transmission via `infoscreen/{client_id}/dashboard` - **Group Assignment**: Server sends group via `infoscreen/{client_id}/group_id` - **Events**: Content commands via `infoscreen/events/{group_id}` diff --git a/README.md b/README.md index e25317b..f79d38e 100644 --- a/README.md +++ b/README.md @@ -469,6 +469,13 @@ sudo systemctl status mosquitto **Try fallback brokers:** Edit `.env` and add `MQTT_BROKER_FALLBACKS` +#### Client auto-reconnect and heartbeat behavior (Nov 2025) +- The MQTT client now uses Paho v2 callbacks and `loop_start()` for automatic reconnection with exponential backoff. +- All topic subscriptions are restored in `on_connect` and a discovery message is re-sent on reconnect to re-register the client. +- Heartbeats are sent only when connected; if a publish occurs during a brief reconnect window, Paho may return rc=4 (NO_CONN). The client performs a short retry and logs the outcome. +- Occasional `Heartbeat publish failed after retry with code: 4` after broker restart or transient network hiccups is expected and not dangerous. It indicates "not connected at this instant"; the next heartbeat typically succeeds. +- When to investigate: repeated rc=4 with no subsequent "Heartbeat sent" entries over multiple intervals. + ### Screenshots not uploading **Test screenshot capture:** diff --git a/src/pi-setup.sh b/src/pi-setup.sh index a119716..9d3d877 100644 --- a/src/pi-setup.sh +++ b/src/pi-setup.sh @@ -35,7 +35,7 @@ log_ok "Development tools installed" # 3. Display / presentation dependencies log_step "Installing presentation & display tools (incl. HDMI-CEC)..." 
-sudo apt install -y chromium-browser libreoffice vlc feh scrot imagemagick xdotool wmctrl cec-utils +sudo apt install -y chromium-browser libreoffice vlc feh scrot imagemagick xdotool wmctrl cec-utils impressive log_ok "Presentation + HDMI-CEC tools installed (cec-utils)" # 4. MQTT tools diff --git a/src/simclient.py b/src/simclient.py index 519a520..4204d5e 100644 --- a/src/simclient.py +++ b/src/simclient.py @@ -552,6 +552,7 @@ def screenshot_service_thread(client, client_id): def main(): global discovered + print(f"[{datetime.now().isoformat()}] simclient.py: program started") logging.info("Client starting - deleting old event file if present") delete_event_file() @@ -575,6 +576,18 @@ def main(): client = mqtt.Client(**client_kwargs) client.on_message = on_message + # Enable automatic reconnection + client.reconnect_delay_set(min_delay=1, max_delay=120) + + # Connection state tracking + connection_state = {"connected": False, "last_disconnect": None} + + # Optional: Enable MQTT debug logging in DEBUG_MODE + if DEBUG_MODE: + def on_log(client, userdata, level, buf): + logging.debug(f"MQTT: {buf}") + client.on_log = on_log + # Define subscribe_event_topic BEFORE on_connect so it can be called from the callback def subscribe_event_topic(new_group_id): nonlocal event_topic, current_group_id @@ -613,7 +626,26 @@ def main(): # on_connect callback: Subscribe to all topics after connection is established def on_connect(client, userdata, flags, rc, properties=None): if rc == 0: - logging.info("MQTT connected successfully - subscribing to topics...") + connection_state["connected"] = True + connection_state["last_disconnect"] = None + + # Check if this is a reconnection + # paho-mqtt v2 provides ConnectFlags with attribute 'session_present' + # Older versions may provide dict-like flags; default to False. 
+ is_reconnect = False + try: + if hasattr(flags, "session_present"): + is_reconnect = bool(getattr(flags, "session_present")) + elif isinstance(flags, dict): + is_reconnect = bool(flags.get("session present", False)) + except Exception: + is_reconnect = False + + if is_reconnect: + logging.info("MQTT reconnected successfully - resubscribing to all topics...") + else: + logging.info("MQTT connected successfully - subscribing to topics...") + # Discovery-ACK-Topic abonnieren ack_topic = f"infoscreen/{client_id}/discovery_ack" client.subscribe(ack_topic) @@ -632,10 +664,28 @@ def main(): if current_group_id: logging.info(f"Subscribing to event topic for saved group_id: {current_group_id}") subscribe_event_topic(current_group_id) + + # Send discovery message after reconnection to re-register with server + if is_reconnect: + logging.info("Sending discovery after reconnection to re-register with server") + send_discovery(client, client_id, hardware_token, ip_addr) else: + connection_state["connected"] = False logging.error(f"MQTT connection failed with code: {rc}") + # on_disconnect callback (Paho v2 signature) + def on_disconnect(client, userdata, disconnect_flags, rc, properties=None): + connection_state["connected"] = False + connection_state["last_disconnect"] = time.time() + + if rc == 0: + logging.info("MQTT disconnected cleanly") + else: + logging.warning(f"MQTT disconnected unexpectedly with code: {rc}") + logging.info("Automatic reconnection will be attempted...") + client.on_connect = on_connect + client.on_disconnect = on_disconnect # Robust MQTT connect with fallbacks and retries broker_candidates = [MQTT_BROKER] @@ -669,13 +719,22 @@ def main(): logging.error(f"MQTT connection failed after multiple attempts: {last_error}") raise last_error + # Start the network loop early to begin connection process + client.loop_start() + logging.info("MQTT network loop started - establishing connection...") + # Wait for connection to complete and on_connect callback to 
fire - # This ensures subscriptions are set up before we start discovery - logging.info("Waiting for on_connect callback and subscription setup...") - for _ in range(10): # Wait up to ~1 second - client.loop(timeout=0.1) - time.sleep(0.1) - logging.info("Subscription setup complete, starting discovery phase") + logging.info("Waiting for initial connection and subscription setup...") + connection_timeout = 30 # seconds + start_wait = time.time() + while not connection_state["connected"] and (time.time() - start_wait) < connection_timeout: + time.sleep(0.5) + + if not connection_state["connected"]: + logging.error(f"Failed to establish initial MQTT connection within {connection_timeout}s") + raise Exception("MQTT connection timeout") + + logging.info("Initial connection established, subscription setup complete") # group_id message callback group_id_topic = f"infoscreen/{client_id}/group_id" @@ -705,16 +764,27 @@ def main(): logging.info(f"Current group_id at start: {current_group_id if current_group_id else 'none'}") # Discovery-Phase: Sende Discovery bis ACK empfangen - while not discovered: - send_discovery(client, client_id, hardware_token, ip_addr) - # Check for messages and discovered flag more frequently - for _ in range(int(HEARTBEAT_INTERVAL)): - client.loop(timeout=1.0) - if discovered: - break - time.sleep(1) + # The loop is already started, just wait and send discovery messages + discovery_attempts = 0 + max_discovery_attempts = 20 + while not discovered and discovery_attempts < max_discovery_attempts: + if connection_state["connected"]: + send_discovery(client, client_id, hardware_token, ip_addr) + discovery_attempts += 1 + # Wait for ACK, checking every second + for _ in range(int(HEARTBEAT_INTERVAL)): + if discovered: + break + time.sleep(1) + else: + logging.info("Waiting for MQTT connection before sending discovery...") + time.sleep(2) + if discovered: break + + if not discovered: + logging.warning(f"Discovery ACK not received after 
{max_discovery_attempts} attempts - continuing anyway") # Start screenshot service in background thread screenshot_thread = threading.Thread( @@ -725,16 +795,54 @@ def main(): screenshot_thread.start() logging.info("Screenshot service thread started") - # Heartbeat-Loop + # Heartbeat-Loop with connection state monitoring last_heartbeat = 0 + logging.info("Entering heartbeat loop (network loop already running in background thread)") + while True: - current_time = time.time() - if current_time - last_heartbeat >= HEARTBEAT_INTERVAL: - client.publish(f"infoscreen/{client_id}/heartbeat", "alive") - logging.info("Heartbeat sent.") - last_heartbeat = current_time - client.loop(timeout=5.0) - time.sleep(5) + try: + current_time = time.time() + + # Check connection state and log warnings if disconnected + if not connection_state["connected"]: + if connection_state["last_disconnect"]: + disconnect_duration = current_time - connection_state["last_disconnect"] + logging.warning(f"MQTT disconnected for {disconnect_duration:.1f}s - waiting for reconnection...") + else: + logging.warning("MQTT not connected - waiting for connection...") + + # Send heartbeat only when connected + if current_time - last_heartbeat >= HEARTBEAT_INTERVAL: + if client.is_connected(): + result = client.publish(f"infoscreen/{client_id}/heartbeat", "alive", qos=0) + if result.rc == mqtt.MQTT_ERR_SUCCESS: + logging.info("Heartbeat sent.") + elif result.rc == mqtt.MQTT_ERR_NO_CONN: + logging.debug("Heartbeat publish returned NO_CONN; retrying in 2s...") + time.sleep(2) + if client.is_connected(): + retry = client.publish(f"infoscreen/{client_id}/heartbeat", "alive", qos=0) + if retry.rc == mqtt.MQTT_ERR_SUCCESS: + logging.info("Heartbeat sent after retry.") + else: + logging.warning(f"Heartbeat publish failed after retry with code: {retry.rc}") + else: + logging.warning("Skipping heartbeat retry - MQTT still not connected") + else: + logging.warning(f"Heartbeat publish failed with code: {result.rc}") + 
else: + logging.debug("Skipping heartbeat - MQTT not connected (is_connected=False)") + last_heartbeat = current_time + + time.sleep(5) + except KeyboardInterrupt: + logging.info("Shutting down gracefully...") + client.loop_stop() + client.disconnect() + break + except Exception as e: + logging.error(f"Error in main loop: {e}") + time.sleep(5) if __name__ == "__main__":