feat: crash recovery, service_failed monitoring, broker health fields, command expiry sweep

- Add GET /api/clients/crashed endpoint (process_status=crashed or stale heartbeat)
- Add restart_app command action with same lifecycle + lockout as reboot_host
- Scheduler: crash auto-recovery loop (CRASH_RECOVERY_ENABLED flag, lockout, MQTT publish)
- Scheduler: unconditional command expiry sweep per poll cycle (sweep_expired_commands)
- Listener: subscribe to infoscreen/+/service_failed; persist service_failed_at + unit
- Listener: extract broker_connection block from health payload; persist reconnect_count + last_disconnect_at
- DB migration b1c2d3e4f5a6: service_failed_at, service_failed_unit, mqtt_reconnect_count, mqtt_last_disconnect_at on clients
- Add GET /api/clients/service_failed and POST /api/clients/<uuid>/clear_service_failed
- Monitoring overview API: include mqtt_reconnect_count + mqtt_last_disconnect_at per client
- Frontend: orange service-failed alert panel (hidden when empty, auto-refresh, acknowledge ("quittieren") action)
- Frontend: MQTT reconnect count + last disconnect in client detail panel
- MQTT auth hardening: listener/scheduler/server use env credentials; broker enforces allow_anonymous false
- Client command lifecycle foundation: ClientCommand model, reboot_host/shutdown_host, full ACK lifecycle
- Docs: TECH-CHANGELOG, DEV-CHANGELOG, MQTT_EVENT_PAYLOAD_GUIDE, copilot-instructions updated
- Add implementation-plans/, RESTART_VALIDATION_CHECKLIST.md, TODO.md
This commit is contained in:
2026-04-05 10:17:56 +00:00
parent 4d652f0554
commit 03e3c11e90
35 changed files with 2511 additions and 80 deletions

View File

@@ -45,15 +45,37 @@ services:
image: eclipse-mosquitto:2.0.21
container_name: infoscreen-mqtt
restart: unless-stopped
command: >
sh -c 'set -eu;
: "$${MQTT_USER:?MQTT_USER not set}";
: "$${MQTT_PASSWORD:?MQTT_PASSWORD not set}";
touch /mosquitto/config/passwd;
chmod 600 /mosquitto/config/passwd;
mosquitto_passwd -b /mosquitto/config/passwd "$${MQTT_USER}" "$${MQTT_PASSWORD}";
if [ -n "$${MQTT_CANARY_USER:-}" ] && [ -n "$${MQTT_CANARY_PASSWORD:-}" ]; then
mosquitto_passwd -b /mosquitto/config/passwd "$${MQTT_CANARY_USER}" "$${MQTT_CANARY_PASSWORD}";
fi;
exec mosquitto -c /mosquitto/config/mosquitto.conf'
volumes:
- ./mosquitto/config/mosquitto.conf:/mosquitto/config/mosquitto.conf:ro
- ./mosquitto/config:/mosquitto/config
- ./mosquitto/data:/mosquitto/data
- ./mosquitto/log:/mosquitto/log
ports:
- "1883:1883"
- "9001:9001"
environment:
- MQTT_USER=${MQTT_USER}
- MQTT_PASSWORD=${MQTT_PASSWORD}
- MQTT_CANARY_USER=${MQTT_CANARY_USER:-}
- MQTT_CANARY_PASSWORD=${MQTT_CANARY_PASSWORD:-}
networks:
- infoscreen-net
healthcheck:
test: ["CMD-SHELL", "mosquitto_pub -h localhost -t test -m 'health' || exit 1"]
# Authenticated publish probe. The credentials are double-quoted so that values
# containing spaces or shell metacharacters are passed to mosquitto_pub as
# single arguments; "$$" is the Compose escape for a literal "$", so the
# variables are expanded by the shell inside the container.
test:
[
"CMD-SHELL",
"mosquitto_pub -h localhost -u \"$$MQTT_USER\" -P \"$$MQTT_PASSWORD\" -t test -m 'health' || exit 1",
]
interval: 30s
timeout: 5s
retries: 3
@@ -125,6 +147,11 @@ services:
DB_PASSWORD: ${DB_PASSWORD}
DB_NAME: ${DB_NAME}
DB_ROOT_PASSWORD: ${DB_ROOT_PASSWORD}
API_BASE_URL: http://server:8000
MQTT_BROKER_HOST: ${MQTT_BROKER_HOST:-mqtt}
MQTT_BROKER_PORT: ${MQTT_BROKER_PORT:-1883}
MQTT_USER: ${MQTT_USER}
MQTT_PASSWORD: ${MQTT_PASSWORD}
networks:
- infoscreen-net
@@ -141,7 +168,18 @@ services:
environment:
# ADDED: database connection string
DB_CONN: "mysql+pymysql://${DB_USER}:${DB_PASSWORD}@db/${DB_NAME}"
MQTT_PORT: 1883
MQTT_BROKER_HOST: ${MQTT_BROKER_HOST:-mqtt}
MQTT_BROKER_PORT: ${MQTT_BROKER_PORT:-1883}
MQTT_USER: ${MQTT_USER}
MQTT_PASSWORD: ${MQTT_PASSWORD}
POLL_INTERVAL_SECONDS: ${POLL_INTERVAL_SECONDS:-30}
POWER_INTENT_PUBLISH_ENABLED: ${POWER_INTENT_PUBLISH_ENABLED:-false}
POWER_INTENT_HEARTBEAT_ENABLED: ${POWER_INTENT_HEARTBEAT_ENABLED:-true}
POWER_INTENT_EXPIRY_MULTIPLIER: ${POWER_INTENT_EXPIRY_MULTIPLIER:-3}
POWER_INTENT_MIN_EXPIRY_SECONDS: ${POWER_INTENT_MIN_EXPIRY_SECONDS:-90}
CRASH_RECOVERY_ENABLED: ${CRASH_RECOVERY_ENABLED:-false}
CRASH_RECOVERY_GRACE_SECONDS: ${CRASH_RECOVERY_GRACE_SECONDS:-180}
CRASH_RECOVERY_LOCKOUT_MINUTES: ${CRASH_RECOVERY_LOCKOUT_MINUTES:-15}
networks:
- infoscreen-net