feat: crash recovery, service_failed monitoring, broker health fields, command expiry sweep
- Add GET /api/clients/crashed endpoint (process_status=crashed or stale heartbeat) - Add restart_app command action with same lifecycle + lockout as reboot_host - Scheduler: crash auto-recovery loop (CRASH_RECOVERY_ENABLED flag, lockout, MQTT publish) - Scheduler: unconditional command expiry sweep per poll cycle (sweep_expired_commands) - Listener: subscribe to infoscreen/+/service_failed; persist service_failed_at + unit - Listener: extract broker_connection block from health payload; persist reconnect_count + last_disconnect_at - DB migration b1c2d3e4f5a6: service_failed_at, service_failed_unit, mqtt_reconnect_count, mqtt_last_disconnect_at on clients - Add GET /api/clients/service_failed and POST /api/clients/<uuid>/clear_service_failed - Monitoring overview API: include mqtt_reconnect_count + mqtt_last_disconnect_at per client - Frontend: orange service-failed alert panel (hidden when empty, auto-refresh, quittieren action) - Frontend: MQTT reconnect count + last disconnect in client detail panel - MQTT auth hardening: listener/scheduler/server use env credentials; broker enforces allow_anonymous false - Client command lifecycle foundation: ClientCommand model, reboot_host/shutdown_host, full ACK lifecycle - Docs: TECH-CHANGELOG, DEV-CHANGELOG, MQTT_EVENT_PAYLOAD_GUIDE, copilot-instructions updated - Add implementation-plans/, RESTART_VALIDATION_CHECKLIST.md, TODO.md
This commit is contained in:
@@ -19,6 +19,10 @@ services:
|
||||
- DB_CONN=mysql+pymysql://${DB_USER}:${DB_PASSWORD}@db/${DB_NAME}
|
||||
- DB_URL=mysql+pymysql://${DB_USER}:${DB_PASSWORD}@db/${DB_NAME}
|
||||
- API_BASE_URL=http://server:8000
|
||||
- MQTT_BROKER_HOST=${MQTT_BROKER_HOST:-mqtt}
|
||||
- MQTT_BROKER_PORT=${MQTT_BROKER_PORT:-1883}
|
||||
- MQTT_USER=${MQTT_USER}
|
||||
- MQTT_PASSWORD=${MQTT_PASSWORD}
|
||||
- ENV=${ENV:-development}
|
||||
- FLASK_SECRET_KEY=${FLASK_SECRET_KEY:-dev-secret-key-change-in-production}
|
||||
- DEFAULT_SUPERADMIN_USERNAME=${DEFAULT_SUPERADMIN_USERNAME:-superadmin}
|
||||
@@ -70,6 +74,17 @@ services:
|
||||
image: eclipse-mosquitto:2.0.21 # ✅ GUT: Version ist bereits spezifisch
|
||||
container_name: infoscreen-mqtt
|
||||
restart: unless-stopped
|
||||
command: >
|
||||
sh -c 'set -eu;
|
||||
: "$${MQTT_USER:?MQTT_USER not set}";
|
||||
: "$${MQTT_PASSWORD:?MQTT_PASSWORD not set}";
|
||||
touch /mosquitto/config/passwd;
|
||||
chmod 600 /mosquitto/config/passwd;
|
||||
mosquitto_passwd -b /mosquitto/config/passwd "$${MQTT_USER}" "$${MQTT_PASSWORD}";
|
||||
if [ -n "$${MQTT_CANARY_USER:-}" ] && [ -n "$${MQTT_CANARY_PASSWORD:-}" ]; then
|
||||
mosquitto_passwd -b /mosquitto/config/passwd "$${MQTT_CANARY_USER}" "$${MQTT_CANARY_PASSWORD}";
|
||||
fi;
|
||||
exec mosquitto -c /mosquitto/config/mosquitto.conf'
|
||||
volumes:
|
||||
- ./mosquitto/config:/mosquitto/config
|
||||
- ./mosquitto/data:/mosquitto/data
|
||||
@@ -77,13 +92,18 @@ services:
|
||||
ports:
|
||||
- "1883:1883" # Standard MQTT
|
||||
- "9001:9001" # WebSocket (falls benötigt)
|
||||
environment:
|
||||
- MQTT_USER=${MQTT_USER}
|
||||
- MQTT_PASSWORD=${MQTT_PASSWORD}
|
||||
- MQTT_CANARY_USER=${MQTT_CANARY_USER:-}
|
||||
- MQTT_CANARY_PASSWORD=${MQTT_CANARY_PASSWORD:-}
|
||||
networks:
|
||||
- infoscreen-net
|
||||
healthcheck:
|
||||
test:
|
||||
[
|
||||
"CMD-SHELL",
|
||||
"mosquitto_pub -h localhost -t test -m 'health' || exit 1",
|
||||
"mosquitto_pub -h localhost -u $$MQTT_USER -P $$MQTT_PASSWORD -t test -m 'health' || exit 1",
|
||||
]
|
||||
interval: 30s
|
||||
timeout: 5s
|
||||
@@ -169,13 +189,18 @@ services:
|
||||
environment:
|
||||
# HINZUGEFÜGT: Datenbank-Verbindungsstring
|
||||
- DB_CONN=mysql+pymysql://${DB_USER}:${DB_PASSWORD}@db/${DB_NAME}
|
||||
- MQTT_BROKER_URL=mqtt
|
||||
- MQTT_PORT=1883
|
||||
- MQTT_BROKER_HOST=${MQTT_BROKER_HOST:-mqtt}
|
||||
- MQTT_BROKER_PORT=${MQTT_BROKER_PORT:-1883}
|
||||
- MQTT_USER=${MQTT_USER}
|
||||
- MQTT_PASSWORD=${MQTT_PASSWORD}
|
||||
- POLL_INTERVAL_SECONDS=${POLL_INTERVAL_SECONDS:-30}
|
||||
- POWER_INTENT_PUBLISH_ENABLED=${POWER_INTENT_PUBLISH_ENABLED:-false}
|
||||
- POWER_INTENT_HEARTBEAT_ENABLED=${POWER_INTENT_HEARTBEAT_ENABLED:-true}
|
||||
- POWER_INTENT_EXPIRY_MULTIPLIER=${POWER_INTENT_EXPIRY_MULTIPLIER:-3}
|
||||
- POWER_INTENT_MIN_EXPIRY_SECONDS=${POWER_INTENT_MIN_EXPIRY_SECONDS:-90}
|
||||
- CRASH_RECOVERY_ENABLED=${CRASH_RECOVERY_ENABLED:-false}
|
||||
- CRASH_RECOVERY_GRACE_SECONDS=${CRASH_RECOVERY_GRACE_SECONDS:-180}
|
||||
- CRASH_RECOVERY_LOCKOUT_MINUTES=${CRASH_RECOVERY_LOCKOUT_MINUTES:-15}
|
||||
networks:
|
||||
- infoscreen-net
|
||||
|
||||
|
||||
Reference in New Issue
Block a user