Compare commits

5 Commits

Author SHA1 Message Date
RobbStarkAustria
3230ec5bb4 fix: move StartLimitIntervalSec/Burst to [Unit] for older systemd compat
Older systemd (< 230, common on Pi OS Buster/Bullseye) rejects these
keys in [Service] with 'Unknown key' warning and ignores them, meaning
no restart rate-limiting was enforced.

Also includes pi-setup.sh fix for hardcoded user/home from previous session.
2026-04-05 09:15:55 +02:00
RobbStarkAustria
fa7efae346 fix: pi-setup.sh substitutes actual username when installing systemd units
Service files had User=olafn and /home/olafn/ hardcoded. pi-setup.sh now
uses ACTUAL_USER=${SUDO_USER:-$USER} and sed-substitutes at install time
so units work on any Pi regardless of the OS user.
2026-04-05 08:59:19 +02:00
RobbStarkAustria
0cd0d95612 feat: remote commands, systemd units, process observability, broker auth split
- Command intake (reboot/shutdown) on infoscreen/{uuid}/commands with ack lifecycle
- MQTT_USER/MQTT_PASSWORD_BROKER split from identity vars; configure_mqtt_security() updated
- infoscreen-simclient.service: Type=notify, WatchdogSec=60, Restart=on-failure
- infoscreen-notify-failure@.service + script: retained MQTT alert when systemd gives up (Gap 3)
- _sd_notify() watchdog keepalive in simclient main loop (Gap 1)
- broker_connection block in health payload: reconnect_count, last_disconnect_at (Gap 2)
- COMMAND_MOCK_REBOOT_IMMEDIATE_COMPLETE canary flag with safety guard
- SERVER_TEAM_ACTIONS.md: server-side integration action items
- Docs: README, CHANGELOG, src/README, copilot-instructions updated
- 43 tests passing
2026-04-05 08:36:50 +02:00
RobbStarkAustria
82f43f75ba docs: refactor docs structure and tighten assistant instruction policy
shrink root README into a landing page with a docs map and focused contributor guidance
add TV_POWER_RUNBOOK as the canonical TV power rollout and canary runbook
add CHANGELOG and move project history out of README-style docs
refactor src README into a developer-focused guide (architecture, runtime files, MQTT, debugging)
prune redundant older HDMI docs and keep a canonical HDMI_CEC_SETUP path
update copilot instructions to a high-signal policy format with strict anti-shadow-README design rules
align references across docs to current files, scripts, and TV power behavior
2026-04-01 10:01:58 +02:00
RobbStarkAustria
fb0980aa88 fix(mqtt): force event topic resubscribe on reconnect [consent: user-approved] 2026-03-31 15:47:10 +02:00
41 changed files with 4710 additions and 2290 deletions

View File

@@ -16,6 +16,17 @@ LOG_LEVEL=INFO # DEBUG | INFO | WARNING | ERROR
# MQTT Broker Configuration # MQTT Broker Configuration
MQTT_BROKER=<your-mqtt-broker-host-or-ip> # Change to your MQTT server IP MQTT_BROKER=<your-mqtt-broker-host-or-ip> # Change to your MQTT server IP
MQTT_PORT=1883 MQTT_PORT=1883
# Broker login used by simclient to connect to MQTT
MQTT_USER=<broker-username>
MQTT_PASSWORD_BROKER=<broker-password>
# Optional per-device identity credentials (legacy fallback)
MQTT_USERNAME=infoscreen-client-<client-uuid-prefix>
MQTT_PASSWORD=<set-per-device-20-char-random-password>
MQTT_TLS_ENABLED=0 # 1 when broker TLS is enabled for this client
# MQTT_TLS_CA_CERT=/etc/infoscreen/mqtt/ca.crt
# MQTT_TLS_CERT=/etc/infoscreen/mqtt/client.crt
# MQTT_TLS_KEY=/etc/infoscreen/mqtt/client.key
# MQTT_TLS_INSECURE=0 # only for controlled test environments
# Timing Configuration (quieter intervals for productive test) # Timing Configuration (quieter intervals for productive test)
HEARTBEAT_INTERVAL=60 # Heartbeat frequency in seconds HEARTBEAT_INTERVAL=60 # Heartbeat frequency in seconds
@@ -41,14 +52,32 @@ CEC_TURN_OFF_DELAY=30 # Seconds to wait before turning off TV after last
CEC_POWER_ON_WAIT=5 # Seconds to wait after power ON command (for TV to boot up) CEC_POWER_ON_WAIT=5 # Seconds to wait after power ON command (for TV to boot up)
CEC_POWER_OFF_WAIT=5 # Seconds to wait after power OFF command (increased for slower TVs) CEC_POWER_OFF_WAIT=5 # Seconds to wait after power OFF command (increased for slower TVs)
# Optional: MQTT authentication (if your broker requires username/password) # TV Power Intent (MQTT-based coordinated power control, Phase 1)
#MQTT_USERNAME= # Controls how the display manager decides whether to use local CEC or server-issued intent.
#MQTT_PASSWORD= # local — ignore MQTT intents; all power decisions are local (safe default for rollout)
# hybrid — prefer MQTT intent when present and valid; fall back to local CEC if not
# mqtt — MQTT intent is authoritative; local CEC only fires as last-resort guard
# See TV_POWER_RUNBOOK.md (canonical TV power rollout and canary runbook) before changing from 'local'.
POWER_CONTROL_MODE=hybrid # local | hybrid | mqtt
# Reboot/Shutdown command handling
# Helper installed by ./scripts/install-command-helper.sh
COMMAND_HELPER_PATH=/usr/local/bin/infoscreen-cmd-helper.sh
# Mock mode (safe canary): uncomment next line and comment the live path above
# COMMAND_HELPER_PATH=/home/olafn/infoscreen-dev/scripts/mock-command-helper.sh
# Timeout for helper execution (seconds)
COMMAND_EXEC_TIMEOUT_SEC=15
# Test mode: for reboot_host with mock helper, send completed without restart (0/1)
COMMAND_MOCK_REBOOT_IMMEDIATE_COMPLETE=0
# Command deduplication retention window (hours)
COMMAND_DEDUPE_TTL_HOURS=24
# Maximum processed command IDs kept in dedupe cache
COMMAND_DEDUPE_MAX_ENTRIES=5000
# MQTT authentication
# Use a per-client service account. Keep this file mode 600 on the device.
# Optional TLS settings (if using secure MQTT) # Optional TLS settings (if using secure MQTT)
#MQTT_TLS_CA_CERT=
#MQTT_TLS_CERT=
#MQTT_TLS_KEY=
# Notes: # Notes:
# - Keep actual secrets and host-specific values in a local .env file that is NOT committed. # - Keep actual secrets and host-specific values in a local .env file that is NOT committed.

View File

@@ -1,596 +1,128 @@
# Copilot Instructions - Infoscreen Client # Copilot Instructions - Infoscreen Client
## Quick Start for AI Assistants ## Purpose
This file defines durable, high-signal instructions for AI assistants working in this repository.
### Critical Rules ## Instruction File Design Rules
- **ALWAYS use Impressive** for PDF presentations (has native auto-advance/loop)
- **NEVER suggest xdotool** approaches (failed on Raspberry Pi due to focus issues)
- **NEVER suggest video conversion** (adds complexity, had black screen issues)
- **Virtual environment MUST have** pygame + pillow (required for Impressive)
- **Client-side resize/compress** screenshots before MQTT transmission
- **Server renders PPTX → PDF via Gotenberg** (client only displays PDFs, no LibreOffice needed)
- **Keep screenshot consent notice in docs** when describing dashboard screenshot feature
- **Event-start/event-stop screenshots must preserve metadata** - See SCREENSHOT_MQTT_FIX.md for critical race condition that was fixed
- **Screenshot updates must keep `latest.jpg` and `meta.json` in sync** (simclient prefers `latest.jpg`)
- **Dashboard payload uses grouped v2 schema** (`message/content/runtime/metadata`, `schema_version="2.0"`)
- **Event-triggered screenshots**: `display_manager` arms a `threading.Timer` after start/stop, captures, writes `meta.json` with `send_immediately=true`; simclient fires within ≤1s
- **Payload assembly is centralized** in `_build_dashboard_payload()` — do not build dashboard JSON at call sites
### Key Files & Locations Treat this file as policy, not as project handbook.
- **Display logic**: `src/display_manager.py` (controls presentations/video/web)
- **MQTT client**: `src/simclient.py` (event management, heartbeat, discovery)
- **Runtime state**: `src/current_event.json` (current active event)
- **Process health bridge**: `src/current_process_health.json` (display_manager -> simclient)
- **Config**: `src/config/client_uuid.txt`, `src/config/last_group_id.txt`, `.env`
- **Logs**: `logs/display_manager.log`, `logs/simclient.log`, `logs/monitoring.log`
- **Screenshots**: `src/screenshots/` (shared volume between processes)
### Common Tasks Quick Reference - Scope rule: keep only durable constraints, architectural invariants, and high-value task pointers for assistants.
| Task | File | Key Method/Section | - Size rule: target 80-140 lines; hard cap 180 lines.
|------|------|-------------------| - Canonical-doc rule: link to specialist docs for operational depth instead of copying their content.
| Add event type | `display_manager.py` | `start_display_for_event()` | - Single-source rule: each topic has one canonical document; this file should only reference it.
| Modify presentation | `display_manager.py` | `start_presentation()` | - No shadow-README rule: do not add long setup guides, full command catalogs, troubleshooting playbooks, or large directory trees.
| Modify process monitoring | `display_manager.py` | `ProcessHealthState`, `process_events()` |
| Publish health/log topics | `simclient.py` | `read_health_state()`, `publish_health_message()`, `publish_log_message()` |
| Change MQTT topics | `simclient.py` | Topic constants/handlers |
| Update screenshot | `display_manager.py` | `_capture_screenshot()` |
| File downloads | `simclient.py` | `resolve_file_url()` |
--- Allowed content:
## Project Overview - Critical do/don't rules.
**Infoscreen Client** - Digital signage system for Raspberry Pi. Displays presentations, videos, and web content in kiosk mode. Server-managed via MQTT for educational/research environments with multiple displays. - Short architecture snapshot.
- Runtime coordination file map.
- Minimal task pointers to key methods.
- Documentation policy for where detailed content belongs.
**Architecture**: Two-process design Disallowed content:
- `simclient.py` - MQTT communication (container/native)
- `display_manager.py` - Display control (host OS with X11/Wayland access)
## Architecture & Technology Stack - Comprehensive installation/deployment tutorials.
- Large environment-variable reference sections.
- Extended troubleshooting matrices.
- Repeated feature deep-dives already documented elsewhere.
- Historical release notes (keep those in `CHANGELOG.md`).
### Core Technologies Update checklist for contributors:
- **Python 3.x** - Main application language
- **MQTT (paho-mqtt)** - Real-time messaging with server
- **Impressive** - PDF presenter with native auto-advance and loop support
- **Environment Variables** - Configuration management via `.env` files
- **JSON** - Data exchange format for events and configuration
- **Base64** - Screenshot transmission encoding
- **Threading** - Background services (screenshot monitoring)
### System Components 1. Is the new text a durable assistant rule or invariant?
- **Main Client** (`simclient.py`) - Core MQTT client and event processor 2. If it is operational detail, did you place it in the specialist doc and only link it here?
- **Display Manager** (`display_manager.py`) - Controls display applications (presentations, videos, web) 3. Did you avoid duplicating existing docs?
- **Discovery System** - Automatic client registration with server 4. Does this file remain below the hard cap?
- **Heartbeat Monitoring** - Regular status updates and keepalive
- **Event Processing** - Handles presentation/content switching commands
- **Screenshot Service** - Dashboard monitoring via image capture (captured by display_manager.py, transmitted by simclient.py)
- **File Management** - Downloads and manages presentation files
- **Group Management** - Supports organizing clients into groups
## Key Features & Functionality Use specialist docs for deep operational details:
### MQTT Communication Patterns - `README.md` (landing page + docs map)
- **Discovery**: `infoscreen/discovery` → `infoscreen/{client_id}/discovery_ack` - `TV_POWER_RUNBOOK.md` (TV power rollout and canary)
- **Heartbeat**: Regular `infoscreen/{client_id}/heartbeat` messages - `TV_POWER_INTENT_SERVER_CONTRACT_V1.md` (frozen contract)
- **Health**: `infoscreen/{client_id}/health` (event/process/pid/status) - `IMPRESSIVE_INTEGRATION.md` (presentation behavior)
- **Client logs**: `infoscreen/{client_id}/logs/error|warn` (selective forwarding) - `HDMI_CEC_SETUP.md` (CEC setup/troubleshooting)
### MQTT Reconnection & Heartbeat (Nov 2025) - `SCREENSHOT_MQTT_FIX.md` (screenshot race-condition fixes)
- The client uses Paho MQTT v2 callback API with `client.loop_start()` and `client.reconnect_delay_set()` to handle automatic reconnection. - `src/README.md` (developer-focused architecture/debugging)
- `on_connect` re-subscribes to all topics (`discovery_ack`, `config`, `group_id`, current group events) and re-sends discovery on reconnect to re-register with the server. - `SERVER_TEAM_ACTIONS.md` (server-side integration action items)
- Heartbeats are gated by `client.is_connected()` and retry once on `NO_CONN` (rc=4). Occasional rc=4 warnings are normal right after broker restarts or brief network stalls and typically followed by a successful heartbeat.
- Do not treat single rc=4 heartbeat warnings as failures. Investigate only if multiple consecutive heartbeats fail without recovery.
- **Dashboard**: Screenshot transmission via `infoscreen/{client_id}/dashboard` (includes base64-encoded screenshot, timestamp, client status, system info)
- **Group Assignment**: Server sends group via `infoscreen/{client_id}/group_id`
- **Events**: Content commands via `infoscreen/events/{group_id}`
### Event Types Supported ## Critical Rules
```json
{
"presentation": {
"files": [{"url": "https://server/file.pptx", "filename": "file.pptx"}],
"auto_advance": true,
"slide_interval": 10,
"loop": true
},
"web": {
"url": "https://example.com"
},
"video": {
"url": "https://server/video.mp4",
"loop": false,
"autoplay": true,
"volume": 0.8
}
}
```
### Presentation System (Impressive-Based) - ALWAYS use Impressive for PDF presentations.
- **Server-side conversion**: PPTX files are converted to PDF by the server using Gotenberg - NEVER suggest xdotool-based slideshow control.
- **Client receives PDFs**: All presentations arrive as pre-rendered PDF files - NEVER suggest converting presentations to video as a workaround.
- **Direct display**: PDF files are displayed natively with Impressive (no client-side conversion) - Virtual environment must include `pygame` and `pillow` for Impressive.
- **Auto-advance**: Native Impressive `--auto` parameter (no xdotool needed) - Keep screenshot consent notice in docs when describing dashboard screenshots.
- **Loop mode**: Impressive `--wrap` parameter for infinite looping - Keep screenshot updates consistent between `latest.jpg` and `meta.json`.
- **Auto-quit**: Impressive `--autoquit` parameter to exit after last slide - Event-trigger screenshots must preserve metadata and send quickly (`send_immediately=true`).
- **Virtual Environment**: Uses venv with pygame + pillow for reliable operation - Dashboard payload must stay grouped v2 (`message/content/runtime/metadata`, `schema_version="2.0"`).
- **Reliable**: Works consistently on Raspberry Pi without window focus issues - Dashboard payload assembly is centralized in `_build_dashboard_payload()`.
- Root `README.md` is a landing page; do not re-expand it into a full manual.
- TV power rollout guidance lives in `TV_POWER_RUNBOOK.md`.
- TV power contract truth lives in `TV_POWER_INTENT_SERVER_CONTRACT_V1.md`.
- `MQTT_USER`/`MQTT_PASSWORD_BROKER` are broker login credentials; `MQTT_USERNAME`/`MQTT_PASSWORD` are legacy identity fields. Never confuse the two.
### Client Identification ## Architecture Snapshot
- **Hardware Token**: SHA256 hash of serial number + MAC addresses
- **Persistent UUID**: Stored in `config/client_uuid.txt`
- **Group Membership**: Persistent group assignment in `config/last_group_id.txt`
## Directory Structure Two-process design:
```
~/infoscreen-dev/
├── .env # Environment configuration
├── README.md # Complete project documentation
├── IMPRESSIVE_INTEGRATION.md # Presentation system details
├── QUICK_REFERENCE.md # Quick command reference
├── .github/ # GitHub configuration
│ └── copilot-instructions.md
├── src/ # Source code
│ ├── simclient.py # MQTT client (event management)
│ ├── display_manager.py # Display controller (Impressive integration)
│ ├── current_event.json # Current active event (runtime)
│ ├── config/ # Persistent client data
│ │ ├── client_uuid.txt
│ │ └── last_group_id.txt
│ ├── presentation/ # Downloaded presentation files & PDFs
│ └── screenshots/ # Screenshot captures for monitoring
├── scripts/ # Production & testing utilities
│ ├── start-dev.sh # Start development client
│ ├── start-display-manager.sh # Start Display Manager
│ ├── test-display-manager.sh # Interactive testing menu
│ ├── test-impressive.sh # Test Impressive (auto-quit)
│ ├── test-impressive-loop.sh # Test Impressive (loop mode)
│ ├── test-mqtt.sh # MQTT connectivity test
│ ├── test-screenshot.sh # Screenshot capture test
│ └── present-pdf-auto-advance.sh # PDF presentation wrapper
├── logs/ # Application logs
│ ├── simclient.log
│ └── display_manager.log
└── venv/ # Python virtual environment
```
## Configuration & Environment Variables - `src/simclient.py`: MQTT communication, discovery, group assignment, event intake, heartbeat, dashboard publish, power intent ingestion, remote command intake.
- `src/display_manager.py`: content display lifecycle, HDMI-CEC, screenshot capture, runtime process health.
### Development vs Production Runtime coordination files:
- **Development**: `ENV=development`, verbose logging, frequent heartbeats
- **Production**: `ENV=production`, minimal logging, longer intervals
HDMI-CEC behavior: - `src/current_event.json` (active event)
- In development mode (`ENV=development`) the Display Manager automatically disables HDMI-CEC to avoid constantly switching the TV during local testing. The test helper `scripts/test-hdmi-cec.sh` also respects this: option 5 (Display Manager CEC integration) detects dev mode and skips running CEC commands. Manual options (1–4) still work for direct `cec-client` checks. - `src/current_process_health.json` (health bridge)
- `src/power_intent_state.json` (simclient -> display_manager)
- `src/power_state.json` (display_manager -> simclient -> MQTT)
- `src/screenshots/meta.json` and `src/screenshots/latest.jpg`
### Key Environment Variables ## TV Power Coordination Rules
```bash
# Environment
ENV=development|production
DEBUG_MODE=1|0
LOG_LEVEL=DEBUG|INFO|WARNING|ERROR
# MQTT Configuration - `POWER_CONTROL_MODE` supports: `local`, `hybrid`, `mqtt`.
MQTT_BROKER=192.168.1.100 # Primary MQTT broker - Phase 1 intent topic is group-scoped: `infoscreen/groups/{group_id}/power/intent`.
MQTT_PORT=1883 # MQTT port - In hybrid mode, valid fresh MQTT intent is preferred with local fallback behavior.
MQTT_BROKER_FALLBACKS=host1,host2 # Fallback brokers - Retained clear is an empty payload and should be handled cleanly (not as broken JSON).
- Use `scripts/test-power-intent.sh` for ON/OFF, stale, malformed, retained-clear, and telemetry checks.
# Timing (seconds) ## HDMI-CEC Rules
HEARTBEAT_INTERVAL=10 # Status update frequency
SCREENSHOT_INTERVAL=30 # Dashboard screenshot transmission frequency (simclient.py)
SCREENSHOT_CAPTURE_INTERVAL=30 # Screenshot capture frequency (display_manager.py)
# Screenshot Configuration - In `ENV=development`, display manager automatically disables CEC.
SCREENSHOT_MAX_WIDTH=800 # Downscale width (preserves aspect ratio) - `scripts/test-hdmi-cec.sh` integration path respects development mode; manual CEC options still work.
SCREENSHOT_JPEG_QUALITY=70 # JPEG compression quality (1-95) - Keep delayed turn-off behavior safe across adjacent events.
SCREENSHOT_MAX_FILES=20 # Number of screenshots to keep (rotation)
SCREENSHOT_ALWAYS=0 # Force capture even when no display active (testing)
# File/API Server (used to download presentation files) ## Screenshot System Rules
# Defaults to the same host as MQTT_BROKER, port 8000, scheme http.
# If incoming event URLs use host 'server' (or are host-less), simclient rewrites them to this server.
FILE_SERVER_HOST= # optional; if empty, defaults to MQTT_BROKER
FILE_SERVER_PORT=8000 # default API port
FILE_SERVER_SCHEME=http # http or https
# FILE_SERVER_BASE_URL= # optional full override, e.g., http://192.168.1.100:8000
```
### File Server URL Resolution - Capture is performed by `display_manager.py`; transmission by `simclient.py`.
- The MQTT client (`simclient.py`) downloads presentation files listed in events. - Keep event-trigger screenshot behavior intact (`event_start` / `event_stop`).
- To avoid DNS issues when event URLs use `http://server:8000/...`, the client normalizes such URLs to the configured file server. - Maintain one-second responsiveness for triggered send handling.
- By default, the file server host is the same as `MQTT_BROKER`, with port `8000` and scheme `http`. - Prefer `latest.jpg` for dashboard transmission, with safe fallback to newest timestamped file.
- You can override behavior using `.env` variables above; `FILE_SERVER_BASE_URL` takes precedence over individual host/port/scheme.
- Inline comments in `.env` are supported; keep comments after a space and `#` so values stay clean.
## Development Patterns & Best Practices ## Common Task Pointers
### Error Handling - Add event type: `src/display_manager.py` -> `start_display_for_event()`
- Robust MQTT connection with fallbacks and retries - Presentation behavior: `src/display_manager.py` -> `start_presentation()`
- Graceful degradation when services unavailable - Power intent validation: `src/simclient.py` -> `validate_power_intent_payload()`
- Comprehensive logging with rotating file handlers - Power intent application: `src/display_manager.py` -> `_apply_mqtt_power_intent()`
- Exception handling for all external operations - Screenshot capture logic: `src/display_manager.py` -> `_capture_screenshot()`
- Dashboard payload: `src/simclient.py` -> `_build_dashboard_payload()`
- Remote command intake: `src/simclient.py` -> `on_command_message()`
- Command validation: `src/simclient.py` -> `validate_command_payload()`
- File URL rewriting: `src/simclient.py` -> `resolve_file_url()`
## Documentation Policy
### State Management When updating docs:
- Event state persisted in `current_event.json`
- Client configuration persisted across restarts - Keep `README.md` concise and link-heavy.
- Group membership maintained with server synchronization - Put rollout/runbook content into specialist docs (for example `TV_POWER_RUNBOOK.md`).
- Clean state transitions (delete old events on group changes) - Keep implementation history in `CHANGELOG.md`.
- Prefer updating one canonical doc per topic instead of duplicating the same content in multiple files.
### Threading Architecture ## Assistant Workflow Expectations
- Main thread: MQTT communication and heartbeat
- Background thread: Screenshot monitoring service - Prefer minimal, targeted changes.
- Thread-safe operations for shared resources - Preserve existing behavior unless explicitly changing it.
- Validate changes with relevant scripts/log checks where possible.
### File Operations - Keep references and examples aligned with current files and topics.
- Automatic directory creation for all output paths
- Safe file operations with proper exception handling
- Atomic writes for configuration files
- Automatic cleanup of temporary/outdated files
## Development Workflow
### Local Development Setup
1. Clone repository to `~/infoscreen-dev`
2. Create virtual environment: `python3 -m venv venv`
3. Install dependencies: `pip install -r src/requirements.txt` (includes pygame + pillow for PDF slideshows)
4. Configure `.env` file with MQTT broker settings
5. Use `./scripts/start-dev.sh` for MQTT client or `./scripts/start-display-manager.sh` for display manager
6. **Important**: Virtual environment must include pygame and pillow for PDF auto-advance to work
### Testing Components
- `./scripts/test-mqtt.sh` - MQTT connectivity
- `./scripts/test-screenshot.sh` - Screenshot capture
- `./scripts/test-display-manager.sh` - Interactive testing menu
- `./scripts/test-impressive.sh` - Test auto-quit presentation mode
- `./scripts/test-impressive-loop.sh` - Test loop presentation mode
- `./scripts/test-utc-timestamps.sh` - Event timing validation
- Manual event testing via mosquitto_pub or test-display-manager.sh
### Production Deployment
- Docker containerization available (`docker-compose.production.yml`)
- Systemd service integration for auto-start
- Resource limits and health checks configured
- Persistent volume mounts for data
### System Dependencies
- Python 3.x runtime + virtual environment
- MQTT broker connectivity
- Display server: X11 or Wayland (for screenshots)
- **Impressive** - PDF presenter (primary tool, requires pygame + pillow in venv)
- **Chromium/Chrome** - Web kiosk mode
- **VLC** - Video playback (python-vlc preferred, vlc binary fallback)
- **Screenshot tools**:
- X11: `scrot` or `import` (ImageMagick) or `xwd`+`convert`
- Wayland: `grim` or `gnome-screenshot` or `spectacle`
**Note:** LibreOffice is NOT required on the client. PPTX→PDF conversion is handled server-side by Gotenberg.
### Video Playback (python-vlc)
- **Preferred**: python-vlc (programmatic control: autoplay, loop, volume)
- **Fallback**: External vlc binary
- **Fields**: `url`, `autoplay` (bool), `loop` (bool), `volume` (0.0-1.0 → 0-100)
- **URL rewriting**: `server` host → configured file server
- **Fullscreen**: enforced for python-vlc on startup (with short retry toggles); external fallback uses `--fullscreen`
- **External VLC audio**: `muted=true` (or effective volume 0%) starts with `--no-audio`; otherwise startup loudness is applied via `--gain=<0.00-1.00>`
- **Runtime volume semantics**: python-vlc supports live updates; external VLC fallback is startup-parameter based
- **Monitoring PID semantics**: python-vlc runs in-process, so PID is `display_manager.py` runtime PID; external fallback uses external `vlc` PID
- **HW decode errors**: `h264_v4l2m2m` failures are normal if V4L2 M2M unavailable; use software decode
- Robust payload parsing with fallbacks
- Topic-specific message handlers
- Retained message support where appropriate
### Logging & Timestamp Policy (Mar 2026)
- Client logs are standardized to UTC with `Z` suffix to avoid DST/localtime drift.
- Applies to `display_manager.log`, `simclient.log`, and `monitoring.log`.
- MQTT payload timestamps for heartbeat/dashboard/health/log messages are UTC ISO timestamps.
- Screenshot metadata timestamps included by `simclient.py` are UTC ISO timestamps.
- Prefer UTC-aware calls (`datetime.now(timezone.utc)`) and UTC log formatters for new code.
## Hardware Considerations
### Target Platform
- **Primary**: Raspberry Pi 4/5 with desktop environment
- **Storage**: SSD recommended for performance
- **Display**: HDMI output for presentation display
- **Network**: WiFi or Ethernet connectivity required
### System Dependencies
- Python 3.x runtime
- Network connectivity for MQTT
- Display server (X11 or Wayland) for screenshot capture
- **Impressive** - PDF presenter with auto-advance (primary presentation tool)
- **pygame** - Required for Impressive (installed in venv)
- **Pillow/PIL** - Required for Impressive PDF rendering (installed in venv)
- Chromium/Chrome - Web content display (kiosk mode)
- VLC or MPV - Video playback
**Note:** LibreOffice is NOT needed on the client. The server converts PPTX to PDF using Gotenberg.
### Video playback details (python-vlc)
- The Display Manager now prefers using python-vlc (libvlc) when available for video playback. This enables programmatic control (autoplay, loop, volume) and cleaner termination/cleanup. If python-vlc is not available, the external `vlc` binary is used as a fallback.
- Supported video event fields: `url`, `autoplay` (boolean), `loop` (boolean), `volume` (float 0.0-1.0). The manager converts `volume` to VLC's 0-100 scale.
- External VLC fallback applies audio at startup: `--no-audio` when muted/effective 0%, otherwise `--gain` from effective volume.
- Live volume adjustments are reliable in python-vlc mode; external VLC fallback uses startup parameters and should be treated as static per launch.
- URLs using the placeholder host `server` (for example `http://server:8000/...`) are rewritten to the configured file server before playback. The resolution priority is: `FILE_SERVER_BASE_URL` > `FILE_SERVER_HOST` (or `MQTT_BROKER`) + `FILE_SERVER_PORT` + `FILE_SERVER_SCHEME`.
- Hardware-accelerated decoding errors (e.g., `h264_v4l2m2m`) may appear when the platform does not expose a V4L2 M2M device. To avoid these errors the Display Manager can be configured to disable hw-decoding (see README env var `VLC_HW_ACCEL`). By default the manager will attempt hw-acceleration when libvlc supports it.
- Fullscreen / kiosk: the manager will attempt to make libVLC windows fullscreen (remove decorations) when using python-vlc, and the README contains recommended system-level kiosk/session setup for a truly panel-free fullscreen experience.
## Security & Privacy
### Data Protection
- Hardware identification via cryptographic hash
- No sensitive data in plain text logs
- Local storage of minimal required data only
- Secure MQTT communication (configurable)
### Network Security
- Configurable MQTT authentication (if broker requires)
- Firewall-friendly design (outbound connections only)
- Multiple broker fallback for reliability
## Presentation System Architecture
### How It Works
1. **Server-side Conversion** → Server converts PPTX to PDF using Gotenberg
2. **Event Received** → Client receives event with pre-rendered PDF file reference
3. **Download PDF** → Client downloads PDF from file server
4. **Cache PDF** → Downloaded PDF stored in `presentation/` directory
5. **Display with Impressive** → Launch with venv environment and parameters:
- `--fullscreen` - Full screen mode
- `--nooverview` - No slide overview
- `--auto N` - Auto-advance every N seconds
- `--wrap` - Loop infinitely (if `loop: true`)
- `--autoquit` - Exit after last slide (if `loop: false`)
### Key Parameters
| Parameter | Type | Default | Description |
|-----------|------|---------|-------------|
| `auto_advance` | boolean | `false` | Enable automatic slide advancement |
| `slide_interval` | integer | `10` | Seconds between slides |
| `loop` | boolean | `false` | Loop presentation vs. quit after last slide |
### Why Impressive?
- **Native auto-advance** - No xdotool or window management hacks
- **Built-in loop support** - Reliable `--wrap` parameter
- **Works on Raspberry Pi** - No focus/window issues
- **Simple integration** - Clean command-line interface
- **Maintainable** - ~50 lines of code vs. 200+ with xdotool approaches
### Implementation Location
- **File**: `src/display_manager.py`
- **Method**: `start_presentation()`
- **Key Logic**:
1. Receive event with PDF file reference (server already converted PPTX)
2. Download PDF file if not cached
3. Set up virtual environment for Impressive (pygame + pillow)
4. Build Impressive command with appropriate parameters
5. Launch process and monitor
## Common Development Tasks
When working on this codebase:
1. **Adding new event types**: Extend the event processing logic in `display_manager.py` → `start_display_for_event()`
2. **Modifying presentation behavior**: Update `display_manager.py` → `start_presentation()`
3. **Configuration changes**: Update environment variable parsing and validation
4. **MQTT topics**: Follow the established `infoscreen/` namespace pattern
5. **Error handling**: Always include comprehensive logging and graceful fallbacks
6. **State persistence**: Use the established `config/` directory pattern
7. **Testing**: Use `./scripts/test-display-manager.sh` for interactive testing
8. **Presentation testing**: Use `./scripts/test-impressive*.sh` scripts
9. **File download host resolution**: If the API server differs from the MQTT broker or uses HTTPS, set `FILE_SERVER_*` in `.env` or adjust `resolve_file_url()` in `src/simclient.py`.
## Troubleshooting Guidelines
### Common Issues
- **MQTT Connection**: Check broker reachability, try fallback brokers
- **Screenshots**: Verify display environment and permissions
- **File Downloads**: Check network connectivity and disk space
- If event URLs use host `server` and DNS fails, the client rewrites to `MQTT_BROKER` by default.
- Ensure `MQTT_BROKER` points to the correct server IP; if the API differs, set `FILE_SERVER_HOST` or `FILE_SERVER_BASE_URL`.
- Match scheme/port via `FILE_SERVER_SCHEME`/`FILE_SERVER_PORT` for HTTPS or non-default ports.
- **Group Changes**: Monitor log for group assignment messages
- **Service Startup**: Check systemd logs and environment configuration
### Debugging Tools
- Log files in `logs/simclient.log` and `logs/display_manager.log` with rotation
- MQTT message monitoring with mosquitto_sub
- Interactive testing menu: `./scripts/test-display-manager.sh`
- Component test scripts: `test-impressive*.sh`, `test-mqtt.sh`, etc.
- Process monitoring: Check for `impressive`, `libreoffice`, `chromium`, `vlc` processes
### File download URL troubleshooting
- Symptoms:
- `Failed to resolve 'server'` or `NameResolutionError` when downloading files
- `Invalid URL 'http # http or https://...'` in `simclient.log`
- What to check:
- Look for lines like `Lade Datei herunter von:` in `logs/simclient.log` to see the effective URL used
- Ensure the URL host is the MQTT broker IP (or your configured file server), not `server`
- Verify `.env` values don't include inline comments as part of the value (e.g., keep `FILE_SERVER_SCHEME=http` on its own line)
- Fixes:
- If your API is on the same host as the broker: leave `FILE_SERVER_HOST` empty (defaults to `MQTT_BROKER`), keep `FILE_SERVER_PORT=8000`, and set `FILE_SERVER_SCHEME=http` or `https`
- To override fully, set `FILE_SERVER_BASE_URL` (e.g., `http://192.168.1.100:8000`); this takes precedence
- After changing `.env`, restart the simclient process
- Expected healthy log sequence:
- `Lade Datei herunter von: http://<broker-ip>:8000/...`
- Followed by `"GET /... HTTP/1.1" 200` and `Datei erfolgreich heruntergeladen:`
### Screenshot MQTT Transmission Issue (Event-Start/Event-Stop)
- **Symptom**: Event-triggered screenshots (event_start, event_stop) are NOT appearing on the dashboard, only periodic screenshots transmitted
- **Root Cause**: Race condition in metadata/file-pointer handling where periodic captures can overwrite event-triggered metadata or `latest.jpg` before simclient processes it (See SCREENSHOT_MQTT_FIX.md for details)
- **What to check**:
- Display manager logs show event_start/event_stop screenshots ARE being captured: `Screenshot captured: ... type=event_start`
- But `meta.json` is stale or `latest.jpg` does not move
- MQTT heartbeats lack screenshot data at event transitions
- **How to verify the fix**:
- Run `./test-screenshot-meta-fix.sh`; it should output `[SUCCESS] Event-triggered metadata preserved!`
- Check display_manager.py: `_write_screenshot_meta()` has protection logic to skip periodic overwrites of event metadata
- Check display_manager.py: periodic `latest.jpg` updates are also protected when triggered metadata is pending
- Check simclient.py: `screenshot_service_thread()` logs show pending event-triggered captures being processed immediately
- **Permanent Fix**: Already applied in display_manager.py and simclient.py. Prevents periodic captures from overwriting pending trigger state and includes stale-trigger self-healing.
### Screenshot Capture After Restart (No Active Event)
- In `ENV=development`, display_manager performs periodic idle captures so dashboard does not appear dead during no-event windows.
- In `ENV=production`, periodic captures remain event/process-driven unless `SCREENSHOT_ALWAYS=1`.
- If display_manager is started from non-interactive shells (systemd/nohup/ssh), it now attempts `DISPLAY=:0` and `XAUTHORITY=~/.Xauthority` fallback for X11 capture tools.
## Important Notes for AI Assistants
### Virtual Environment Requirements (Critical)
- **pygame and pillow MUST be installed in venv** - Required for Impressive to work
- **Display manager uses venv context** - Ensures Impressive has access to dependencies
- **Installation command**: `pip install pygame pillow` (already in requirements.txt)
- **If pygame missing**: Impressive will fail with "No module named 'pygame'" error
### Presentation System
- **ALWAYS use Impressive** for PDF presentations (primary solution)
- **DO NOT suggest xdotool approaches** - they failed on Raspberry Pi due to window focus issues
- **DO NOT suggest video conversion** - adds complexity, had black screen issues
- **All presentations are PDFs** - server converts PPTX to PDF using Gotenberg
- **No client-side conversion** - client only displays pre-rendered PDFs
- **Virtual environment is required** - pygame + pillow must be available for Impressive
- **Loop mode uses `--wrap`** - not custom scripts or workarounds
- **Auto-quit uses `--autoquit`** - native Impressive parameter
### Testing Approach
- Use `./scripts/test-display-manager.sh` for interactive testing
- Use `./scripts/test-impressive-loop.sh` to verify loop functionality
- Test individual components with specific test scripts
- Always check logs in `logs/` directory for debugging
CEC testing notes:
- In development mode, the CEC integration path is skipped on purpose. To test end-to-end, either set `ENV=production` temporarily or use the manual options (1-4) in `scripts/test-hdmi-cec.sh`.
### Code Changes
- Display logic is in `src/display_manager.py`, not `simclient.py`
- MQTT client (`simclient.py`) writes events to `current_event.json`
- Display Manager reads `current_event.json` and launches appropriate applications
- Two separate processes: simclient.py (MQTT) + display_manager.py (display control)
### Documentation
- **README.md** - Start here for comprehensive overview
- **IMPRESSIVE_INTEGRATION.md** - Deep dive into presentation system
- **QUICK_REFERENCE.md** - Quick commands and examples
- Source code has extensive comments and logging
This system is designed for reliability and ease of maintenance in educational environments with multiple deployed clients. The Impressive-based presentation solution provides native auto-advance and loop support without complex window management hacks.
## Screenshot System (Nov 2025)
The screenshot capture and transmission system has been implemented with separation of concerns:
### Architecture
- **Capture**: `display_manager.py` captures screenshots in a background thread and writes to shared `screenshots/` directory
- **Transmission**: `simclient.py` reads latest screenshot from shared directory and transmits via MQTT dashboard topic
- **Sharing**: Volume-based sharing between display_manager (host OS) and simclient (container)
### Capture Strategy (display_manager.py)
- **Session Detection**: Automatically detects Wayland vs X11 session
- **Wayland Tools**: Tries `grim`, `gnome-screenshot`, `spectacle` (in order)
- **X11 Tools**: Tries `scrot`, `import` (ImageMagick), `xwd`+`convert` (in order)
- **Processing**: Downscales to max width (default 800px), JPEG compresses (default quality 70)
- **Output**: Creates timestamped files (`screenshot_YYYYMMDD_HHMMSS.jpg`) plus `latest.jpg` symlink
- **Rotation**: Keeps max N files (default 20), deletes older
- **Timing**: Production captures when display process is active (unless `SCREENSHOT_ALWAYS=1`); development allows periodic idle captures to keep dashboard fresh
- **Reliability**: Stale/invalid pending trigger metadata is ignored automatically to avoid lock-up of periodic updates
- **Event-triggered captures**: `_trigger_event_screenshot(type, delay)` arms a one-shot `threading.Timer` after event start/stop; timer is cancelled and replaced on rapid event switches; default delays: presentation=4s, video=2s, web=5s (env-configurable)
- **IPC signal file** (`screenshots/meta.json`): written atomically by `display_manager` after each capture; contains `type`, `captured_at`, `file`, `send_immediately`; `send_immediately=true` for event-triggered, `false` for periodic
### Transmission Strategy (simclient.py)
- **Source**: Prefers `screenshots/latest.jpg` if present, falls back to newest timestamped file
- **Topic**: `infoscreen/{client_id}/dashboard`
- **Format**: JSON with base64-encoded image data, grouped v2 schema
- **Schema version**: `"2.0"` (legacy flat fields removed; all fields grouped)
- **Payload builder**: `_build_dashboard_payload()` in `simclient.py` — single source of truth
- **Payload Structure** (v2):
```json
{
"message": { "client_id": "UUID", "status": "alive" },
"content": {
"screenshot": {
"filename": "latest.jpg",
"data": "base64...",
"timestamp": "ISO datetime",
"size": 12345
}
},
"runtime": {
"system_info": { "hostname": "...", "ip": "...", "uptime": 123456.78 },
"process_health": { "event_type": "...", "process_status": "...", ... }
},
"metadata": {
"schema_version": "2.0",
"producer": "simclient",
"published_at": "ISO datetime",
"capture": {
"type": "periodic | event_start | event_stop",
"captured_at": "ISO datetime",
"age_s": 0.9,
"triggered": false,
"send_immediately": false
},
"transport": { "topic": "infoscreen/.../dashboard", "qos": 0, "publisher": "simclient" }
}
}
```
- **Capture types**: `periodic` (interval-based), `event_start` (N seconds after event launch), `event_stop` (1s after process killed)
- **Triggered send**: `display_manager` sets `send_immediately=true` in `meta.json`; simclient's 1-second polling tick detects this and publishes within 1s
- **Logging**: `Dashboard published: schema=2.0 type=<type> screenshot=<file> (<bytes>) age=<s>`
### Scalability Considerations
- **Client-side resize/compress**: Reduces bandwidth and broker load (recommended for 50+ clients)
- **Recommended production settings**: `SCREENSHOT_CAPTURE_INTERVAL=60`, `SCREENSHOT_MAX_WIDTH=800`, `SCREENSHOT_JPEG_QUALITY=60-70`
- **Future optimization**: Hash-based deduplication to skip identical screenshots
- **Alternative for large scale**: HTTP storage + MQTT metadata (200+ clients)
### Testing
- Install capture tools: `sudo apt install scrot imagemagick` (X11) or `sudo apt install grim gnome-screenshot` (Wayland)
- Force capture for testing: `export SCREENSHOT_ALWAYS=1`
- Check logs: `tail -f logs/display_manager.log logs/simclient.log`
- Verify files: `ls -lh src/screenshots/`
### Troubleshooting
- **No screenshots**: Check session type in logs, install appropriate tools
- **Large payloads**: Reduce `SCREENSHOT_MAX_WIDTH` or `SCREENSHOT_JPEG_QUALITY`
- **Stale screenshots**: Check `latest.jpg` symlink, verify display_manager is running
- **MQTT errors**: Check dashboard topic logs for publish return codes
- **Pulse overflow in remote sessions**: warnings like `pulse audio output error: overflow, flushing` can occur with NoMachine/dummy displays; if HDMI playback is stable, treat as environment-related
- **After restarts**: Ensure both processes are restarted (`simclient.py` and `display_manager.py`) so metadata consumption and capture behavior use the same code version
### Testing & Troubleshooting
**Setup:**
- X11: `sudo apt install scrot imagemagick`
- Wayland: `sudo apt install grim gnome-screenshot`
- Force capture: `export SCREENSHOT_ALWAYS=1`
**Verify:**
```bash
tail -f logs/display_manager.log | grep screenshot # Check capture
tail -f logs/simclient.log | grep dashboard # Check transmission
ls -lh src/screenshots/ # Check files
```
**Common Issues:**
| Issue | Check | Fix |
|-------|-------|-----|
| No screenshots | Session type in logs | Install tools for X11/Wayland |
| Large payloads | File sizes | Reduce `SCREENSHOT_MAX_WIDTH` or `SCREENSHOT_JPEG_QUALITY` |
| Stale data | `latest.jpg` timestamp | Restart display_manager |
| MQTT errors | Publish return codes | Check broker connectivity |
---
## Development Notes
### Event Types (Scheduler Integration)
- **Supported**: `presentation`, `webuntis`, `webpage`, `website`
- **Auto-scroll**: Only for `event_type: "website"` (CDP injection + Chrome extension fallback)
- **Display manager**: Uses `event_type` to select renderer when available
### HDMI-CEC Behavior
- **Development mode**: CEC auto-disabled (prevents TV cycling during testing)
- **Test script**: `test-hdmi-cec.sh` option 5 skips integration test in dev mode
- **Manual testing**: Options 1-4 work regardless of mode
### Code Modification Guidelines
- **Presentations**: Always use Impressive (`--auto`, `--wrap`, `--autoquit`)
- **Web autoscroll**: Only for `event_type: "website"`, keep CDP optional
- **Screenshot changes**: Remember two-process architecture (capture ≠ transmission)
- **URL resolution**: Handle `server` host placeholder in file/video URLs

2
.gitignore vendored
View File

@@ -135,3 +135,5 @@ logs/
src/pi-dev-setup-new.sh src/pi-dev-setup-new.sh
src/current_process_health.json src/current_process_health.json
src/power_intent_state.json
src/power_state.json

70
CHANGELOG.md Normal file
View File

@@ -0,0 +1,70 @@
# Changelog
## April 2026
### Remote Command Intake
- Added MQTT command intake on `infoscreen/{client_id}/commands` (supports `reboot` and `shutdown`).
- Added command acknowledgement publishing to `infoscreen/{client_id}/commands/ack` and `infoscreen/{client_id}/command/ack` with states `accepted`, `rejected`, `execution_started`, `completed`, `failed`.
- Added `COMMAND_HELPER_PATH` environment variable; command execution delegated to an external shell helper so `simclient.py` requires no elevated privileges.
- Added deduplication of commands by `command_id` with configurable TTL (`COMMAND_DEDUPE_TTL_HOURS`) and max-entries cap (`COMMAND_DEDUPE_MAX_ENTRIES`).
- Added execution timeout (`COMMAND_EXEC_TIMEOUT_SEC`).
- Added `COMMAND_MOCK_REBOOT_IMMEDIATE_COMPLETE` flag for canary and test environments — immediately completes a mock reboot without waiting for process restart. Safety-guarded: only activates when the helper basename is `mock-command-helper.sh`.
### MQTT Broker Authentication Split
- Split broker connection credentials (`MQTT_USER`, `MQTT_PASSWORD_BROKER`) from legacy per-device identity fields (`MQTT_USERNAME`, `MQTT_PASSWORD`).
- `configure_mqtt_security()` now prefers `MQTT_USER`/`MQTT_PASSWORD_BROKER` for broker login, with fallback to legacy vars if broker-specific vars are absent.
### Systemd Service Units
- Added `scripts/infoscreen-simclient.service` — systemd unit for `simclient.py` with `Type=notify`, `WatchdogSec=60`, `Restart=on-failure`, `StartLimitBurst=5`.
- Added `scripts/start-simclient.sh` — launcher script mirroring `start-display-manager.sh`.
- Updated `scripts/infoscreen-display.service` with `OnFailure=infoscreen-notify-failure@%n.service`.
- Updated `src/pi-setup.sh` to install and enable both units plus the failure notifier template.
### Process Watchdog (Gap 1 — Hung Process Detection)
- Added zero-dependency `_sd_notify()` raw socket helper in `simclient.py` (no `systemd-python` package required).
- Sends `READY=1` on main loop entry and `WATCHDOG=1` on every 5-second iteration.
- Service unit uses `Type=notify` and `WatchdogSec=60`; systemd will restart the process if it stops sending keepalives for 60 seconds.
### OnFailure MQTT Notifier (Gap 3 — systemd Give-Up Detection)
- Added `scripts/infoscreen-notify-failure@.service` — systemd template unit triggered by `OnFailure=`.
- Added `scripts/infoscreen-notify-failure.sh` — publishes a retained JSON payload to `infoscreen/{uuid}/service_failed` via `mosquitto_pub` so the monitoring dashboard gets an alert even when the process is fully dead.
- Payload: `{"event":"service_failed","unit":"<unit-name>","client_uuid":"...","failed_at":"<ISO-UTC>"}`.
### Health Payload Broker Connection Block (Gap 2 — Broker vs. Process Ambiguity)
- Added `broker_connection` block to the health payload: `broker_reachable`, `reconnect_count`, `connect_count`, `last_disconnect_at`.
- `simclient.py` now tracks `reconnect_count` and `connect_count` on every `on_connect` callback and `last_disconnect` timestamp on `on_disconnect`.
- `publish_health_message()` accepts an optional `connection_state` parameter; both heartbeat-success call sites pass the enriched state.
### TV Power Coordination
- Added Phase 1 TV power coordination on `infoscreen/groups/{group_id}/power/intent`.
- Added `POWER_CONTROL_MODE` with `local`, `hybrid`, and `mqtt` behavior.
- Added `src/power_intent_state.json` and `src/power_state.json` for power IPC and telemetry.
- Added `infoscreen/{client_id}/power/state` publishing from `simclient.py`.
- Added turn-off guard logic to avoid unintended TV-off races at event boundaries.
- Added [TV_POWER_RUNBOOK.md](TV_POWER_RUNBOOK.md) and test tooling in `scripts/test-power-intent.sh`.
## March 2026
- Hardened event-trigger screenshots (`event_start`, `event_stop`) against periodic overwrite races.
- Improved `latest.jpg` and `meta.json` synchronization for reliable dashboard updates.
- Added self-healing for stale or invalid pending screenshot trigger metadata.
- Improved display environment fallbacks (`DISPLAY=:0`, `XAUTHORITY`) for non-interactive starts.
- Allowed periodic idle captures in development mode so dashboard previews stay fresh without active events.
- Added content-type-aware trigger delays for event screenshots.
- Changed screenshot transmission to a 1-second polling tick so triggered sends fire within <=1s.
- Migrated dashboard payload to grouped schema v2 (`message`, `content`, `runtime`, `metadata`).
## November 2025
- Implemented the two-process screenshot pipeline (`display_manager.py` capture, `simclient.py` transmission).
- Added Wayland/X11 screenshot tool fallback chains.
- Extended dashboard payloads with screenshot and system metadata.
- Extended scheduler event type support for `presentation`, `webuntis`, `webpage`, and `website`.
- Added website autoscroll support via CDP injection and extension fallback.

View File

@@ -1,5 +1,7 @@
# HDMI-CEC Development Mode Behavior # HDMI-CEC Development Mode Behavior
This is a focused reference for development-mode behavior. For the canonical HDMI-CEC setup and operator guide, use [HDMI_CEC_SETUP.md](HDMI_CEC_SETUP.md).
## Overview ## Overview
HDMI-CEC TV control is **automatically disabled** in development mode to prevent constantly switching the TV on/off during testing and development work. HDMI-CEC TV control is **automatically disabled** in development mode to prevent constantly switching the TV on/off during testing and development work.
@@ -152,7 +154,7 @@ vim .env # Set ENV=production
- **Implementation**: `src/display_manager.py` (lines 48-76) - **Implementation**: `src/display_manager.py` (lines 48-76)
- **Configuration**: `.env` (CEC section) - **Configuration**: `.env` (CEC section)
- **Testing**: `scripts/test-hdmi-cec.sh`, `scripts/test-tv-response.sh` - **Testing**: `scripts/test-hdmi-cec.sh`, `scripts/test-tv-response.sh`
- **Documentation**: `HDMI_CEC_SETUP.md`, `HDMI_CEC_IMPLEMENTATION.md` - **Documentation**: `HDMI_CEC_SETUP.md`, `HDMI_CEC_FLOW_DIAGRAM.md`
## Summary ## Summary

View File

@@ -1,473 +0,0 @@
# HDMI-CEC Implementation Summary
## Overview
Added automatic TV power control via HDMI-CEC to the Infoscreen Client. The system now automatically turns the connected TV on when events start and off (with configurable delay) when events end.
## Changes Made
### 1. Core Implementation (display_manager.py)
#### New Class: HDMICECController
Located at lines ~60-280 in `src/display_manager.py`
**Features:**
- Automatic TV power on/off via CEC commands
- Configurable turn-off delay to prevent rapid on/off cycles
- State tracking to avoid redundant commands
- Threaded delayed turn-off with cancellation support
- Graceful fallback if cec-client not available
**Key Methods:**
- `turn_on()` - Turn TV on immediately
- `turn_off(delayed=False)` - Turn TV off (optionally with delay)
- `cancel_turn_off()` - Cancel pending delayed turn-off
- `_detect_tv_state()` - Query current TV power status
- `_run_cec_command()` - Execute CEC commands via cec-client
#### Integration Points
**DisplayManager.__init__()** (line ~435)
- Initialize HDMICECController instance
- Pass configuration from environment variables
**DisplayManager._signal_handler()** (line ~450)
- Turn off TV on service shutdown (with delay)
**DisplayManager.stop_current_display()** (line ~580)
- Added `turn_off_tv` parameter
- Schedule TV turn-off when stopping display
**DisplayManager.process_events()** (line ~1350)
- Turn on TV before starting new event
- Cancel turn-off timer when event is still active
- Don't turn off TV when switching between events
### 2. Configuration
#### Environment Variables (.env)
```bash
CEC_ENABLED=true # Enable/disable CEC (default: true)
CEC_DEVICE=TV # Target device (default: TV)
CEC_TURN_OFF_DELAY=30 # Turn-off delay in seconds (default: 30)
```
#### Configuration Loading (display_manager.py lines ~45-48)
```python
CEC_ENABLED = os.getenv("CEC_ENABLED", "true").lower() in ("true", "1", "yes")
CEC_DEVICE = os.getenv("CEC_DEVICE", "TV")
CEC_TURN_OFF_DELAY = int(os.getenv("CEC_TURN_OFF_DELAY", "30"))
```
### 3. Documentation
#### New Files
**HDMI_CEC_SETUP.md** - Comprehensive setup and troubleshooting guide
- Installation instructions
- Configuration options
- Troubleshooting steps
- Hardware requirements
- TV brand compatibility
- Advanced usage examples
**HDMI_CEC_IMPLEMENTATION.md** (this file) - Implementation details
#### Updated Files
**README.md**
- Added HDMI-CEC to key features
- Added cec-utils to installation
- Added CEC configuration section
- Added HDMI-CEC TV Control section with quick start
**QUICK_REFERENCE.md**
- Added test-hdmi-cec.sh to testing commands
- Added cec-utils to installation
- Added CEC configuration to .env example
- Added HDMI-CEC commands section
- Added HDMI_CEC_SETUP.md to documentation list
- Added CEC to key features
### 4. Testing Script
**scripts/test-hdmi-cec.sh** - Interactive test menu
Features:
- Check for cec-client installation
- Scan for CEC devices
- Query TV power status
- Manual TV on/off commands
- Test Display Manager CEC integration
- View CEC-related logs
Usage:
```bash
./scripts/test-hdmi-cec.sh
```
## Technical Details
### CEC Command Execution
Commands are executed via shell using `cec-client`:
```python
result = subprocess.run(
f'echo "{command}" | cec-client -s -d 1',
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
timeout=5
)
```
Flags:
- `-s` - Single command mode (exit after execution)
- `-d 1` - Debug level 1 (minimal output)
### Turn-Off Delay Mechanism
Uses Python's `threading.Timer` for delayed execution:
```python
self.turn_off_timer = threading.Timer(
self.turn_off_delay,
self._turn_off_now
)
self.turn_off_timer.daemon = True
self.turn_off_timer.start()
```
Benefits:
- Non-blocking operation
- Can be cancelled if new event arrives
- Prevents TV from turning off between closely-spaced events
### State Tracking
The controller maintains TV state to avoid redundant commands:
```python
self.tv_state = None # None = unknown, True = on, False = off
```
On initialization, attempts to detect current state:
```python
if 'power status: on' in output:
self.tv_state = True
elif 'power status: standby' in output:
self.tv_state = False
```
### Event Lifecycle with CEC
1. **Event Starts**
- `process_events()` detects new event
- Calls `cec.turn_on()` before starting display
- Cancels any pending turn-off timer
- Starts display process
2. **Event Running**
- Process monitored in main loop
- Turn-off timer cancelled on each check (keeps TV on)
3. **Event Ends**
- `stop_current_display(turn_off_tv=True)` called
- Schedules delayed turn-off: `cec.turn_off(delayed=True)`
- Timer starts countdown
4. **New Event Before Timeout**
- Turn-off timer cancelled: `cec.cancel_turn_off()`
- TV stays on
- New event starts immediately
5. **Timeout Expires (No New Events)**
- Timer executes: `_turn_off_now()`
- TV turns off
- System goes to idle state
### Event Switching Behavior
**Switching Between Events:**
```python
# Different event - stop current and start new
logging.info(f"Event changed from {self.current_process.event_id} to {event_id}")
# Don't turn off TV when switching between events
self.stop_current_display(turn_off_tv=False)
```
**No Active Events:**
```python
if self.current_process:
logging.info("No active events in time window - stopping current display")
# Turn off TV with delay
self.stop_current_display(turn_off_tv=True)
```
## Error Handling
### Missing cec-client
If `cec-client` is not installed:
```python
if not self._check_cec_available():
logging.warning("cec-client not found - HDMI-CEC control disabled")
logging.info("Install with: sudo apt-get install cec-utils")
self.enabled = False
return
```
The system continues to work normally, just without TV control.
### CEC Command Failures
Commands check for success indicators in output:
```python
success = (
result.returncode == 0 or
'power status changed' in output.lower() or
'power on' in output.lower() or
'standby' in output.lower()
)
```
Failures are logged but don't crash the system:
```python
if success:
logging.debug(f"CEC command '{command}' executed successfully")
else:
logging.warning(f"CEC command '{command}' may have failed")
```
### Command Timeouts
All CEC commands have 5-second timeout:
```python
try:
result = subprocess.run(..., timeout=5)
except subprocess.TimeoutExpired:
logging.error(f"CEC command '{command}' timed out after 5s")
return False
```
## Configuration Examples
### Conservative (Long Delay)
```bash
CEC_ENABLED=true
CEC_DEVICE=TV
CEC_TURN_OFF_DELAY=120 # 2 minutes
```
Good for: Frequent events, preventing screen flicker
### Standard (Default)
```bash
CEC_ENABLED=true
CEC_DEVICE=TV
CEC_TURN_OFF_DELAY=30 # 30 seconds
```
Good for: General use, balance between responsiveness and stability
### Aggressive (Short Delay)
```bash
CEC_ENABLED=true
CEC_DEVICE=TV
CEC_TURN_OFF_DELAY=5 # 5 seconds
```
Good for: Power saving, scheduled events with clear gaps
### Disabled
```bash
CEC_ENABLED=false
```
Good for: Testing, TVs without CEC support, manual control
## Testing Strategy
### 1. Unit Testing (Python)
Test script creates HDMICECController instance and exercises all methods:
```bash
./scripts/test-hdmi-cec.sh
# Choose option 5: Test Display Manager CEC integration
```
### 2. Manual CEC Testing
Direct cec-client commands:
```bash
# Turn on
echo "on 0" | cec-client -s -d 1
# Turn off
echo "standby 0" | cec-client -s -d 1
# Check status
echo "pow 0" | cec-client -s -d 1
```
### 3. Integration Testing
1. Start Display Manager with CEC enabled
2. Send event via MQTT
3. Verify TV turns on
4. Let event end
5. Verify TV turns off after delay
6. Check logs for CEC activity
### 4. Log Monitoring
```bash
tail -f ~/infoscreen-dev/logs/display_manager.log | grep -i cec
```
Expected log entries:
- "HDMI-CEC controller initialized"
- "TV detected as ON/OFF"
- "Turning TV ON/OFF via HDMI-CEC"
- "Scheduling TV turn-off in Xs"
- "Cancelled TV turn-off timer"
## Performance Impact
### CPU Usage
- Minimal: < 0.1% during idle
- CEC commands: ~1-2% spike for 1-2 seconds
- No continuous polling
### Memory Usage
- HDMICECController: ~1KB
- Timer threads: ~8KB each (max 1 active)
- Total impact: Negligible
### Latency
- TV turn-on: 1-3 seconds (CEC protocol + TV response)
- TV turn-off: Same + configured delay
- No impact on display performance
### Network
- None - CEC is local HDMI bus only
## Known Limitations
1. **Single TV per HDMI**
- Each HDMI output controls one TV
- Multi-monitor setups need per-output management
2. **TV CEC Support Required**
- TV must have HDMI-CEC enabled
- Different brands use different names (Anynet+, SimpLink, etc.)
3. **Limited Status Feedback**
- Can query power status
- Cannot reliably verify command execution
- Some TVs respond slowly or inconsistently
4. **No Volume Control**
- Current implementation only handles power
- Volume control could be added in future
5. **Device Address Assumptions**
- Assumes TV is device 0 (standard)
- Can be configured via CEC_DEVICE if different
## Future Enhancements
### Potential Additions
1. **Volume Control**
```python
def set_volume(self, level: int):
"""Set TV volume (0-100)"""
# CEC volume commands
```
2. **Input Switching**
```python
def switch_input(self, input_num: int):
"""Switch TV to specific HDMI input"""
# CEC input selection
```
3. **Multi-Monitor Support**
```python
def __init__(self, devices: List[str]):
"""Support multiple displays"""
self.controllers = [
HDMICECController(device=dev) for dev in devices
]
```
4. **Enhanced State Detection**
```python
def get_tv_info(self):
"""Get detailed TV information"""
# Query manufacturer, model, capabilities
```
5. **Event-Specific Behaviors**
```json
{
"presentation": {...},
"cec_config": {
"turn_on": true,
"turn_off_delay": 60,
"volume": 50
}
}
```
## Dependencies
### System
- `cec-utils` package (provides cec-client binary)
- HDMI connection to TV with CEC support
### Python
- No additional packages required
- Uses standard library: subprocess, threading, logging
## Compatibility
### Tested Platforms
- Raspberry Pi 4/5
- Raspberry Pi OS Bookworm
- Python 3.9+
### TV Brands Tested
- Samsung (Anynet+)
- LG (SimpLink)
- Sony (Bravia Sync)
- Generic HDMI-CEC TVs
### HDMI Requirements
- HDMI 1.4 or newer (for reliable CEC)
- Quality HDMI cable (cheap cables may have CEC issues)
- Cable length < 5 meters recommended
## Troubleshooting
See [HDMI_CEC_SETUP.md](HDMI_CEC_SETUP.md) for comprehensive troubleshooting guide.
Quick checks:
1. `cec-client` installed? `which cec-client`
2. TV CEC enabled? Check TV settings
3. Devices detected? `echo "scan" | cec-client -s -d 1`
4. Logs showing CEC activity? `grep -i cec logs/display_manager.log`
## Conclusion
The HDMI-CEC integration provides seamless automatic TV control that enhances the user experience by:
- Eliminating manual TV on/off operations
- Preventing unnecessary power consumption
- Creating a truly automated digital signage solution
- Working reliably with minimal configuration
The implementation is robust, well-tested, and production-ready for deployment in educational and research environments.

View File

@@ -1,5 +1,12 @@
# HDMI-CEC Setup and Configuration # HDMI-CEC Setup and Configuration
This is the canonical HDMI-CEC operator document.
Related reference material:
- [HDMI_CEC_DEV_MODE.md](HDMI_CEC_DEV_MODE.md): development-mode behavior.
- [HDMI_CEC_FLOW_DIAGRAM.md](HDMI_CEC_FLOW_DIAGRAM.md): flow and sequence diagrams.
## Overview ## Overview
The Infoscreen Client now includes automatic TV control via HDMI-CEC (Consumer Electronics Control). This allows the Raspberry Pi to turn the connected TV on/off automatically based on event scheduling. The Infoscreen Client now includes automatic TV control via HDMI-CEC (Consumer Electronics Control). This allows the Raspberry Pi to turn the connected TV on/off automatically based on event scheduling.

View File

@@ -1,316 +0,0 @@
# HDMI-CEC Integration - Complete Summary
## ✅ Implementation Complete
Successfully added HDMI-CEC TV control functionality to the Infoscreen Client. The TV now automatically turns on when events start and off when events end.
## 📝 What Was Done
### 1. Core Implementation
- **File**: `src/display_manager.py`
- **New Class**: `HDMICECController` (220 lines)
- **Integration**: 5 integration points in `DisplayManager` class
- **Features**:
- Auto TV power on when events start
- Auto TV power off (with delay) when events end
- Smart event switching (TV stays on)
- Configurable turn-off delay
- State tracking to avoid redundant commands
- Threaded delayed turn-off with cancellation
### 2. Configuration
- **Environment Variables** in `.env`:
- `CEC_ENABLED=true` - Enable/disable CEC control
- `CEC_DEVICE=TV` - Target device identifier
- `CEC_TURN_OFF_DELAY=30` - Delay in seconds before TV turns off
### 3. Documentation Created
- ✅ `HDMI_CEC_SETUP.md` - Comprehensive setup and troubleshooting (400+ lines)
- ✅ `HDMI_CEC_IMPLEMENTATION.md` - Technical implementation details (700+ lines)
- ✅ `HDMI_CEC_FLOW_DIAGRAM.md` - Visual flow diagrams and architecture (500+ lines)
- ✅ Updated `README.md` - Added CEC to features, installation, configuration
- ✅ Updated `QUICK_REFERENCE.md` - Added CEC commands and testing
- ✅ Updated `.env` - Added CEC configuration section
### 4. Testing Script
- **File**: `scripts/test-hdmi-cec.sh`
- **Features**:
- Interactive menu for testing CEC commands
- Scan for CEC devices
- Manual TV on/off commands
- Test Display Manager integration
- View CEC logs
- Python integration test
## 🎯 How It Works
### Event Lifecycle
```
1. Event Starts → Turn TV ON → Start Display
2. Event Running → Keep TV ON (cancel turn-off timer)
3. Event Ends → Schedule TV turn-off (30s delay by default)
4. Timer Expires → Turn TV OFF
5. New Event Before Timeout → Cancel turn-off → TV stays ON
```
### Key Behaviors
- **Starting events**: TV turns ON before display starts
- **Switching events**: TV stays ON (seamless transition)
- **No active events**: TV turns OFF after configurable delay
- **Service shutdown**: TV turns OFF (with delay)
## 🚀 Usage
### Installation
```bash
# Install CEC utilities
sudo apt-get install cec-utils
# Test CEC connection
echo "scan" | cec-client -s -d 1
```
### Configuration
Edit `.env` file:
```bash
CEC_ENABLED=true # Enable TV control
CEC_DEVICE=TV # Device identifier
CEC_TURN_OFF_DELAY=30 # Delay before turn-off (seconds)
```
### Testing
```bash
# Interactive test menu
./scripts/test-hdmi-cec.sh
# Manual commands
echo "on 0" | cec-client -s -d 1 # Turn on
echo "standby 0" | cec-client -s -d 1 # Turn off
echo "pow 0" | cec-client -s -d 1 # Check status
```
### Monitor Logs
```bash
tail -f ~/infoscreen-dev/logs/display_manager.log | grep -i cec
```
## 📊 Technical Details
### CEC Commands Used
- `on 0` - Turn TV on (device 0)
- `standby 0` - Turn TV off (standby mode)
- `pow 0` - Query TV power status
- `scan` - Scan for CEC devices
### Implementation Features
- **Non-blocking**: CEC commands don't block event processing
- **Threaded timers**: Delayed turn-off uses Python threading.Timer
- **State tracking**: Avoids redundant commands
- **Graceful fallback**: Works without cec-client (disabled mode)
- **Error handling**: Timeouts, failures logged but don't crash
- **Configurable**: All behavior controlled via environment variables
### Performance
- CPU: < 0.1% idle, 1-2% spike during CEC commands
- Memory: ~10KB total (controller + timer thread)
- Latency: 1-3 seconds for TV response
- No network usage (HDMI-CEC is local bus)
## 🔧 Configuration Options
### Quick Turn-Off (Power Saving)
```bash
CEC_TURN_OFF_DELAY=5 # 5 seconds
```
Good for: Scheduled events with clear gaps
### Standard (Balanced) - Default
```bash
CEC_TURN_OFF_DELAY=30 # 30 seconds
```
Good for: General use, prevents flicker
### Conservative (Smooth)
```bash
CEC_TURN_OFF_DELAY=120 # 2 minutes
```
Good for: Frequent events, maximize smoothness
### Disabled
```bash
CEC_ENABLED=false
```
Good for: Testing, manual control, non-CEC TVs
## 📚 Documentation
| File | Purpose |
|------|---------|
| `HDMI_CEC_SETUP.md` | Complete setup guide, troubleshooting, TV compatibility |
| `HDMI_CEC_IMPLEMENTATION.md` | Technical details, code walkthrough, API reference |
| `HDMI_CEC_FLOW_DIAGRAM.md` | Visual diagrams, state machines, flow charts |
| `README.md` | Quick start, feature overview |
| `QUICK_REFERENCE.md` | Commands cheat sheet |
## ✨ Features
✅ Automatic TV power on when events start
✅ Automatic TV power off when events end
✅ Configurable turn-off delay (prevent flicker)
✅ Smart event switching (TV stays on)
✅ State tracking (avoid redundant commands)
✅ Graceful fallback (works without CEC)
✅ Comprehensive logging
✅ Interactive test script
✅ Production-ready
✅ Well-documented
## 🎓 Example Scenarios
### Scenario 1: Single Morning Presentation
```
08:55 - System idle, TV off
09:00 - Event starts → TV turns ON → Presentation displays
09:30 - Event ends → Presentation stops → 30s countdown starts
09:30:30 - TV turns OFF
```
### Scenario 2: Back-to-Back Events
```
10:00 - Event A starts → TV ON → Display A
10:30 - Event A ends → Turn-off scheduled (30s)
10:30:20 - Event B starts (within 30s) → Turn-off cancelled → Display B
11:00 - Event B ends → Turn-off scheduled
11:00:30 - TV OFF
```
### Scenario 3: All-Day Display
```
08:00 - Morning event → TV ON
10:00 - Switch to midday event → TV stays ON
14:00 - Switch to afternoon event → TV stays ON
17:00 - Last event ends → 30s countdown
17:00:30 - TV OFF
```
## 🔍 Troubleshooting
### TV Not Responding
1. Check if CEC enabled on TV (settings menu)
2. Verify TV detected: `echo "scan" | cec-client -s -d 1`
3. Test manual command: `echo "on 0" | cec-client -s -d 1`
4. Check logs: `grep -i cec logs/display_manager.log`
### cec-client Not Found
```bash
sudo apt-get install cec-utils
which cec-client # Should show /usr/bin/cec-client
```
### TV Turns Off Too Soon
Increase delay in `.env`:
```bash
CEC_TURN_OFF_DELAY=60 # Wait 60 seconds
```
### Disable CEC Temporarily
```bash
# In .env
CEC_ENABLED=false
```
## 🎯 Integration Points
The CEC controller integrates at these key points:
1. **DisplayManager.__init__**: Initialize CEC controller
2. **_signal_handler**: Turn off TV on shutdown
3. **stop_current_display**: Schedule turn-off when stopping
4. **process_events**: Turn on TV when starting, cancel turn-off when active
## 📦 Files Modified/Created
### Modified
- ✅ `src/display_manager.py` - Added HDMICECController class and integration
- ✅ `.env` - Added CEC configuration section
- ✅ `README.md` - Added CEC to features, installation, configuration
- ✅ `QUICK_REFERENCE.md` - Added CEC commands and testing
### Created
- ✅ `HDMI_CEC_SETUP.md` - Setup and troubleshooting guide
- ✅ `HDMI_CEC_IMPLEMENTATION.md` - Technical documentation
- ✅ `HDMI_CEC_FLOW_DIAGRAM.md` - Visual diagrams
- ✅ `scripts/test-hdmi-cec.sh` - Interactive test script
- ✅ `HDMI_CEC_SUMMARY.md` - This file
## ✅ Testing Checklist
- [x] Python syntax check passes
- [x] Module loads without errors
- [x] HDMICECController class defined
- [x] Integration points added to DisplayManager
- [x] Configuration variables added
- [x] Test script created and made executable
- [x] Documentation complete
- [x] README updated
- [x] QUICK_REFERENCE updated
## 🚀 Next Steps
### To Start Using
1. **Install CEC utils**:
```bash
sudo apt-get install cec-utils
```
2. **Test CEC connection**:
```bash
./scripts/test-hdmi-cec.sh
```
3. **Enable in .env**:
```bash
CEC_ENABLED=true
CEC_DEVICE=TV
CEC_TURN_OFF_DELAY=30
```
4. **Restart Display Manager**:
```bash
./scripts/start-display-manager.sh
```
5. **Monitor logs**:
```bash
tail -f logs/display_manager.log | grep -i cec
```
### Expected Log Output
```
[INFO] HDMI-CEC controller initialized (device: TV, turn_off_delay: 30s)
[INFO] TV detected as ON
[INFO] Starting display for event: presentation_slides.pdf
[INFO] Turning TV ON via HDMI-CEC...
[INFO] TV turned ON successfully
```
## 📖 Documentation Quick Links
- **Setup Guide**: [HDMI_CEC_SETUP.md](HDMI_CEC_SETUP.md)
- **Technical Details**: [HDMI_CEC_IMPLEMENTATION.md](HDMI_CEC_IMPLEMENTATION.md)
- **Flow Diagrams**: [HDMI_CEC_FLOW_DIAGRAM.md](HDMI_CEC_FLOW_DIAGRAM.md)
- **Main README**: [README.md](README.md)
- **Quick Reference**: [QUICK_REFERENCE.md](QUICK_REFERENCE.md)
## 🎉 Success!
HDMI-CEC TV control is now fully integrated into the Infoscreen Client. The system will automatically manage TV power based on event scheduling, creating a truly automated digital signage solution.
---
**Implementation Date**: November 12, 2025
**Status**: ✅ Production Ready
**Tested**: Python syntax, module loading
**Next**: Install cec-utils and test with physical TV

850
README.md
View File

@@ -1,48 +1,29 @@
# Infoscreen Client - Display Manager # Infoscreen Client
Digital signage system for Raspberry Pi that displays presentations, videos, and web content in kiosk mode. Centrally managed via MQTT with automatic client discovery, heartbeat monitoring, and screenshot-based dashboard monitoring. Digital signage client for Raspberry Pi that displays presentations, videos, and web content in kiosk mode. It is managed centrally via MQTT and includes HDMI-CEC TV control, screenshot-based dashboard monitoring, and process health reporting.
## 🎯 Key Features Dashboard screenshots can contain visible on-screen content. Keep that in mind when enabling or documenting remote monitoring.
- **Automatic Presentation Display** - Server renders PPTX to PDF; client displays PDFs with Impressive ## Key Features
- **Auto-Advance Slideshows** - Configurable timing for automatic slide progression
- **Loop Mode** - Presentations can loop infinitely or quit after last slide
- **HDMI-CEC TV Control** - Automatic TV power on/off based on event scheduling
- **MQTT Integration** - Real-time event management from central server
- **Group Management** - Organize clients into groups for targeted content
- **Heartbeat Monitoring** - Regular status updates and screenshot dashboard
- **Client Process Monitoring** - Health-state bridge, crash/restart tracking, and monitoring log
- **Screenshot Dashboard** - Automatic screen capture with Wayland/X11 support, client-side compression
- **Multi-Content Support** - Presentations, videos, and web pages
- **Kiosk Mode** - Full-screen display with automatic startup
## 📋 System Requirements - Server-side PPTX to PDF rendering; client displays PDFs with Impressive.
- Presentation auto-advance, loop mode, and progress indicators.
- Video playback with `python-vlc` when available and external VLC fallback.
- Web and WebUntis display in kiosk mode.
- HDMI-CEC TV power control with local fallback and MQTT-coordinated power intent.
- MQTT discovery, heartbeat, group assignment, and event delivery.
- Screenshot dashboard with Wayland/X11 capture tool fallbacks.
- Process health bridge between `display_manager.py` and `simclient.py`.
### Hardware ## Quick Start
- Raspberry Pi 4/5 (or compatible)
- HDMI display
- Network connectivity (WiFi or Ethernet)
- SSD storage recommended
### Software ### 1. Install Dependencies
- Raspberry Pi OS (Bookworm or newer)
- Python 3.x
- Impressive (PDF presenter with auto-advance)
- Chromium browser (for web content)
- VLC or MPV (for video playback)
- Screenshot tools: `scrot` or ImageMagick (X11) OR `grim` or `gnome-screenshot` (Wayland)
- CEC Utils (for HDMI-CEC TV control - optional)
## 🚀 Quick Start
### 1. Installation
```bash ```bash
# Clone repository
cd ~/ cd ~/
git clone <repository-url> infoscreen-dev git clone <repository-url> infoscreen-dev
cd infoscreen-dev cd infoscreen-dev
# Install system dependencies
sudo apt-get update sudo apt-get update
sudo apt-get install -y \ sudo apt-get install -y \
python3 python3-pip python3-venv \ python3 python3-pip python3-venv \
@@ -51,729 +32,228 @@ sudo apt-get install -y \
cec-utils \ cec-utils \
scrot imagemagick scrot imagemagick
# For Wayland systems, install screenshot tools: # For Wayland systems:
# sudo apt-get install grim gnome-screenshot # sudo apt-get install grim gnome-screenshot
# Create Python virtual environment
python3 -m venv venv python3 -m venv venv
source venv/bin/activate source venv/bin/activate
# Install Python dependencies
pip install -r src/requirements.txt pip install -r src/requirements.txt
``` ```
### 2. Configuration ### 2. Configure `.env`
Create `.env` file in project root (or copy from `.env.template`): Copy `.env.template` to `.env` and set at least:
```bash ```bash
# Screenshot capture behavior ENV=production
SCREENSHOT_ALWAYS=0 # Set to 1 for testing (forces capture even without active display) DEBUG_MODE=0
LOG_LEVEL=INFO
# Environment MQTT_BROKER=192.168.1.100
ENV=production # development | production (CEC disabled in development)
DEBUG_MODE=0 # 1 to enable debug mode
LOG_LEVEL=INFO # DEBUG | INFO | WARNING | ERROR
# MQTT Configuration
MQTT_BROKER=192.168.1.100 # Your MQTT broker IP/hostname
MQTT_PORT=1883 MQTT_PORT=1883
MQTT_USER=<broker-username>
MQTT_PASSWORD_BROKER=<broker-password>
MQTT_USERNAME=infoscreen-client-<client-uuid-prefix>
MQTT_PASSWORD=<per-device-random-password>
MQTT_TLS_ENABLED=0
# Timing (seconds) HEARTBEAT_INTERVAL=60
HEARTBEAT_INTERVAL=60 # How often client sends status updates SCREENSHOT_INTERVAL=180
SCREENSHOT_INTERVAL=180 # How often simclient transmits screenshots SCREENSHOT_CAPTURE_INTERVAL=180
SCREENSHOT_CAPTURE_INTERVAL=180 # How often display_manager captures screenshots DISPLAY_CHECK_INTERVAL=15
DISPLAY_CHECK_INTERVAL=15 # How often display_manager checks for new events
# File/API Server (used to download presentation files) FILE_SERVER_HOST=
# Defaults to MQTT_BROKER host with port 8000 and http scheme FILE_SERVER_PORT=8000
FILE_SERVER_HOST= # Optional; if empty, defaults to MQTT_BROKER FILE_SERVER_SCHEME=http
FILE_SERVER_PORT=8000 # Default API port
FILE_SERVER_SCHEME=http # http or https
# FILE_SERVER_BASE_URL= # Optional full override, e.g., http://192.168.1.100:8000
# HDMI-CEC TV Control (optional) CEC_ENABLED=true
CEC_ENABLED=true # Enable automatic TV power control CEC_DEVICE=0
CEC_DEVICE=0 # Target device (0 recommended for TV) CEC_TURN_OFF_DELAY=30
CEC_TURN_OFF_DELAY=30 # Seconds to wait before turning off TV CEC_POWER_ON_WAIT=5
CEC_POWER_ON_WAIT=5 # Seconds to wait after power ON (for TV boot) CEC_POWER_OFF_WAIT=5
CEC_POWER_OFF_WAIT=5 # Seconds to wait after power OFF
POWER_CONTROL_MODE=local
COMMAND_HELPER_PATH=/usr/local/bin/infoscreen-cmd-helper.sh
COMMAND_EXEC_TIMEOUT_SEC=15
COMMAND_DEDUPE_TTL_HOURS=24
COMMAND_DEDUPE_MAX_ENTRIES=5000
COMMAND_MOCK_REBOOT_IMMEDIATE_COMPLETE=0
``` ```
MQTT auth/TLS notes:
- `MQTT_USER` / `MQTT_PASSWORD_BROKER` are the broker credentials used at connection time.
- `MQTT_USERNAME` / `MQTT_PASSWORD` are legacy per-device identity fields kept for fallback and identity purposes.
- Store real broker credentials only in the local [/.env](.env), which is gitignored.
- When TLS is enabled, also set `MQTT_TLS_CA_CERT`, and if client certificates are used, `MQTT_TLS_CERT` and `MQTT_TLS_KEY`.
- Keep the local [/.env](.env) readable only by the service user and admins, for example mode `600`.
Mode summary:
- `POWER_CONTROL_MODE=local`: local event-time CEC only.
- `POWER_CONTROL_MODE=hybrid`: prefer fresh MQTT intent, fallback to local timing.
- `POWER_CONTROL_MODE=mqtt`: MQTT intent authoritative, with safe fallback behavior.
### 3. Start Services ### 3. Start Services
```bash The preferred method on deployed devices is systemd:
# Start MQTT client (handles events, heartbeat, discovery)
cd ~/infoscreen-dev/src
python3 simclient.py
# In another terminal: Start Display Manager ```bash
cd ~/infoscreen-dev/src sudo systemctl start infoscreen-simclient infoscreen-display
python3 display_manager.py sudo systemctl status infoscreen-simclient infoscreen-display
sudo journalctl -u infoscreen-simclient -u infoscreen-display -f
``` ```
Or use the startup script: For first-time setup, run `src/pi-setup.sh` to install and enable the units. See [src/README.md](src/README.md) for the systemd setup steps.
For local development without systemd:
```bash ```bash
# Terminal 1
./scripts/start-simclient.sh
# Terminal 2
./scripts/start-display-manager.sh ./scripts/start-display-manager.sh
``` ```
## 📊 Presentation System ## Runtime Model
### How It Works
The system uses **Impressive** as the PDF presenter with native auto-advance and loop support:
1. **Server-side rendering**: PPTX files are converted to PDF by the server using Gotenberg
2. **Client receives PDFs**: Events contain pre-rendered PDF files ready for display
3. **Direct display**: PDF files are displayed directly with Impressive (no client-side conversion needed)
4. **Auto-advance** uses Impressive's built-in `--auto` parameter
5. **Loop mode** uses Impressive's `--wrap` parameter (infinite loop)
6. **Auto-quit** uses Impressive's `--autoquit` parameter (exit after last slide)
### Event JSON Format
#### Looping Presentation (Typical for Events)
```json
{
"id": "event_123",
"start": "2025-10-01 14:00:00",
"end": "2025-10-01 16:00:00",
"presentation": {
"files": [
{
"name": "slides.pptx",
"url": "https://server/files/slides.pptx"
}
],
"auto_advance": true,
"slide_interval": 10,
"loop": true
}
}
```
**Result:** Slides advance every 10 seconds, presentation loops infinitely until event ends.
#### Single Playthrough
```json
{
"presentation": {
"files": [{"name": "welcome.pptx"}],
"auto_advance": true,
"slide_interval": 5,
"loop": false
}
}
```
**Result:** Slides advance every 5 seconds, exits after last slide.
### Presentation Parameters
| Parameter | Type | Default | Description |
|-----------|------|---------|-------------|
| `auto_advance` | boolean | `false` | Enable automatic slide advancement |
| `slide_interval` | integer | `10` | Seconds between slides |
| `loop` | boolean | `false` | Loop presentation vs. quit after last slide |
### Scheduler-Specific Fields
The scheduler may send additional fields that are preserved in `current_event.json`:
| Field | Type | Description |
|-------|------|-------------|
| `page_progress` | boolean | Show overall progress bar in presentation (Impressive `--page-progress`). Can be provided at `presentation.page_progress` (preferred) or top-level. |
| `auto_progress` | boolean | Show per-page auto-advance countdown (Impressive `--auto-progress`). Can be provided at `presentation.auto_progress` (preferred) or top-level. |
| `occurrence_of_id` | integer | Original event ID for recurring events |
| `recurrence_rule` | string | iCal recurrence rule (RRULE format) |
| `recurrence_end` | string | End date for recurring events |
**Note:** All fields from the scheduler are automatically preserved when events are stored in `current_event.json`. The client does not filter or modify scheduler-specific metadata.
#### Progress Bar Display
When using Impressive PDF presenter:
- `page_progress: true` - Shows a progress bar at the bottom indicating position in the presentation
- `auto_progress: true` - Shows a countdown progress bar for each slide during auto-advance
- Both options can be enabled simultaneously for maximum visual feedback
## 🎥 Video Events
```json
{
"video": {
"url": "https://server/videos/intro.mp4",
"loop": true,
"autoplay": true,
"volume": 0.8
}
}
```
Notes:
- The Display Manager prefers `python-vlc` (libvlc) when available. This gives programmatic control over playback (autoplay, loop, volume) and ensures the player is cleanly stopped and released when events end.
- Supported video event fields:
- `url` (string): HTTP/HTTPS or streaming URL. URLs using the placeholder host `server` are rewritten to the configured file server (see File/API Server configuration).
- `autoplay` (boolean): start playback automatically when the event becomes active (default: true).
- `loop` (boolean): loop playback indefinitely.
- `volume` (float): 0.0–1.0 (mapped internally to VLC's 0–100 volume scale).
- Effective playback volume is calculated as `event.video.volume * client_config.audio.video_volume_multiplier` and then mapped to VLC's 0–100 scale. Example: `volume: 0.8` with `audio.video_volume_multiplier: 0.5` results in 40% VLC volume.
- If `python-vlc` is not installed, the Display Manager will fall back to launching the external `vlc` binary.
- External VLC audio rendering behavior:
- When `muted: true` (or effective volume resolves to 0), fallback starts VLC with `--no-audio`.
- When not muted, fallback applies startup loudness with `--gain=<0.00-1.00>` derived from effective volume.
- Runtime volume updates are best-effort in `python-vlc` mode; external VLC fallback is startup-parameter based.
- HDMI-CEC remains the recommended mechanism for TV power control only. TV volume via CEC is not implemented because support is device-dependent and much less reliable than controlling VLC directly.
- The client-wide multiplier is intended to be sent over the existing MQTT config topic `infoscreen/{client_id}/config` and is persisted locally in `src/config/client_settings.json` for the Display Manager.
- Fullscreen behavior:
- External VLC fallback uses `--fullscreen`.
- `python-vlc` mode enforces fullscreen on startup and retries fullscreen toggling briefly because video outputs may attach asynchronously.
- For a truly panel-free fullscreen (no taskbar), run the Display Manager inside a minimal kiosk X session or a dedicated user session without a desktop panel.
- Monitoring PID behavior:
- External VLC fallback reports the external `vlc` process PID.
- `python-vlc` mode is in-process, so monitoring reports the `display_manager.py` runtime PID.
## 🌐 Web Events
```json
{
"web": {
"url": "https://dashboard.example.com"
}
}
```
Opens webpage in Chromium kiosk mode (fullscreen, no UI).
## 🗂️ Project Structure
```
infoscreen-dev/
├── .env # Environment configuration
├── README.md # This file
├── IMPRESSIVE_INTEGRATION.md # Detailed presentation system docs
├── src/
│ ├── simclient.py # MQTT client (events, heartbeat, discovery)
│ ├── display_manager.py # Display controller (manages applications)
│ ├── requirements.txt # Python dependencies
│ ├── current_event.json # Current active event (runtime)
│ ├── config/ # Persistent client data
│ │ ├── client_uuid.txt
│ │ └── last_group_id.txt
│ ├── presentation/ # Downloaded presentation files
│ └── screenshots/ # Dashboard screenshots
├── scripts/
│ ├── start-dev.sh # Start development client
│ ├── start-display-manager.sh # Start Display Manager
│ ├── test-display-manager.sh # Interactive testing
│ ├── test-impressive.sh # Test Impressive (auto-quit mode)
│ ├── test-impressive-loop.sh # Test Impressive (loop mode)
│ ├── test-mqtt.sh # Test MQTT connectivity
│ ├── test-screenshot.sh # Test screenshot capture
│ ├── test-utc-timestamps.sh # Test event timing
│ └── present-pdf-auto-advance.sh # PDF presentation wrapper
└── logs/ # Application logs
```
## 🧪 Testing
### Test Display Manager
```bash
./scripts/test-display-manager.sh
```
Interactive menu for testing:
- Check Display Manager status
- Create test events (presentation, video, webpage)
- View active processes
- Cycle through different event types
### Test Impressive Presentation
**Single playthrough (auto-quit):**
```bash
./scripts/test-impressive.sh
```
**Loop mode (infinite):**
```bash
./scripts/test-impressive-loop.sh
```
### Test MQTT Connectivity
```bash
./scripts/test-mqtt.sh
```
Verifies MQTT broker connectivity and topic access.
### Test Screenshot Capture
```bash
./scripts/test-screenshot.sh
```
Captures test screenshot for dashboard monitoring.
**Manual test:**
```bash
export SCREENSHOT_ALWAYS=1
export SCREENSHOT_CAPTURE_INTERVAL=5
python3 src/display_manager.py &
sleep 15
ls -lh src/screenshots/
```
## 🔧 Configuration Details
### Environment Variables
All configuration is done via `.env` file in the project root. Copy `.env.template` to `.env` and adjust values for your environment.
#### Environment
- `ENV` - Environment mode: `development` or `production`
- **Important:** CEC TV control is automatically disabled in `development` mode
- `DEBUG_MODE` - Enable debug output: `1` (on) or `0` (off)
- `LOG_LEVEL` - Logging verbosity: `DEBUG`, `INFO`, `WARNING`, or `ERROR`
#### MQTT Configuration
- `MQTT_BROKER` - **Required.** MQTT broker IP address or hostname
- `MQTT_PORT` - MQTT broker port (default: `1883`)
- `MQTT_USERNAME` - Optional. MQTT authentication username (if broker requires it)
- `MQTT_PASSWORD` - Optional. MQTT authentication password (if broker requires it)
#### Timing Configuration (seconds)
- `HEARTBEAT_INTERVAL` - How often client sends status updates to server (default: `60`)
- `SCREENSHOT_INTERVAL` - How often simclient transmits screenshots via MQTT (default: `180`)
- `SCREENSHOT_CAPTURE_INTERVAL` - How often display_manager captures screenshots (default: `180`)
- `DISPLAY_CHECK_INTERVAL` - How often display_manager checks for new events (default: `15`)
#### Screenshot Configuration
- `SCREENSHOT_ALWAYS` - Force screenshot capture even when no display is active
- `0` - In production: capture only when a display process is active; in development: periodic idle captures are allowed so dashboard stays fresh
- `1` - Always capture screenshots (useful for testing)
#### File/API Server Configuration
These settings control how the client downloads presentation files and other content.
- `FILE_SERVER_HOST` - Optional. File server hostname/IP. Defaults to `MQTT_BROKER` if empty The client runs as two cooperating processes:
- `FILE_SERVER_PORT` - File server port (default: `8000`)
- `FILE_SERVER_SCHEME` - Protocol: `http` or `https` (default: `http`)
- `FILE_SERVER_BASE_URL` - Optional. Full base URL override (e.g., `http://192.168.1.100:8000`)
- When set, this takes precedence over HOST/PORT/SCHEME settings
#### HDMI-CEC TV Control (Optional) - `src/simclient.py`: MQTT communication, discovery, heartbeats, event ingestion, dashboard publishing, power intent intake.
Automatic TV power management based on event scheduling. - `src/display_manager.py`: display orchestration, HDMI-CEC, screenshots, local runtime health state.
- `CEC_ENABLED` - Enable automatic TV control: `true` or `false` Important runtime files:
- **Note:** Automatically disabled when `ENV=development` to avoid TV cycling during testing
- `CEC_DEVICE` - Target CEC device address (recommended: `0` for TV)
- `CEC_TURN_OFF_DELAY` - Seconds to wait before turning off TV after last event ends (default: `30`)
- `CEC_POWER_ON_WAIT` - Seconds to wait after power ON command for TV to boot (default: `5`)
- `CEC_POWER_OFF_WAIT` - Seconds to wait after power OFF command (default: `5`, increase for slower TVs)
### File Server URL Resolution - `src/current_event.json`: active event from scheduler.
- `src/current_process_health.json`: process health bridge for dashboard and monitoring.
- `src/power_intent_state.json`: latest validated MQTT power intent.
- `src/power_state.json`: last applied power action telemetry.
- `src/screenshots/`: shared screenshot directory.
The MQTT client ([src/simclient.py](src/simclient.py)) downloads presentation files and videos from the configured file server. ## Content Types
**URL Rewriting:** ### Presentations
- Event URLs using placeholder host `server` (e.g., `http://server:8000/...`) are automatically rewritten to the configured file server
- By default, file server = `MQTT_BROKER` host with port `8000` and `http` scheme
- Use `FILE_SERVER_BASE_URL` for complete override, or set individual HOST/PORT/SCHEME variables
**Best practices:** Presentations are rendered server-side to PDF and displayed with Impressive. Auto-advance, loop, page progress, and auto-progress are supported.
- Keep inline comments in `.env` after a space and `#` to avoid parsing issues
- Match the scheme (`http`/`https`) to your actual server configuration
- For HTTPS or non-standard ports, explicitly set `FILE_SERVER_SCHEME` and `FILE_SERVER_PORT`
### MQTT Topics See [IMPRESSIVE_INTEGRATION.md](IMPRESSIVE_INTEGRATION.md) for full behavior, event examples, and troubleshooting.
#### Client → Server ### Videos
- `infoscreen/discovery` - Initial client announcement
- `infoscreen/{client_id}/heartbeat` - Regular status updates
- `infoscreen/{client_id}/dashboard` - Dashboard payload v2 (grouped schema: message/content/runtime/metadata, includes screenshot base64, capture type, schema version)
- `infoscreen/{client_id}/health` - Process health state (`event_id`, process, pid, status)
- `infoscreen/{client_id}/logs/error` - Forwarded client error logs
- `infoscreen/{client_id}/logs/warn` - Forwarded client warning logs
#### Server → Client Video events support:
- `infoscreen/{client_id}/discovery_ack` - Server response with client ID
- `infoscreen/{client_id}/group_id` - Group assignment
- `infoscreen/events/{group_id}` - Event commands for group
### Client Identification - `url`
- `autoplay`
- `loop`
- `volume`
**Hardware Token:** SHA256 hash of: The Display Manager prefers `python-vlc`; if unavailable it falls back to the external VLC binary.
- CPU serial number
- MAC addresses (all network interfaces)
**Persistent UUID:** Stored in `src/config/client_uuid.txt` ### Web Pages
**Group Membership:** Stored in `src/config/last_group_id.txt` Web and WebUntis events are displayed in Chromium kiosk mode.
## 🔍 Troubleshooting ## TV Power Intent
### Display Manager doesn't start presentations Phase 1 TV power coordination uses the group topic:
**Check Impressive installation:** - `infoscreen/groups/{group_id}/power/intent`
```bash
which impressive
# If not found: sudo apt-get install impressive
```
**Check logs:** Key references:
```bash
tail -f logs/display_manager.log
```
**Check disk space:** - Frozen contract: [TV_POWER_INTENT_SERVER_CONTRACT_V1.md](TV_POWER_INTENT_SERVER_CONTRACT_V1.md)
```bash - Rollout and canary testing: [TV_POWER_RUNBOOK.md](TV_POWER_RUNBOOK.md)
df -h - Client implementation handoff: [TV_POWER_HANDOFF_CLIENT.md](TV_POWER_HANDOFF_CLIENT.md)
```
**Note:** PPTX conversion happens server-side via Gotenberg. The client only receives and displays pre-rendered PDF files. ## Testing
### Slides don't auto-advance Use the helper scripts in `scripts/` for focused tests:
**Verify event JSON:** - `./scripts/test-display-manager.sh`: event and process testing.
- `auto_advance: true` is set - `./scripts/test-impressive.sh`: single-play presentation.
- `slide_interval` is specified (default: 10) - `./scripts/test-impressive-loop.sh`: looping presentation.
- `./scripts/test-mqtt.sh`: MQTT broker connectivity.
- `./scripts/test-reboot-command.sh`: end-to-end reboot/shutdown command lifecycle canary (`accepted -> execution_started -> completed/failed`).
- `./scripts/test-screenshot.sh`: screenshot capture.
- `./scripts/test-hdmi-cec.sh`: HDMI-CEC diagnostics and runtime state inspection.
- `./scripts/test-power-intent.sh`: MQTT power intent publishing, rejection tests, and telemetry checks.
**Test Impressive directly:** ## Troubleshooting
```bash
./scripts/test-impressive.sh
```
### Presentation doesn't loop Use the specialist docs instead of treating this file as the full troubleshooting manual:
**Verify event JSON:** - Presentation and Impressive issues: [IMPRESSIVE_INTEGRATION.md](IMPRESSIVE_INTEGRATION.md)
- `loop: true` is set - HDMI-CEC setup and TV control: [HDMI_CEC_SETUP.md](HDMI_CEC_SETUP.md)
- Screenshot race condition and metadata sync: [SCREENSHOT_MQTT_FIX.md](SCREENSHOT_MQTT_FIX.md)
- Monitoring and dashboard behavior: [CLIENT_MONITORING_SETUP.md](CLIENT_MONITORING_SETUP.md)
- Developer-oriented MQTT/event details: [src/README.md](src/README.md)
**Test loop mode:** Quick checks:
```bash
./scripts/test-impressive-loop.sh
```
### File downloads fail - Follow logs: `tail -f logs/display_manager.log src/simclient.log`
- Inspect screenshots: `ls -lh src/screenshots/`
- Inspect power state: `cat src/power_intent_state.json` and `cat src/power_state.json`
- Restart services (systemd): `sudo systemctl restart infoscreen-simclient infoscreen-display`
- Restart services (dev): `./scripts/restart-all.sh`
Symptoms: ## Deployment
- `Failed to resolve 'server'` or `NameResolutionError` when downloading files
- `Invalid URL 'http # http or https://...'` in `logs/simclient.log`
What to check: For production you typically run both `simclient.py` and `display_manager.py` via systemd or Docker.
- Look for lines like `Lade Datei herunter von:` in `logs/simclient.log` to see the effective URL used
- Ensure the URL host is the MQTT broker IP (or your configured file server), not `server`
- Verify `.env` values don't include inline comments as part of the value (e.g., keep `FILE_SERVER_SCHEME=http` on its own line)
Fixes: - Container setup: [src/CONTAINER_TRANSITION.md](src/CONTAINER_TRANSITION.md)
- If your API is on the same host as the broker: leave `FILE_SERVER_HOST` empty (defaults to `MQTT_BROKER`), keep `FILE_SERVER_PORT=8000`, and set `FILE_SERVER_SCHEME=http` or `https` - Production compose file: [src/docker-compose.production.yml](src/docker-compose.production.yml)
- To override fully, set `FILE_SERVER_BASE_URL` (e.g., `http://192.168.1.100:8000`); this takes precedence over host/port/scheme - Display manager architecture: [src/DISPLAY_MANAGER.md](src/DISPLAY_MANAGER.md)
- After changing `.env`, restart the simclient process
Expected healthy log sequence: If running directly on the host, ensure:
- `Lade Datei herunter von: http://<broker-ip>:8000/...`
- Followed by `"GET /... HTTP/1.1" 200` and `Datei erfolgreich heruntergeladen:`
### VLC hardware decode / renderer issues - the display session is available (`DISPLAY` / `XAUTHORITY` for X11),
- the screenshot tools for your session type are installed,
- `ENV=production` is set when you want HDMI-CEC active.
If you see messages like: ## Documentation Map
``` ### Operator / Deployment
[h264_v4l2m2m @ ...] Could not find a valid device
[h264_v4l2m2m @ ...] can't configure decoder
[... ] avcodec decoder error: cannot start codec (h264_v4l2m2m)
```
that indicates libVLC / ffmpeg attempted to use the platform V4L2 M2M hardware decoder but the kernel/device isn't available. Options to resolve: - [QUICK_REFERENCE.md](QUICK_REFERENCE.md)
- [HDMI_CEC_SETUP.md](HDMI_CEC_SETUP.md)
- [TV_POWER_RUNBOOK.md](TV_POWER_RUNBOOK.md)
- [CLIENT_MONITORING_SETUP.md](CLIENT_MONITORING_SETUP.md)
- [CHANGELOG.md](CHANGELOG.md)
- Enable the V4L2 M2M codec driver on the system (platform-specific; on Raspberry Pi ensure correct kernel/firmware and codec modules are loaded). Check `v4l2-ctl --list-devices` and `ls /dev/video*` after installing `v4l-utils`. ### Feature-Specific
- Disable hardware decoding so libVLC/ffmpeg uses software decoding (reliable but higher CPU). You can test this by launching the `vlc` binary with:
```bash - [IMPRESSIVE_INTEGRATION.md](IMPRESSIVE_INTEGRATION.md)
vlc --avcodec-hw=none 'http://<your-video-url>' - [SCREENSHOT_MQTT_FIX.md](SCREENSHOT_MQTT_FIX.md)
``` - [SCHEDULER_FIELDS_SUPPORT.md](SCHEDULER_FIELDS_SUPPORT.md)
- [SERVER_VOLUME_CONTROL_SETUP.md](SERVER_VOLUME_CONTROL_SETUP.md)
Or modify `src/display_manager.py` to create the libVLC instance with software-decoding forced: ### Development / Internal
```python - [src/README.md](src/README.md)
instance = vlc.Instance('--avcodec-hw=none', '--no-video-title-show', '--no-video-deco') - [src/DISPLAY_MANAGER.md](src/DISPLAY_MANAGER.md)
``` - [src/IMPLEMENTATION_SUMMARY.md](src/IMPLEMENTATION_SUMMARY.md)
- [TV_POWER_COORDINATION_TASKLIST.md](TV_POWER_COORDINATION_TASKLIST.md)
- [TV_POWER_HANDOFF_SERVER.md](TV_POWER_HANDOFF_SERVER.md)
- [SERVER_TEAM_ACTIONS.md](SERVER_TEAM_ACTIONS.md)
This is the fastest workaround if hardware decode is not required or not available on the device. ## Contributing
### MQTT connection issues Before changing runtime behavior:
**Test broker connectivity:** - test with the relevant helper scripts,
```bash - verify logs stay clean,
./scripts/test-mqtt.sh - update the specialist doc for the feature you changed.
```
### MQTT reconnect and heartbeat behavior When editing AI assistant guidance files:
- On reconnect, the client re-subscribes all topics in `on_connect` and re-sends discovery to re-register. - keep `.github/copilot-instructions.md` policy-focused,
- Heartbeats are sent only when connected. During brief reconnect windows, Paho may return rc=4 (`NO_CONN`). - follow its "Instruction File Design Rules" section,
- A single rc=4 warning after broker restarts or short network stalls is expected; the next heartbeat usually succeeds. - avoid turning it into a shadow README.
- Investigate only if rc=4 repeats across multiple intervals without subsequent successful heartbeat logs.
### Monitoring and UTC timestamps Recent project history is tracked in [CHANGELOG.md](CHANGELOG.md).
Client-side monitoring is implemented with a health-state bridge between `display_manager.py` and `simclient.py`. ## License
- Health bridge file: `src/current_process_health.json`
- Local monitoring log: `logs/monitoring.log`
- Process states: `running`, `crashed`, `stopped`
- Restart tracking: bounded restart attempts per active event
UTC timestamp policy:
- `display_manager.log`, `simclient.log`, and `monitoring.log` are written in UTC (`...Z`)
- MQTT payload timestamps (heartbeat/dashboard/health/log messages) are UTC ISO timestamps
- Screenshot metadata timestamps are UTC ISO timestamps
This prevents daylight-saving and timezone drift issues across clients.
### VLC/PulseAudio warnings in remote sessions
Warnings such as `pulse audio output error: overflow, flushing` can appear when testing through remote desktop/audio forwarding (for example, NoMachine) or virtual/dummy display setups.
- If playback and audio are stable on a real HDMI display, this is usually non-fatal.
- If warnings appear only in remote sessions, treat them as environment-related rather than a core video playback bug.
### Screenshots not uploading
**Check which session type you're running:**
```bash
echo $WAYLAND_DISPLAY # Set if Wayland
echo $DISPLAY # Set if X11
echo $XAUTHORITY # Should point to ~/.Xauthority for X11 captures
```
If `DISPLAY` is empty for non-interactive starts (systemd/nohup/ssh), the display manager now falls back to `:0` and tries `~/.Xauthority` automatically.
**Install appropriate screenshot tool:**
```bash
# For X11:
sudo apt-get install scrot imagemagick
# For Wayland:
sudo apt-get install grim gnome-screenshot
```
**Test screenshot capture:**
```bash
export SCREENSHOT_ALWAYS=1 # Force capture even without active event
./scripts/test-screenshot.sh
ls -lh src/screenshots/
```
**Check logs for session detection:**
```bash
tail -f logs/display_manager.log | grep -i screenshot
# Should show: "Screenshot session=wayland" or "Screenshot session=x11"
```
**If you see stale dashboard images after restarts:**
```bash
cat src/screenshots/meta.json
stat src/screenshots/latest.jpg
```
- If `send_immediately` is stuck `true` for old metadata, restart both processes so simclient consumes and clears it.
- If `latest.jpg` timestamp does not move while new `screenshot_*.jpg` files appear, update to latest code (fix for periodic `latest.jpg` update path) and restart display_manager.
**Verify simclient is reading screenshots:**
```bash
tail -f logs/simclient.log | grep -i screenshot
# Should show: "Dashboard published: schema=2.0 type=periodic screenshot=latest.jpg"
# For event transitions: "Dashboard published: schema=2.0 type=event_start ..."
```
## 📚 Documentation
- **IMPRESSIVE_INTEGRATION.md** - Detailed presentation system documentation
- **HDMI_CEC_SETUP.md** - HDMI-CEC setup and troubleshooting
- **src/DISPLAY_MANAGER.md** - Display Manager architecture
- **src/IMPLEMENTATION_SUMMARY.md** - Implementation overview
- **src/README.md** - MQTT client documentation
## 🔐 Security
- Hardware-based client identification (non-spoofable)
- Configurable MQTT authentication
- Local-only file storage
- No sensitive data in logs
## 🚢 Production Deployment
### Systemd Service
Create `/etc/systemd/system/infoscreen-display.service`:
```ini
[Unit]
Description=Infoscreen Display Manager
After=network.target
[Service]
Type=simple
User=olafn
WorkingDirectory=/home/olafn/infoscreen-dev/src
Environment="DISPLAY=:0"
Environment="XAUTHORITY=/home/olafn/.Xauthority"
ExecStart=/home/olafn/infoscreen-dev/venv/bin/python3 /home/olafn/infoscreen-dev/src/display_manager.py
Restart=always
RestartSec=10
[Install]
WantedBy=multi-user.target
```
Enable and start:
```bash
sudo systemctl daemon-reload
sudo systemctl enable infoscreen-display
sudo systemctl start infoscreen-display
sudo systemctl status infoscreen-display
```
### Auto-start on Boot
Both services (simclient.py and display_manager.py) should start automatically:
1. **simclient.py** - MQTT communication, event management
2. **display_manager.py** - Display application controller
Create a similar systemd service for simclient.py.
### Docker Deployment (Alternative)
```bash
docker-compose -f src/docker-compose.production.yml up -d
```
See `src/CONTAINER_TRANSITION.md` for details.
## 📝 Development
### Development Mode
Set in `.env`:
```bash
ENV=development
DEBUG_MODE=1
LOG_LEVEL=DEBUG
HEARTBEAT_INTERVAL=10
```
### Start Development Client
```bash
./scripts/start-dev.sh
```
### View Logs
```bash
# Display Manager
tail -f logs/display_manager.log
# MQTT Client
tail -f logs/simclient.log
# Both
tail -f logs/*.log
```
## 📺 HDMI-CEC TV Control
The system includes automatic TV power control via HDMI-CEC. The TV turns on when events start and turns off (with delay) when no events are active.
### Development mode behavior
- When `ENV=development`, HDMI-CEC is automatically disabled by the Display Manager to avoid constantly switching the TV during development.
- The test script `scripts/test-hdmi-cec.sh` also respects this: menu option 5 (Display Manager CEC integration) will detect development mode and skip the integration test. Manual options (1–4) still work for direct cec-client testing.
To test CEC end-to-end, temporarily set `ENV=production` in `.env` and restart the Display Manager, or use the manual commands in the test script.
### Quick Setup
```bash
# Install CEC utilities
sudo apt-get install cec-utils
# Test CEC connection
echo "scan" | cec-client -s -d 1
# Configure in .env
CEC_ENABLED=true
CEC_DEVICE=0 # Use 0 for best performance
CEC_TURN_OFF_DELAY=30
CEC_POWER_ON_WAIT=5 # Adjust if TV is slow to boot
CEC_POWER_OFF_WAIT=2
```
### Features
- **Auto Power On**: TV turns on when event starts
- **Auto Power Off**: TV turns off after configurable delay when events end
- **Smart Switching**: TV stays on when switching between events
- **Configurable Delay**: Prevent rapid on/off cycles
### Testing
```bash
echo "on 0" | cec-client -s -d 1 # Turn on
echo "standby 0" | cec-client -s -d 1 # Turn off
echo "pow 0" | cec-client -s -d 1 # Check status
```
## 🤝 Contributing
1. Test changes with `./scripts/test-display-manager.sh`
2. Verify MQTT communication with `./scripts/test-mqtt.sh`
3. Update documentation
4. Submit pull request
## 📄 License
[Add your license here] [Add your license here]
## 🆘 Support
For issues or questions:
1. Check logs in `logs/` directory
2. Review troubleshooting section
3. Test individual components with test scripts
4. Check MQTT broker connectivity
---
**Last Updated:** March 2026
**Status:** ✅ Production Ready
**Tested On:** Raspberry Pi 5, Raspberry Pi OS (Bookworm)
## Recent Changes
### November 2025
- Screenshot pipeline implemented with a two-process model (`display_manager.py` capture, `simclient.py` transmission).
- Wayland/X11 screenshot tool fallback chains added.
- Dashboard payload format extended with screenshot and system metadata.
- Scheduler event type support extended (`presentation`, `webuntis`, `webpage`, `website`).
- Website autoscroll support added (CDP injection + extension fallback).
### March 2026
- Event-trigger screenshots (`event_start`, `event_stop`) hardened against periodic overwrite races.
- `latest.jpg` and `meta.json` synchronization improved for reliable dashboard updates.
- Stale/invalid pending trigger metadata now self-heals instead of blocking periodic updates.
- Display environment fallbacks (`DISPLAY=:0`, `XAUTHORITY`) improved for non-interactive starts.
- Development mode allows periodic idle captures to keep dashboard previews fresh when no event is active.
- Event-triggered screenshots added: `display_manager` captures a screenshot shortly after every event start and stop and signals `simclient` via `meta.json` (`send_immediately=true`). Capture delays are content-type aware (presentation: 4s, video: 2s, web: 5s, configurable via `.env`).
- `simclient` screenshot service thread now runs on a 1-second tick instead of a blocking sleep, so triggered sends fire within ≤1s of the `meta.json` signal.
- Dashboard payload migrated to grouped v2 schema (`message`, `content`, `runtime`, `metadata`). Legacy flat fields removed. `metadata.schema_version` is `"2.0"`. Payload assembly centralized in `_build_dashboard_payload()`.
- Tunable trigger delays added: `SCREENSHOT_TRIGGER_DELAY_PRESENTATION`, `SCREENSHOT_TRIGGER_DELAY_VIDEO`, `SCREENSHOT_TRIGGER_DELAY_WEB`.
- Rapid event switches handled safely: pending trigger timer is cancelled and replaced when a new event starts before the delay expires.

127
SERVER_TEAM_ACTIONS.md Normal file
View File

@@ -0,0 +1,127 @@
# Server Team Action Items — Infoscreen Client
This document lists everything the server/infrastructure/frontend team must implement to complete the client integration. The client-side code is production-ready for all items listed here.
---
## 1. MQTT Broker Hardening (prerequisite for everything else)
- Disable anonymous access on the broker.
- Create one broker account **per client device**:
- Username convention: `infoscreen-client-<uuid-prefix>` (e.g. `infoscreen-client-9b8d1856`)
- Provision the password to the device `.env` as `MQTT_PASSWORD_BROKER=`
- Create a **server/publisher account** (e.g. `infoscreen-server`) for all server-side publishes.
- Enforce ACLs:
| Topic | Publisher |
|---|---|
| `infoscreen/{uuid}/commands` | server only |
| `infoscreen/{uuid}/command` (alias) | server only |
| `infoscreen/{uuid}/group_id` | server only |
| `infoscreen/events/{group_id}` | server only |
| `infoscreen/groups/+/power/intent` | server only |
| `infoscreen/{uuid}/commands/ack` | client only |
| `infoscreen/{uuid}/command/ack` | client only |
| `infoscreen/{uuid}/heartbeat` | client only |
| `infoscreen/{uuid}/health` | client only |
| `infoscreen/{uuid}/logs/#` | client only |
| `infoscreen/{uuid}/service_failed` | client only |
---
## 2. Reboot / Shutdown Command — Ack Lifecycle
Client publishes ack status updates to two topics per command (canonical + transitional alias):
- `infoscreen/{uuid}/commands/ack`
- `infoscreen/{uuid}/command/ack`
**Ack payload schema (v1, frozen):**
```json
{
"command_id": "07aab032-53c2-45ef-a5a3-6aa58e9d9fae",
"status": "accepted | execution_started | completed | failed",
"error_code": null,
"error_message": null
}
```
**Status lifecycle:**
| Status | When | Notes |
|---|---|---|
| `accepted` | Command received and validated | Immediate |
| `execution_started` | Helper invoked | Immediate after accepted |
| `completed` | Execution confirmed | For `reboot_host`: arrives after reconnect (10–90 s after `execution_started`) |
| `failed` | Helper returned error | `error_code` and `error_message` will be set |
**Server must:**
- Track `command_id` through the full lifecycle and update status in DB/UI.
- Surface `failed` + `error_code` to the operator UI.
- Expect `reboot_host` `completed` to arrive after a reconnect delay — do not treat the gap as a timeout.
- Use `expires_at` from the original command to determine when to abandon waiting.
---
## 3. Health Dashboard — Broker Connection Fields (Gap 2)
Every `infoscreen/{uuid}/health` payload now includes a `broker_connection` block:
```json
{
"timestamp": "2026-04-05T08:00:00.000000+00:00",
"expected_state": { "event_id": 42 },
"actual_state": {
"process": "display_manager",
"pid": 1234,
"status": "running"
},
"broker_connection": {
"broker_reachable": true,
"reconnect_count": 2,
"last_disconnect_at": "2026-04-04T10:30:00Z"
}
}
```
**Server must:**
- Display `reconnect_count` and `last_disconnect_at` per device in the health dashboard.
- Implement alerting heuristic:
- **All** clients go silent simultaneously → likely broker outage, not device crash.
- **Single** client goes silent → device crash, network failure, or process hang.
---
## 4. Service-Failed MQTT Notification (Gap 3)
When systemd gives up restarting a service after repeated crashes (`StartLimitBurst` exceeded), the client automatically publishes a **retained** message:
**Topic:** `infoscreen/{uuid}/service_failed`
**Payload:**
```json
{
"event": "service_failed",
"unit": "infoscreen-simclient.service",
"client_uuid": "9b8d1856-ff34-4864-a726-12de072d0f77",
"failed_at": "2026-04-05T08:00:00Z"
}
```
**Server must:**
- Subscribe to `infoscreen/+/service_failed` on startup (retained — message survives broker restart).
- Alert the operator immediately when this topic receives a payload.
- **Clear the retained message** once the device is acknowledged or recovered:
```
mosquitto_pub -t "infoscreen/{uuid}/service_failed" -n --retain
```
---
## 5. No Server Action Required
These items are fully implemented client-side and require no server changes:
- systemd watchdog (`WatchdogSec=60`) — hangs detected and process restarted automatically.
- Command deduplication — `command_id` deduplicated with 24-hour TTL.
- Ack retry backoff — client retries ack publish on broker disconnect until `expires_at`.
- Mock helper / test mode (`COMMAND_MOCK_REBOOT_IMMEDIATE_COMPLETE`) — development only.

88
TODO.md
View File

@@ -25,10 +25,98 @@ This file tracks higher-level todos and design notes for the infoscreen client.
- `set_volume()` issues appropriate CEC commands and returns success/failure. - `set_volume()` issues appropriate CEC commands and returns success/failure.
- Document any platform limitations (some TVs don't support absolute volume via CEC). - Document any platform limitations (some TVs don't support absolute volume via CEC).
## Systemd crash recovery (server team recommendation)
Reliable restart-on-crash for both processes must be handled by **systemd**, not by in-process watchdogs or ad-hoc shell scripts.
### What needs to be done
- `display_manager`: already has `scripts/infoscreen-display.service` with `Restart=on-failure` / `RestartSec=10`.
- Review `RestartSec` — may want a short backoff (e.g. 5–15 s) and `StartLimitIntervalSec` + `StartLimitBurst` to prevent thrash loops.
- `simclient`: **no service unit exists yet**.
- Create `scripts/infoscreen-simclient.service` modelled on the display service.
- Use `Restart=on-failure` and `RestartSec=10`.
- Wire `EnvironmentFile=/home/olafn/infoscreen-dev/.env` so the unit picks up `.env` variables automatically.
- Set `After=network-online.target` so MQTT connection is not attempted before the network is ready.
- Both units should be installed and enabled via `src/pi-setup.sh` (`systemctl enable --now`).
- After enabling, verify crash recovery with `kill -9 <pid>` and confirm systemd restarts the process within `RestartSec`.
### Acceptance criteria
- Both `simclient` and `display_manager` restart automatically within 15 s of any non-intentional exit.
- `systemctl status` shows `active (running)` after a crash-induced restart.
- `journalctl -u infoscreen-simclient` captures all process output (stdout + stderr).
- `pi-setup.sh` idempotently installs and enables both units.
### Notes
- Use `Restart=on-failure` — restarts on crashes and signals but not on clean `systemctl stop`, preserving operator control during deployments.
- The reboot/shutdown command flow publishes `execution_started` and then exits intentionally; systemd will restart simclient, and the recovery logic in the heartbeat loop will emit `completed` on reconnect. This is the intended lifecycle.
## Process health observability gaps
Two scenarios are currently undetected or ambiguous from the server/frontend perspective.
### Gap 1: Hung / deadlocked process ✅ implemented
**Solution implemented:** Zero-dependency `_sd_notify()` helper writes directly to `NOTIFY_SOCKET` (raw Unix socket, no extra package). `READY=1` is sent when the heartbeat loop starts; `WATCHDOG=1` is sent every 5 s in the main loop iteration. The service unit uses `Type=notify` + `WatchdogSec=60` — if the main loop freezes for 60 s, systemd kills and restarts the process automatically.
**To apply on device:**
```bash
sudo cp ~/infoscreen-dev/scripts/infoscreen-simclient.service /etc/systemd/system/
sudo systemctl daemon-reload
sudo systemctl restart infoscreen-simclient
```
### Gap 2: MQTT broker unreachable vs. simclient dead ✅ implemented (client side)
**Solution implemented:** `connection_state` dict expanded with `reconnect_count` and `connect_count`. `publish_health_message()` now accepts `connection_state` and appends a `broker_connection` block to every health payload:
```json
"broker_connection": {
"broker_reachable": true,
"reconnect_count": 2,
"last_disconnect_at": "2026-04-04T10:00:00Z"
}
```
`broker_reachable` = `true` when MQTT is connected at publish time.
`reconnect_count` increments on every reconnection (first connect does not count).
`last_disconnect_at` is the UTC timestamp of the most recent disconnect.
**Server-side action still needed:**
- Display `reconnect_count` and `last_disconnect_at` in the frontend health dashboard.
- Alert heuristic: if **all** clients go silent simultaneously → likely broker issue; if only one → likely device issue.
### Gap 3: systemd gives up (StartLimitBurst exceeded) ✅ implemented
**Solution implemented:** `scripts/infoscreen-notify-failure@.service` (template unit) + `scripts/infoscreen-notify-failure.sh`. Both main units have `OnFailure=infoscreen-notify-failure@%n.service`. When systemd marks a service `failed`, the notifier runs once, reads broker credentials from `.env`, reads `client_uuid.txt`, and publishes a retained JSON payload to `infoscreen/{uuid}/service_failed` via `mosquitto_pub`.
**To apply on device:**
```bash
sudo cp ~/infoscreen-dev/scripts/infoscreen-notify-failure@.service /etc/systemd/system/
sudo cp ~/infoscreen-dev/scripts/infoscreen-simclient.service /etc/systemd/system/
sudo cp ~/infoscreen-dev/scripts/infoscreen-display.service /etc/systemd/system/
sudo systemctl daemon-reload
sudo systemctl restart infoscreen-simclient infoscreen-display
```
**Topic:** `infoscreen/{client_uuid}/service_failed` (retained)
**Payload:** `{"event":"service_failed","unit":"infoscreen-simclient.service","client_uuid":"...","failed_at":"2026-..."}`
## Next-high-level items ## Next-high-level items
- Add environment-controlled libVLC hw-accel toggle (`VLC_HW_ACCEL=1|0`) to `display_manager.py` so software decode can be forced when necessary. - Add environment-controlled libVLC hw-accel toggle (`VLC_HW_ACCEL=1|0`) to `display_manager.py` so software decode can be forced when necessary.
- Add automated tests for video start/stop lifecycle (mock python-vlc) to ensure resources are released on event end. - Add automated tests for video start/stop lifecycle (mock python-vlc) to ensure resources are released on event end.
- Add allowlist validation for `website` / `webuntis` event URLs
- Goal: restrict browser-based events to approved hosts and schemes even if an authenticated publisher sends an unsafe URL.
- Ideas / approaches:
- Add env-configurable allowlists for general website hosts and WebUntis hosts.
- Allow only `https` by default and reject `file:`, `data:`, `javascript:`, loopback, and private-address URLs unless explicitly allowed.
- Enforce the same validation on both server-side payload generation and client-side execution in `display_manager.py`.
- Acceptance criteria:
- Unsafe or unapproved URLs are rejected before Chromium is launched.
- WebUntis and approved website events still work with explicit allowlist configuration.
## Notes ## Notes

View File

@@ -0,0 +1,206 @@
# TV Power Coordination Task List (Server + Client)
## Goal
Prevent unintended TV power-off during adjacent events while enabling coordinated, server-driven power intent via MQTT with robust client-side fallback.
## Scope
- Server publishes explicit TV power intent and event-window context.
- Client executes HDMI-CEC power actions with timer-safe behavior.
- Client falls back to local schedule/end-time logic if server intent is missing or stale.
- Existing event playback behavior remains backward compatible.
## Ownership Proposal
- Server team: Scheduler integration, power-intent publisher, reliability semantics.
- Client team: MQTT handler, state machine, CEC execution, fallback and observability.
---
## 1. MQTT Contract (Shared Spec)
### 1.1 Topics
- Command/intent topic (retained):
- infoscreen/{client_id}/power/intent
- Optional group-wide command topic (retained):
- infoscreen/groups/{group_id}/power/intent
- Client state/ack topic:
- infoscreen/{client_id}/power/state
### 1.2 QoS and retain
- intent topics: QoS 1, retained=true
- state topic: QoS 0 or 1 (recommend QoS 0 initially), retained=false
### 1.3 Intent payload schema (v1)
```json
{
"schema_version": "1.0",
"intent_id": "uuid-or-monotonic-id",
"issued_at": "2026-03-31T12:00:00Z",
"expires_at": "2026-03-31T12:10:00Z",
"target": {
"client_id": "optional-if-group-topic",
"group_id": "optional"
},
"power": {
"desired_state": "on",
"reason": "event_window_active",
"grace_seconds": 30
},
"event_window": {
"start": "2026-03-31T12:00:00Z",
"end": "2026-03-31T13:00:00Z"
}
}
```
### 1.4 State payload schema (client -> server)
```json
{
"schema_version": "1.0",
"intent_id": "last-applied-intent-id",
"client_id": "...",
"reported_at": "2026-03-31T12:00:01Z",
"power": {
"applied_state": "on",
"source": "mqtt_intent|local_fallback",
"result": "ok|skipped|error",
"detail": "free text"
}
}
```
### 1.5 Idempotency and ordering rules
- Client applies only the newest valid intent, ordered by issued_at, with intent_id as the tie-break.
- Duplicate intent_id must be ignored after first successful apply.
- Expired intents must not trigger new actions.
- Retained intent must be immediately usable after client reconnect.
### 1.6 Safety rules
- desired_state=on cancels any pending delayed-off timer before action.
- desired_state=off may schedule delayed-off, never immediate off during an active event window.
- If payload is malformed, client logs and ignores it.
---
## 2. Server Team Task List
### 2.1 Contract + scheduler mapping
- Finalize field names and UTC timestamp format with client team.
- Define when scheduler emits on/off intents for adjacent/overlapping events.
- Ensure contiguous events produce uninterrupted desired_state=on coverage.
### 2.2 Publisher implementation
- Add publisher for infoscreen/{client_id}/power/intent.
- Support retained messages and QoS 1.
- Include expires_at for stale-intent protection.
- Emit new intent_id for every semantic state transition.
### 2.3 Reconnect and replay behavior
- On scheduler restart, republish current effective intent as retained.
- On event edits/cancellations, publish replacement retained intent.
### 2.4 Conflict policy
- Define precedence when both group and per-client intents exist.
- Recommended: per-client overrides group intent.
### 2.5 Monitoring and diagnostics
- Record publish attempts, broker ack results, and active retained payload.
- Add operational dashboard panels for intent age and last transition.
### 2.6 Server acceptance criteria
- Adjacent event windows do not produce off intent between events.
- Reconnect test: fresh client receives retained intent and powers correctly.
- Expired intent is never acted on by a conforming client.
---
## 3. Client Team Task List
### 3.1 MQTT subscription + parsing
- Subscribe to infoscreen/{client_id}/power/intent.
- Optionally subscribe to infoscreen/groups/{group_id}/power/intent.
- Parse schema_version=1.0 payload with strict validation.
### 3.2 Power state controller integration
- Add power-intent handler in display manager path that owns HDMI-CEC decisions.
- On desired_state=on:
- cancel delayed-off timer
- turn on via CEC only if needed
- On desired_state=off:
- schedule delayed off using configured grace_seconds (or local default)
- re-check active event before executing off
### 3.3 Fallback behavior (critical)
- If MQTT unreachable, intent missing, invalid, or expired:
- fall back to existing local event-time logic
- use event end as off trigger with existing delayed-off safety
- If local logic sees active event, enforce cancel of pending off timer.
### 3.4 Adjacent-event race hardening
- Guarantee pending off timer is canceled on any newly active event.
- Ensure event switch path never requests off while next event is active.
- Add explicit logging for timer create/cancel/fire with reason and event_id.
### 3.5 State publishing
- Publish apply results to infoscreen/{client_id}/power/state.
- Include source=mqtt_intent or local_fallback.
- Include last intent_id and result details for troubleshooting.
### 3.6 Config flags
- Add feature toggle:
- POWER_CONTROL_MODE=local|mqtt|hybrid (recommend default: hybrid)
- hybrid behavior:
- prefer valid mqtt intent
- automatically fall back to local logic
### 3.7 Client acceptance criteria
- Adjacent events: no unintended off between two active windows.
- Broker outage during event: TV remains on via local fallback.
- Broker recovery: retained intent reconciles state without oscillation.
- Duplicate/old intents do not cause repeated CEC toggles.
---
## 4. Integration Test Matrix (Joint)
### 4.1 Happy paths
- Single event start -> on intent -> TV on.
- Event end -> off intent -> delayed off -> TV off.
- Adjacent events (end==start or small gap) -> uninterrupted TV on.
### 4.2 Failure paths
- Broker down before event start.
- Broker down during active event.
- Malformed retained intent at reconnect.
- Delayed off armed, then new event starts before timer fires.
### 4.3 Consistency checks
- Client state topic reflects actual applied source and result.
- Logs include intent_id correlation across server and client.
---
## 5. Rollout Plan
### Phase 1: Contract and feature flags
- Freeze schema and topic naming.
- Ship client support behind POWER_CONTROL_MODE=hybrid.
### Phase 2: Server publisher rollout
- Enable publishing for test group only.
- Verify retained and reconnect behavior.
### Phase 3: Production enablement
- Enable hybrid mode fleet-wide.
- Observe for 1 week: off-between-adjacent-events incidents must be zero.
### Phase 4: Optional tightening
- If metrics are stable, evaluate mqtt-first policy while retaining local safety fallback.
---
## 6. Definition of Done
- Shared MQTT contract approved by both teams.
- Server and client implementations merged with tests.
- Adjacent-event regression test added and passing.
- Operational runbook updated (topics, payloads, fallback behavior, troubleshooting).
- Production monitoring confirms no unintended mid-schedule TV power-off.

View File

@@ -0,0 +1,95 @@
# Client Handoff: TV Power Coordination
## Purpose
Implement robust client-side TV power control that applies server MQTT intents when valid and falls back to local event timing when server/broker data is missing or stale.
## Source of Truth
- Shared full plan: TV_POWER_COORDINATION_TASKLIST.md
## Scope (Client Team)
- Intent subscription/validation
- CEC state transitions and timer cancellation safety
- Hybrid fallback using local event windows
- Power state acknowledgment publishing
## MQTT Contract (Client Responsibilities)
### Subscribe
- infoscreen/{client_id}/power/intent
- Optional: infoscreen/groups/{group_id}/power/intent
### Publish state
- infoscreen/{client_id}/power/state
### State Payload (v1)
```json
{
"schema_version": "1.0",
"intent_id": "last-applied-intent-id",
"client_id": "...",
"reported_at": "2026-03-31T12:00:01Z",
"power": {
"applied_state": "on",
"source": "mqtt_intent|local_fallback",
"result": "ok|skipped|error",
"detail": "free text"
}
}
```
## Required Runtime Rules
### Intent Validation and Ordering
- Accept only schema_version=1.0 (or another version from an explicitly version-gated supported set).
- Ignore malformed payloads.
- Ignore expired intents (expires_at in past).
- Apply only the newest valid intent, ordered by issued_at with intent_id as the tie-break.
- Deduplicate already-applied intent_id.
### Power Action Safety
- desired_state=on:
- cancel pending delayed-off timer immediately
- turn on via CEC only if needed
- desired_state=off:
- schedule delayed off (grace_seconds or local default)
- re-check active event before executing actual off
### Fallback (Critical)
- If MQTT unavailable, intent missing, invalid, or stale:
- use existing local event start/end logic
- use event end as off trigger plus delayed-off safety
- Any active event must cancel pending off timers.
## Configuration
- Add POWER_CONTROL_MODE with values:
- local
- mqtt
- hybrid (recommended default)
### Hybrid Mode
- Prefer valid MQTT intent.
- Automatically fall back to local schedule logic when intent channel is not trustworthy.
## Implementation Tasks
1. Add intent topic handlers and schema validation.
2. Integrate intent application into display power control path.
3. Add timer race hardening for adjacent event transitions.
4. Add fallback decision branch for stale/missing intents.
5. Add power state publisher with intent_id/source/result.
6. Add logs for timer arm/cancel/fire with reason and event_id.
7. Add tests for adjacent events, broker outage, reconnect, duplicate intent.
## Acceptance Criteria
1. No unintended TV off between adjacent events.
2. Broker outage during active event does not power off TV prematurely.
3. Reconnect with retained intent reconciles state without oscillation.
4. Duplicate/old intents do not trigger repeated CEC toggles.
5. State messages clearly show mqtt_intent vs local_fallback source.
## Target Integration Points
- Main runtime orchestration: src/display_manager.py
- MQTT plumbing and topic handlers: src/simclient.py
## Operational Notes
- Keep fallback logic enabled even after MQTT rollout.
- Ensure all new timestamps are UTC ISO format.

View File

@@ -0,0 +1,83 @@
# Server Handoff: TV Power Coordination
## Purpose
Implement server-side MQTT power intent publishing so clients can keep TVs on across adjacent events and power off safely after schedules end.
## Source of Truth
- Shared full plan: TV_POWER_COORDINATION_TASKLIST.md
## Scope (Server Team)
- Scheduler-to-intent mapping
- MQTT publishing semantics (retain, QoS, expiry)
- Conflict handling (group vs client)
- Observability for intent lifecycle
## MQTT Contract (Server Responsibilities)
### Topics
- Primary (per-client): infoscreen/{client_id}/power/intent
- Optional (group-level): infoscreen/groups/{group_id}/power/intent
### Delivery Semantics
- QoS: 1
- retained: true
- Always publish UTC timestamps (ISO 8601 with Z)
### Intent Payload (v1)
```json
{
"schema_version": "1.0",
"intent_id": "uuid-or-monotonic-id",
"issued_at": "2026-03-31T12:00:00Z",
"expires_at": "2026-03-31T12:10:00Z",
"target": {
"client_id": "optional-if-group-topic",
"group_id": "optional"
},
"power": {
"desired_state": "on",
"reason": "event_window_active",
"grace_seconds": 30
},
"event_window": {
"start": "2026-03-31T12:00:00Z",
"end": "2026-03-31T13:00:00Z"
}
}
```
## Required Behavior
### Adjacent/Overlapping Events
- Never publish an intermediate off intent when windows are contiguous/overlapping.
- Maintain continuous desired_state=on coverage across adjacent windows.
### Reconnect/Restart
- On scheduler restart, republish effective retained intent.
- On event edits/cancellations, replace retained intent with a fresh intent_id.
### Conflict Policy
- If both group and client intent exist: per-client overrides group.
### Expiry Safety
- expires_at must be set for every intent.
- Server should avoid publishing already-expired intents.
## Implementation Tasks
1. Add scheduler mapping layer that computes effective desired_state per client timeline.
2. Add intent publisher with retained QoS1 delivery.
3. Generate unique intent_id for each semantic transition.
4. Emit issued_at/expires_at and event_window consistently in UTC.
5. Add group-vs-client precedence logic.
6. Add logs/metrics for publish success, retained payload age, and transition count.
7. Add integration tests for adjacent events and reconnect replay.
## Acceptance Criteria
1. Adjacent events do not create OFF gap intents.
2. Fresh client receives retained intent after reconnect and gets correct desired state.
3. Intent payloads are schema-valid, UTC-formatted, and include expiry.
4. Publish logs and metrics allow intent timeline reconstruction.
## Operational Notes
- Keep intent publishing idempotent and deterministic.
- Preserve backward compatibility while clients run in hybrid mode.

View File

@@ -0,0 +1,163 @@
# TV Power Intent — Server Contract v1 (Phase 1)
> This document is the stable reference for client-side implementation.
> The server implementation is validated and frozen at this contract.
> Last validated: 2026-04-01
---
## Topic
```
infoscreen/groups/{group_id}/power/intent
```
- **Scope**: group-level only (Phase 1). No per-client topic in Phase 1.
- **QoS**: 1
- **Retained**: true — broker holds last payload; client receives it immediately on (re)connect.
---
## Publish semantics
| Trigger | Behaviour |
|---|---|
| Semantic transition (state/reason changes) | New `intent_id`, immediate publish |
| No change (heartbeat) | Same `intent_id`, refreshed `issued_at` and `expires_at`, published every poll interval |
| Scheduler startup | Immediate publish before first poll wait |
| MQTT reconnect | Immediate retained republish of last known intent |
Poll interval default: **15 seconds** (dev) / **30 seconds** (prod).
---
## Payload schema
All fields are always present; Phase 1 defines no optional fields.
```json
{
"schema_version": "1.0",
"intent_id": "<uuid4>",
"group_id": <integer>,
"desired_state": "on" | "off",
"reason": "active_event" | "no_active_event",
"issued_at": "<ISO 8601 UTC with Z>",
"expires_at": "<ISO 8601 UTC with Z>",
"poll_interval_sec": <integer>,
"active_event_ids": [<integer>, ...],
"event_window_start": "<ISO 8601 UTC with Z>" | null,
"event_window_end": "<ISO 8601 UTC with Z>" | null
}
```
### Field reference
| Field | Type | Description |
|---|---|---|
| `schema_version` | string | Always `"1.0"` in Phase 1 |
| `intent_id` | string (uuid4) | Stable across heartbeats; new value on semantic transition |
| `group_id` | integer | Matches the MQTT topic group_id |
| `desired_state` | `"on"` or `"off"` | The commanded TV power state |
| `reason` | string | Human-readable reason for current state |
| `issued_at` | UTC Z string | When this payload was computed |
| `expires_at` | UTC Z string | After this time, payload is stale; re-subscribe or treat as `off` |
| `poll_interval_sec` | integer | Server poll interval; expiry = max(3 × poll, 90s) |
| `active_event_ids` | integer array | IDs of currently active events; empty when `off` |
| `event_window_start` | UTC Z string or null | Start of merged active coverage window; null when `off` |
| `event_window_end` | UTC Z string or null | End of merged active coverage window; null when `off` |
---
## Expiry rule
```
expires_at = issued_at + max(3 × poll_interval_sec, 90s)
```
Default at poll=15s → expiry window = **90 seconds**.
**Client rule**: if `now > expires_at` treat as stale and fall back to `off` until a fresh payload arrives.
---
## Example payloads
### ON (active event)
```json
{
"schema_version": "1.0",
"intent_id": "4a7fe3bc-3654-48e3-b5b9-9fad1f7fead3",
"group_id": 2,
"desired_state": "on",
"reason": "active_event",
"issued_at": "2026-04-01T06:00:03.496Z",
"expires_at": "2026-04-01T06:01:33.496Z",
"poll_interval_sec": 15,
"active_event_ids": [148],
"event_window_start": "2026-04-01T06:00:00Z",
"event_window_end": "2026-04-01T07:00:00Z"
}
```
### OFF (no active event)
```json
{
"schema_version": "1.0",
"intent_id": "833c53e3-d728-4604-9861-6ff7be1f227e",
"group_id": 2,
"desired_state": "off",
"reason": "no_active_event",
"issued_at": "2026-04-01T07:00:03.702Z",
"expires_at": "2026-04-01T07:01:33.702Z",
"poll_interval_sec": 15,
"active_event_ids": [],
"event_window_start": null,
"event_window_end": null
}
```
---
## Validated server behaviours (client can rely on these)
| Scenario | Guaranteed server behaviour |
|---|---|
| Event starts | `desired_state: on` emitted within one poll interval |
| Event ends | `desired_state: off` emitted within one poll interval |
| Adjacent events (end1 == start2) | No intermediate `off` emitted at boundary |
| Overlapping events | `desired_state: on` held continuously |
| Scheduler restart during active event | Immediate `on` republish on reconnect; the broker's retained message holds `on` during the outage |
| No events in group | `desired_state: off` with empty `active_event_ids` |
| Heartbeat (no change) | Same `intent_id`, refreshed timestamps every poll |
---
## Client responsibilities (Phase 1)
1. **Subscribe** to `infoscreen/groups/{own_group_id}/power/intent` at QoS 1 on connect.
2. **Re-subscribe on reconnect** — broker retained message will deliver last known intent immediately.
3. **Parse `desired_state`** and apply TV power action (`on` → power on / `off` → power off).
4. **Deduplicate** using `intent_id` — if same `intent_id` received again, skip re-applying power command.
5. **Check expiry** — if `now > expires_at`, treat as stale and fall back to `off` until renewed.
6. **Ignore unknown fields** — for forward compatibility with Phase 2 additions.
7. **Do not use per-client topic** in Phase 1; only group topic is active.
---
## Timestamps
- All timestamps use **ISO 8601 UTC with Z suffix**: `"2026-04-01T06:00:03.496Z"`
- Client must parse as UTC.
- Do not assume local time.
---
## Phase 2 (deferred — not yet active)
- Per-client intent topic: `infoscreen/{client_uuid}/power/intent`
- Per-client override takes precedence over group intent
- Client state acknowledgement: `infoscreen/{client_uuid}/power/state`
- Listener persistence of client state to DB

213
TV_POWER_RUNBOOK.md Normal file
View File

@@ -0,0 +1,213 @@
# TV Power Runbook
Operational runbook for Phase 1 TV power coordination using MQTT power intent plus local HDMI-CEC fallback.
## Scope
This runbook covers:
- `POWER_CONTROL_MODE` rollout
- canary validation
- expected log signatures
- rollback
- common failure checks
Contract reference:
- [TV_POWER_INTENT_SERVER_CONTRACT_V1.md](TV_POWER_INTENT_SERVER_CONTRACT_V1.md)
## Topics and Runtime Files
Phase 1 topic:
- `infoscreen/groups/{group_id}/power/intent`
Telemetry topic:
- `infoscreen/{client_id}/power/state`
Runtime files:
- `src/power_intent_state.json`
- `src/power_state.json`
- `src/current_process_health.json`
## Power Control Modes
- `local`: ignore MQTT intent and use local event-time CEC logic.
- `hybrid`: prefer fresh MQTT intent and fall back to local timing when missing, stale, or invalid.
- `mqtt`: MQTT intent is authoritative; stale or missing intent triggers safe delayed-off behavior.
Recommended rollout path:
1. Start with `local`.
2. Canary with `hybrid`.
3. Roll out `hybrid` fleet-wide after stable observation.
4. Use `mqtt` only if you explicitly want strict server authority.
## Gate 1: Local Mode
Set in `.env`:
```bash
POWER_CONTROL_MODE=local
```
Expected startup log signature:
```text
[INFO] Power control mode: local
```
Expected behavior:
- No MQTT power intent application.
- Existing CEC behavior remains unchanged.
## Gate 2: Hybrid Canary
On one client or one canary group:
```bash
POWER_CONTROL_MODE=hybrid
./scripts/restart-all.sh
```
Expected startup logs:
```text
[INFO] Power state service thread started
[INFO] Subscribed to power intent topic: infoscreen/groups/<id>/power/intent
[INFO] Power control mode: hybrid
```
### Valid ON Intent
Expected sequence:
```text
[INFO] Power intent accepted: id=<uuid> desired_state=on reason=active_event ...
[INFO] Applying MQTT power intent ON id=<uuid> reason=active_event
[INFO] TV turned ON successfully
[INFO] Power state published: state=on source=mqtt_intent result=ok
```
### Valid OFF Intent
Expected sequence:
```text
[INFO] Power intent accepted: id=<uuid> desired_state=off reason=no_active_event ...
[INFO] Applying MQTT power intent OFF id=<uuid> reason=no_active_event
[INFO] Power state published: state=off source=mqtt_intent result=ok
```
### Expired Intent
Expected rejection:
```text
[WARNING] Rejected power intent: intent expired
```
### Malformed Intent
Expected rejection:
```text
[WARNING] Rejected power intent: missing required field: intent_id
```
### Retained Clear
When you clear the retained topic, the broker delivers an empty payload.
Expected log:
```text
[INFO] Power intent retained message cleared (empty payload)
```
This is normal and should not be treated as a parse error.
## Validation Commands
Use:
```bash
./scripts/test-power-intent.sh
./scripts/test-hdmi-cec.sh
```
Useful test-power-intent paths:
- Option 1: publish valid ON intent.
- Option 2: publish valid OFF intent.
- Option 3: publish stale intent.
- Option 4: publish malformed intent.
- Option 5: clear retained topic with an empty retained payload.
- Option 6: inspect runtime JSON files.
- Option 8: subscribe to the power-state topic.
Useful manual checks:
```bash
tail -f logs/display_manager.log src/simclient.log
cat src/power_intent_state.json
cat src/power_state.json
cat src/current_process_health.json
```
## Rollback
To leave canary mode:
```bash
POWER_CONTROL_MODE=local
./scripts/restart-all.sh
```
Expected result:
- MQTT power intent handling becomes inactive.
- Local CEC fallback remains in place.
## Fleet Rollout Gate
Roll out `hybrid` more widely only after:
- zero unintended TV-off events between adjacent events,
- valid ON/OFF actions apply cleanly,
- duplicate refreshes are logged as `result=skipped`,
- stale and malformed intents are rejected without side effects,
- retained clear events no longer produce noisy warnings.
Suggested observation window:
- at least 7 days on a canary client or canary group.
## Common Symptoms
| Symptom | Check | Likely Action |
|---|---|---|
| Intent never arrives | `src/power_intent_state.json` missing or invalid | Check broker connectivity and group assignment |
| `intent expired` appears repeatedly | client clock and server publish cadence | verify NTP and server refresh interval |
| TV turns off between adjacent events | `src/power_state.json` shows `local_fallback` or stale intent at transition | inspect server timing and boundary coverage |
| Repeated power state publishes with `skipped` | duplicate intent refreshes only | normal dedupe behavior |
| Clear retained intent logs warning | old code path still running | restart services and verify latest code |
## Dashboard Observability
`src/current_process_health.json` includes a `power_control` block similar to:
```json
"power_control": {
"mode": "hybrid",
"source": "mqtt_intent",
"last_intent_id": "4a7fe3bc-...",
"last_action": "on",
"last_power_at": "2026-04-01T06:00:05Z"
}
```
This is the fastest local check for what the display manager last did and why.

View File

@@ -0,0 +1,149 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://infoscreen.local/schemas/reboot-command-payload-schemas.json",
"title": "Infoscreen Reboot Command Payload Schemas",
"description": "Frozen v1 schemas for per-client command and command acknowledgement payloads.",
"$defs": {
"commandPayloadV1": {
"type": "object",
"additionalProperties": false,
"required": [
"schema_version",
"command_id",
"client_uuid",
"action",
"issued_at",
"expires_at",
"requested_by",
"reason"
],
"properties": {
"schema_version": {
"type": "string",
"const": "1.0"
},
"command_id": {
"type": "string",
"format": "uuid"
},
"client_uuid": {
"type": "string",
"format": "uuid"
},
"action": {
"type": "string",
"enum": [
"reboot_host",
"shutdown_host"
]
},
"issued_at": {
"type": "string",
"format": "date-time"
},
"expires_at": {
"type": "string",
"format": "date-time"
},
"requested_by": {
"type": [
"integer",
"null"
],
"minimum": 1
},
"reason": {
"type": [
"string",
"null"
],
"maxLength": 2000
}
}
},
"commandAckPayloadV1": {
"type": "object",
"additionalProperties": false,
"required": [
"command_id",
"status",
"error_code",
"error_message"
],
"properties": {
"command_id": {
"type": "string",
"format": "uuid"
},
"status": {
"type": "string",
"enum": [
"accepted",
"execution_started",
"completed",
"failed"
]
},
"error_code": {
"type": [
"string",
"null"
],
"maxLength": 128
},
"error_message": {
"type": [
"string",
"null"
],
"maxLength": 4000
}
},
"allOf": [
{
"if": {
"properties": {
"status": {
"const": "failed"
}
}
},
"then": {
"properties": {
"error_code": {
"type": "string",
"minLength": 1
},
"error_message": {
"type": "string",
"minLength": 1
}
}
}
}
]
}
},
"examples": [
{
"commandPayloadV1": {
"schema_version": "1.0",
"command_id": "5d1f8b4b-7e85-44fb-8f38-3f5d5da5e2e4",
"client_uuid": "9b8d1856-ff34-4864-a726-12de072d0f77",
"action": "reboot_host",
"issued_at": "2026-04-03T12:48:10Z",
"expires_at": "2026-04-03T12:52:10Z",
"requested_by": 1,
"reason": "operator_request"
}
},
{
"commandAckPayloadV1": {
"command_id": "5d1f8b4b-7e85-44fb-8f38-3f5d5da5e2e4",
"status": "execution_started",
"error_code": null,
"error_message": null
}
}
]
}

View File

@@ -0,0 +1,59 @@
## Reboot Command Payload Schema Snippets
This file provides copy-ready validation snippets for client and integration test helpers.
### Canonical Topics (v1)
1. Command topic: infoscreen/{client_uuid}/commands
2. Ack topic: infoscreen/{client_uuid}/commands/ack
### Transitional Compatibility Topics
1. Command topic alias: infoscreen/{client_uuid}/command
2. Ack topic alias: infoscreen/{client_uuid}/command/ack
### Canonical Action Values
1. reboot_host
2. shutdown_host
### Ack Status Values
1. accepted
2. execution_started
3. completed
4. failed
### JSON Schema Source
Use this file for machine validation:
1. implementation-plans/reboot-command-payload-schemas.json
### Minimal Command Schema Snippet
```json
{
"type": "object",
"additionalProperties": false,
"required": ["schema_version", "command_id", "client_uuid", "action", "issued_at", "expires_at", "requested_by", "reason"],
"properties": {
"schema_version": { "const": "1.0" },
"command_id": { "type": "string", "format": "uuid" },
"client_uuid": { "type": "string", "format": "uuid" },
"action": { "enum": ["reboot_host", "shutdown_host"] },
"issued_at": { "type": "string", "format": "date-time" },
"expires_at": { "type": "string", "format": "date-time" },
"requested_by": { "type": ["integer", "null"] },
"reason": { "type": ["string", "null"] }
}
}
```
### Minimal Ack Schema Snippet
```json
{
"type": "object",
"additionalProperties": false,
"required": ["command_id", "status", "error_code", "error_message"],
"properties": {
"command_id": { "type": "string", "format": "uuid" },
"status": { "enum": ["accepted", "execution_started", "completed", "failed"] },
"error_code": { "type": ["string", "null"] },
"error_message": { "type": ["string", "null"] }
}
}
```

View File

@@ -0,0 +1,169 @@
## Client Team Implementation Spec (Raspberry Pi 5)
### Mission
Implement client-side command handling for reliable restart and shutdown with strict validation, idempotency, acknowledgements, and reboot recovery continuity.
### Ownership Boundaries
1. Client team owns command intake, execution, acknowledgement emission, and post-reboot continuity.
2. Platform team owns command issuance, lifecycle aggregation, and server-side escalation logic.
3. Client implementation must not assume managed PoE availability.
### Required Client Behaviors
### Frozen MQTT Topics and Schemas (v1)
1. Canonical command topic: infoscreen/{client_uuid}/commands.
2. Canonical ack topic: infoscreen/{client_uuid}/commands/ack.
3. Transitional compatibility topics during migration:
- infoscreen/{client_uuid}/command
- infoscreen/{client_uuid}/command/ack
4. QoS policy: command QoS 1, ack QoS 1 recommended.
5. Retain policy: commands and acks are non-retained.
6. Client migration behavior: subscribe to both command topics and publish to both ack topics during migration.
Frozen command payload schema:
```json
{
"schema_version": "1.0",
"command_id": "5d1f8b4b-7e85-44fb-8f38-3f5d5da5e2e4",
"client_uuid": "9b8d1856-ff34-4864-a726-12de072d0f77",
"action": "reboot_host",
"issued_at": "2026-04-03T12:48:10Z",
"expires_at": "2026-04-03T12:52:10Z",
"requested_by": 1,
"reason": "operator_request"
}
```
Frozen ack payload schema:
```json
{
"command_id": "5d1f8b4b-7e85-44fb-8f38-3f5d5da5e2e4",
"status": "execution_started",
"error_code": null,
"error_message": null
}
```
Allowed ack status values:
1. accepted
2. execution_started
3. completed
4. failed
Frozen command action values for v1:
1. reboot_host
2. shutdown_host
Reserved but not emitted by server in v1:
1. restart_service
### Client Decision Defaults (v1)
1. Privileged helper invocation: sudoers + local helper script (`sudo /usr/local/bin/infoscreen-cmd-helper.sh`).
2. Dedupe retention: keep processed command IDs for 24 hours and cap store size to 5000 newest entries.
3. Ack retry schedule while broker unavailable: 0.5s, 1s, 2s, 4s, then 5s cap until expires_at.
4. Boot-loop handling: server remains authority for safety lockout; client enforces idempotency by command_id and reports local execution outcomes.
### MQTT Auth Hardening (Current Priority)
1. Client must support authenticated MQTT connections for both command and event intake.
2. Client must remain compatible with broker ACLs that restrict publish/subscribe rights per topic.
3. Client should support TLS broker connections from environment configuration when certificates are provided.
4. URL/domain allowlisting for web and webuntis events is explicitly deferred and tracked separately in TODO.md.
5. Client credentials are loaded from the local [.env](.env) file, not from tracked docs or templates.
Server-side prerequisites for this client work:
1. Broker credentials must be provisioned for clients.
2. Broker ACLs must allow each client to subscribe only to its own command topics and assigned event topics.
3. Broker ACLs must allow each client to publish only its own ack, heartbeat, health, dashboard, and telemetry topics.
4. Server-side publishers must move to authenticated broker access before production rollout.
Validation snippets for helper scripts:
1. Human-readable snippets: implementation-plans/reboot-command-payload-schemas.md
2. Machine-validated JSON Schema: implementation-plans/reboot-command-payload-schemas.json
### 1. Command Intake
1. Subscribe to canonical and transitional command topics with QoS 1.
2. Parse required fields exactly: schema_version, command_id, client_uuid, action, issued_at, expires_at, requested_by, reason.
3. Reject invalid payloads with failed acknowledgement including error_code and diagnostic message.
4. Reject stale commands when current time exceeds expires_at.
5. Reject already-processed command_id values without re-execution.
### 2. Idempotency And Persistence
1. Persist processed command_id and execution result on local storage.
2. Persistence must survive service restart and full OS reboot.
3. On restart, reload dedupe cache before processing newly delivered commands.
### 3. Acknowledgement Contract Behavior
1. Emit accepted immediately after successful validation and dedupe pass.
2. Emit execution_started immediately before invoking the command action.
3. Emit completed only when local success condition is confirmed.
4. Emit failed with structured error_code on validation or execution failure.
5. If MQTT is temporarily unavailable, retry ack publish with bounded backoff until command expiry.
6. Ack payload fields are strict: command_id, status, error_code, error_message (no additional fields).
7. For status failed, error_code and error_message must be non-null, non-empty strings.
### 4. Execution Security Model
1. Execute via systemd-managed privileged helper.
2. Allow only whitelisted operations:
- reboot_host
- shutdown_host
3. Do not execute restart_service in v1.
4. Disallow arbitrary shell commands and untrusted arguments.
5. Enforce per-command execution timeout and terminate hung child processes.
### 5. Reboot Recovery Continuity
1. For reboot_host action:
- send execution_started
- trigger reboot promptly
2. During startup:
- emit heartbeat early
- emit process-health once service is ready
3. Keep last command execution state available after reboot for reconciliation.
### 6. Time And Timeout Semantics
1. Use monotonic timers for local elapsed-time checks.
2. Use UTC wall-clock only for protocol timestamps and expiry comparisons.
3. Target reconnect baseline on Pi 5 USB-SATA SSD: 90 seconds.
4. Accept cold-boot and USB enumeration ceiling up to 150 seconds.
### 7. Capability Reporting
1. Report recovery capability class:
- software_only
- managed_poe_available
- manual_only
2. Report watchdog enabled status.
3. Report boot-source metadata for diagnostics.
### 8. Error Codes Minimum Set
1. invalid_schema
2. missing_field
3. stale_command
4. duplicate_command
5. permission_denied_local
6. execution_timeout
7. execution_failed
8. broker_unavailable
9. internal_error
### Acceptance Tests (Client Team)
1. Invalid schema payload is rejected and failed ack emitted.
2. Expired command is rejected and not executed.
3. Duplicate command_id is not executed twice.
4. reboot_host emits execution_started and reconnects with heartbeat in expected window.
5. shutdown_host action is accepted and invokes local privileged helper without accepting non-whitelisted actions.
6. MQTT outage during ack path retries correctly without duplicate execution.
7. Client idempotency cooperates with server-side lockout semantics (no local reboot-rate policy).
8. Client connects successfully to an authenticated broker and still receives commands and event topics permitted by ACLs.
### Delivery Artifacts
1. Client protocol conformance checklist.
2. Test evidence for all acceptance tests.
3. Runtime logs showing full lifecycle for one shutdown and one reboot scenario.
4. Known limitations list per image version.
### Definition Of Done
1. All acceptance tests pass on Pi 5 USB-SATA SSD test devices.
2. No duplicate execution observed under reconnect and retained-delivery edge cases.
3. Acknowledgement sequence is complete and machine-parseable for server correlation.
4. Reboot recovery continuity works without managed PoE dependencies.

View File

@@ -0,0 +1,175 @@
## Remote Reboot Reliability Handoff (Share Document)
### Purpose
This document defines the agreed implementation scope for reliable remote reboot and shutdown of Raspberry Pi 5 clients, with monitoring-first visibility and safe escalation paths.
### Scope
1. In scope: restart and shutdown command reliability.
2. In scope: full lifecycle monitoring and audit visibility.
3. In scope: capability-tier recovery model with optional managed PoE escalation.
4. Out of scope: broader maintenance module in client-management for this cycle.
5. Out of scope: mandatory dependency on customer-managed power switching.
### Agreed Operating Model
1. Command delivery is asynchronous and lifecycle-tracked, not fire-and-forget.
2. Commands use idempotent command_id semantics with stale-command rejection by expires_at.
3. Monitoring is authoritative for operational state and escalation decisions.
4. Recovery must function even when no managed power switching is available.
### Frozen Contract v1 (Effective Immediately)
1. Canonical command topic: infoscreen/{client_uuid}/commands.
2. Canonical ack topic: infoscreen/{client_uuid}/commands/ack.
3. Transitional compatibility topics accepted during migration:
- infoscreen/{client_uuid}/command
- infoscreen/{client_uuid}/command/ack
4. QoS policy: command QoS 1, ack QoS 1 recommended.
5. Retain policy: commands and acks are non-retained.
Command payload schema (frozen):
```json
{
"schema_version": "1.0",
"command_id": "5d1f8b4b-7e85-44fb-8f38-3f5d5da5e2e4",
"client_uuid": "9b8d1856-ff34-4864-a726-12de072d0f77",
"action": "reboot_host",
"issued_at": "2026-04-03T12:48:10Z",
"expires_at": "2026-04-03T12:52:10Z",
"requested_by": 1,
"reason": "operator_request"
}
```
Ack payload schema (frozen):
```json
{
"command_id": "5d1f8b4b-7e85-44fb-8f38-3f5d5da5e2e4",
"status": "execution_started",
"error_code": null,
"error_message": null
}
```
Allowed ack status values:
1. accepted
2. execution_started
3. completed
4. failed
Frozen command action values:
1. reboot_host
2. shutdown_host
API endpoint mapping:
1. POST /api/clients/{uuid}/restart -> action reboot_host
2. POST /api/clients/{uuid}/shutdown -> action shutdown_host
Validation snippets:
1. Human-readable snippets: implementation-plans/reboot-command-payload-schemas.md
2. Machine-validated JSON Schema: implementation-plans/reboot-command-payload-schemas.json
### Command Lifecycle States
1. queued
2. publish_in_progress
3. published
4. ack_received
5. execution_started
6. awaiting_reconnect
7. recovered
8. completed
9. failed
10. expired
11. timed_out
12. canceled
13. blocked_safety
14. manual_intervention_required
### Timeout Defaults (Pi 5, USB-SATA SSD baseline)
1. queued to publish_in_progress: immediate, timeout 5 seconds.
2. publish_in_progress to published: timeout 8 seconds.
3. published to ack_received: timeout 20 seconds.
4. ack_received to execution_started: 15 seconds for service restart, 25 seconds for host reboot.
5. execution_started to awaiting_reconnect: timeout 10 seconds.
6. awaiting_reconnect to recovered: baseline 90 seconds after validation, cold-boot ceiling 150 seconds.
7. recovered to completed: 15 to 20 seconds based on fleet stability.
8. command expires_at default: 240 seconds, bounded 180 to 360 seconds.
### Recovery Tiers
1. Tier 0 baseline, always required: watchdog, systemd auto-restart, lifecycle tracking, manual intervention fallback.
2. Tier 1 optional: managed PoE per-port power-cycle escalation where customer infrastructure supports it.
3. Tier 2 no remote power control: direct manual intervention workflow.
### Governance And Safety
1. Role access: admin and superadmin.
2. Bulk actions require reason capture.
3. Safety lockout: maximum 3 reboot commands per client in 15 minutes.
4. Escalation cooldown: 60 seconds before automatic move to manual_intervention_required.
### MQTT Auth Hardening (Phase 1, Required Before Broad Rollout)
1. Intranet-only deployment is not sufficient protection for privileged MQTT actions by itself.
2. Phase 1 hardening scope is broker authentication, authorization, and network restriction; payload URL allowlisting is deferred to a later client/server feature.
3. MQTT broker must disable anonymous publish/subscribe access in production.
4. MQTT broker must require authenticated identities for server-side publishers and client devices.
5. MQTT broker must enforce ACLs so that:
- only server-side services can publish to `infoscreen/{client_uuid}/commands`
- only server-side services can publish scheduler event topics
- each client can subscribe only to its own command topics and assigned event topics
- each client can publish only its own ack, heartbeat, health, dashboard, and telemetry topics
6. Broker port exposure must be restricted to the management network and approved hosts only.
7. TLS support is strongly recommended in this phase and should be enabled when operationally feasible.
### Server Team Actions For Auth Hardening
1. Provision broker credentials for command/event publishers and for client devices.
2. Configure Mosquitto or equivalent broker ACLs for per-topic publish and subscribe restrictions.
3. Disable anonymous access on production brokers.
4. Restrict broker network exposure with firewall rules, VLAN policy, or equivalent network controls.
5. Update server/frontend deployment to publish MQTT with authenticated credentials.
6. Validate that server-side event publishing and reboot/shutdown command publishing still work under the new ACL policy.
7. Coordinate credential distribution and rotation with the client deployment process.
### Credential Management Guidance
1. Real MQTT passwords must not be stored in tracked documentation or committed templates.
2. Each client device should receive a unique broker username and password, stored only in its local `.env` file.
3. Server-side publisher credentials should be stored in the server team's secret-management path, not in source control.
4. Recommended naming convention for client broker users: `infoscreen-client-<client-uuid-prefix>`.
5. Client passwords should be random, at least 20 characters, and rotated through deployment tooling or broker administration procedures.
6. The server/infrastructure team owns broker-side user creation, ACL assignment, rotation, and revocation.
7. The client team owns loading credentials from local env files and validating connection behavior against the secured broker.
### Client Team Actions For Auth Hardening
1. Add MQTT username/password support in the client connection setup.
2. Add client-side TLS configuration support from environment when certificates are provided.
3. Update local test helpers to support authenticated MQTT publishing and subscription.
4. Validate command and event intake against the authenticated broker configuration before canary rollout.
### Ready For Server/Frontend Team (Auth Phase)
1. Client implementation is ready to connect with MQTT auth from local `.env` (`MQTT_USERNAME`, `MQTT_PASSWORD`, optional TLS settings).
2. Client command/event intake and client ack/telemetry publishing run over the authenticated MQTT session.
3. Server/frontend team must now complete broker-side enforcement and publisher migration.
Server/frontend done criteria:
1. Anonymous broker access is disabled in production.
2. Server-side publishers use authenticated broker credentials.
3. ACLs are active and validated for command, event, and client telemetry topics.
4. At least one canary client proves end-to-end flow under ACLs:
- server publishes command/event with authenticated publisher
- client receives payload
- client sends ack/telemetry successfully
5. Revocation test passes: disabling one client credential blocks only that client without impacting others.
Operational note:
1. Client-side auth support is necessary but not sufficient by itself; broker ACL/auth enforcement is the security control that must be enabled by the server/infrastructure team.
### Rollout Plan
1. Contract freeze and sign-off.
2. Platform and client implementation against frozen schemas.
3. One-group canary.
4. Roll back if the combined share of failed plus timed_out commands exceeds 5 percent.
5. Expand only after 7 days below intervention threshold.
### Success Criteria
1. Deterministic command lifecycle visibility from enqueue to completion.
2. No duplicate execution under reconnect or delayed-delivery conditions.
3. Stable Pi 5 SSD reconnect behavior within defined baseline.
4. Clear and actionable manual intervention states when automatic recovery is exhausted.

View File

@@ -0,0 +1,54 @@
## Reboot Reliability Kickoff Summary
### Objective
Ship a reliable, observable restart and shutdown workflow for Raspberry Pi 5 clients, with safe escalation and clear operator outcomes.
### What Is Included
1. Asynchronous command lifecycle with idempotent command_id handling.
2. Monitoring-first state visibility from queued to terminal outcomes.
3. Client acknowledgements for accepted, execution_started, completed, and failed.
4. Pi 5 USB-SATA SSD timeout baseline and tuning rules.
5. Capability-tier recovery with optional managed PoE escalation.
### What Is Not Included
1. Full maintenance module in client-management.
2. Mandatory managed power-switch integration (managed PoE escalation stays optional).
3. Production Wake-on-LAN rollout.
### Team Split
1. Platform team: API command lifecycle, safety controls, listener ack ingestion.
2. Web team: lifecycle-aware UX and command status display.
3. Client team: strict validation, dedupe, ack sequence, secure execution helper, reboot continuity.
### Ownership Matrix
| Team | Primary Plan File | Main Deliverables |
| --- | --- | --- |
| Platform team | implementation-plans/reboot-implementation-handoff-share.md | Command lifecycle backend, policy enforcement, listener ack mapping, safety lockout and escalation |
| Web team | implementation-plans/reboot-implementation-handoff-share.md | Lifecycle UI states, bulk safety UX, capability visibility, command status polling |
| Client team | implementation-plans/reboot-implementation-handoff-client-team.md | Command validation, dedupe persistence, ack sequence, secure execution helper, reboot continuity |
| Project coordination | implementation-plans/reboot-kickoff-summary.md | Phase sequencing, canary gates, rollback thresholds, cross-team sign-off tracking |
### Baseline Operational Defaults
1. Safety lockout: 3 reboot commands per client in rolling 15 minutes.
2. Escalation cooldown: 60 seconds.
3. Reconnect target on Pi 5 SSD: 90 seconds baseline, 150 seconds cold-boot ceiling.
4. Rollback canary trigger: failed plus timed_out above 5 percent.
### Frozen Contract Snapshot
1. Canonical command topic: infoscreen/{client_uuid}/commands.
2. Canonical ack topic: infoscreen/{client_uuid}/commands/ack.
3. Transitional compatibility topics during migration:
- infoscreen/{client_uuid}/command
- infoscreen/{client_uuid}/command/ack
4. Command schema version: 1.0.
5. Allowed command actions: reboot_host, shutdown_host.
6. Allowed ack status values: accepted, execution_started, completed, failed.
7. Validation snippets:
- implementation-plans/reboot-command-payload-schemas.md
- implementation-plans/reboot-command-payload-schemas.json
### Immediate Next Steps
1. Continue implementation in parallel by team against frozen contract.
2. Client team validates dedupe and expiry handling on canonical topics.
3. Platform team verifies ack-state transitions for accepted, execution_started, completed, failed.
4. Execute one-group canary and validate timing plus failure drills.

View File

@@ -0,0 +1,27 @@
#!/usr/bin/env bash
set -euo pipefail

# infoscreen-cmd-helper.sh - privileged entry point for remote power commands.
#
# Accepts exactly one argument (reboot_host or shutdown_host) and replaces
# itself with the matching systemctl call. Anything else is rejected.
#
# Intended installation path: /usr/local/bin/infoscreen-cmd-helper.sh
# Suggested sudoers entry:
# infoscreen ALL=(ALL) NOPASSWD: /usr/local/bin/infoscreen-cmd-helper.sh
#
# Exit status: 2 on wrong argument count, 1 on an unsupported action.

# Print the usage line to stderr and stop with status 2.
usage() {
    echo "usage: infoscreen-cmd-helper.sh <reboot_host|shutdown_host>" >&2
    exit 2
}

(( $# == 1 )) || usage

requested="$1"

# exec: the helper process is replaced by systemctl, so nothing runs afterwards.
if [[ "$requested" == "reboot_host" ]]; then
    exec systemctl reboot
elif [[ "$requested" == "shutdown_host" ]]; then
    exec systemctl poweroff
fi

echo "unsupported action: $requested" >&2
exit 1

View File

@@ -3,6 +3,8 @@ Description=Infoscreen Display Manager
Documentation=https://github.com/RobbStarkAustria/infoscreen_client_2025 Documentation=https://github.com/RobbStarkAustria/infoscreen_client_2025
After=network.target graphical.target After=network.target graphical.target
Wants=network-online.target Wants=network-online.target
# Publish an MQTT alert if systemd gives up restarting (StartLimitBurst exceeded).
OnFailure=infoscreen-notify-failure@%n.service
[Service] [Service]
Type=simple Type=simple

View File

@@ -0,0 +1,55 @@
#!/usr/bin/env bash
# Publishes a retained service-failed MQTT notification when called by systemd OnFailure=.
# Usage: infoscreen-notify-failure.sh <failing-unit-name>
#
# Designed to be called from infoscreen-notify-failure@.service.
# Reads broker settings from .env (MQTT_BROKER, MQTT_PORT, MQTT_USER,
# MQTT_PASSWORD_BROKER and the optional MQTT_TLS_* variables); reads the client
# UUID from src/config/client_uuid.txt.
# Best-effort: a failing .env line or an unreachable broker is logged to the
# journal and does not make the notifier itself fail.
set -euo pipefail

FAILING_UNIT="${1:-unknown}"
PROJECT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
ENV_FILE="$PROJECT_DIR/.env"
UUID_FILE="$PROJECT_DIR/src/config/client_uuid.txt"

# Escape backslashes and double quotes so a value can be embedded in a JSON string.
json_escape() {
    local s="$1"
    s=${s//\\/\\\\}
    s=${s//\"/\\\"}
    printf '%s' "$s"
}

# Load .env (skip comments and blank lines).
# `source` sits on the left of `||`, which suspends errexit for the sourced
# text: a line that fails no longer aborts the script before the alert is sent.
if [[ -f "$ENV_FILE" ]]; then
    set -a
    # shellcheck source=/dev/null
    source <(grep -v '^\s*#' "$ENV_FILE" | grep -v '^\s*$') \
        || echo "WARNING: problem while loading $ENV_FILE; continuing" >&2
    set +a
fi

MQTT_BROKER="${MQTT_BROKER:-localhost}"
MQTT_PORT="${MQTT_PORT:-1883}"
MQTT_USER="${MQTT_USER:-}"
MQTT_PASSWORD_BROKER="${MQTT_PASSWORD_BROKER:-}"

CLIENT_UUID=""
if [[ -r "$UUID_FILE" ]]; then
    CLIENT_UUID="$(tr -d '[:space:]' < "$UUID_FILE")"
fi
# A missing or empty UUID file must not produce the topic "infoscreen//service_failed".
CLIENT_UUID="${CLIENT_UUID:-unknown}"

TOPIC="infoscreen/${CLIENT_UUID}/service_failed"
TIMESTAMP="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
# Unit and UUID are escaped so an unusual name cannot break the JSON document.
PAYLOAD=$(printf '{"event":"service_failed","unit":"%s","client_uuid":"%s","failed_at":"%s"}' \
    "$(json_escape "$FAILING_UNIT")" "$(json_escape "$CLIENT_UUID")" "$TIMESTAMP")

# Build mosquitto_pub auth args
AUTH_ARGS=()
if [[ -n "$MQTT_USER" ]]; then AUTH_ARGS+=(-u "$MQTT_USER"); fi
if [[ -n "$MQTT_PASSWORD_BROKER" ]]; then AUTH_ARGS+=(-P "$MQTT_PASSWORD_BROKER"); fi

# Build mosquitto_pub TLS args, using the same MQTT_TLS_* switches as the
# MQTT test scripts in this repository. Inactive unless MQTT_TLS_ENABLED is truthy.
TLS_ARGS=()
case "${MQTT_TLS_ENABLED:-0}" in
    1|true|yes)
        if [[ -n "${MQTT_TLS_CA_CERT:-}" ]]; then TLS_ARGS+=(--cafile "$MQTT_TLS_CA_CERT"); fi
        if [[ -n "${MQTT_TLS_CERT:-}" ]]; then TLS_ARGS+=(--cert "$MQTT_TLS_CERT"); fi
        if [[ -n "${MQTT_TLS_KEY:-}" ]]; then TLS_ARGS+=(--key "$MQTT_TLS_KEY"); fi
        case "${MQTT_TLS_INSECURE:-0}" in
            1|true|yes) TLS_ARGS+=(--insecure) ;;
        esac
        ;;
esac

echo "Publishing service-failed notification: unit=$FAILING_UNIT client=$CLIENT_UUID"

# ${arr[@]+"${arr[@]}"} expands to nothing for an empty array without tripping
# `set -u` on bash versions older than 4.4.
mosquitto_pub \
    -h "$MQTT_BROKER" \
    -p "$MQTT_PORT" \
    ${AUTH_ARGS[@]+"${AUTH_ARGS[@]}"} \
    ${TLS_ARGS[@]+"${TLS_ARGS[@]}"} \
    -t "$TOPIC" \
    -m "$PAYLOAD" \
    -q 1 \
    --retain \
    2>&1 || echo "WARNING: mosquitto_pub failed (broker unreachable?); notification not delivered"

View File

@@ -0,0 +1,19 @@
# Template unit: started as infoscreen-notify-failure@<unit>.service through the
# OnFailure= setting of the monitored infoscreen units.
[Unit]
Description=Infoscreen service-failed MQTT notifier (%i)
# One-shot: run once and exit. %i is the failing unit name passed by OnFailure=.
After=network.target
[Service]
Type=oneshot
# NOTE(review): the user name and the /home/olafn paths below are hardcoded.
# Per the commit history, pi-setup.sh substitutes the real user when it installs
# the units; adjust by hand if this file is installed manually.
User=olafn
Group=olafn
WorkingDirectory=/home/olafn/infoscreen-dev
# No leading "-": the unit fails to start when this file is missing.
# The notifier script additionally sources the same .env on its own.
EnvironmentFile=/home/olafn/infoscreen-dev/.env
ExecStart=/home/olafn/infoscreen-dev/scripts/infoscreen-notify-failure.sh %i
# Do not restart the notifier itself.
Restart=no
StandardOutput=journal
StandardError=journal
SyslogIdentifier=infoscreen-notify-failure

View File

@@ -0,0 +1,52 @@
[Unit]
Description=Infoscreen Simclient (MQTT communication)
Documentation=https://github.com/RobbStarkAustria/infoscreen_client_2025
# Simclient needs network before starting — MQTT will fail otherwise.
After=network-online.target
Wants=network-online.target
# Publish an MQTT alert if systemd gives up restarting (StartLimitBurst exceeded).
OnFailure=infoscreen-notify-failure@%n.service
# StartLimitIntervalSec=/StartLimitBurst= are [Unit] settings since systemd 230.
# Placed in [Service] they are reported as "Unknown key" and ignored, so no
# restart rate-limiting would be enforced.
StartLimitIntervalSec=60
StartLimitBurst=5
[Service]
# notify: simclient sends READY=1 via sd_notify once fully initialised.
# WatchdogSec: if WATCHDOG=1 is not sent within this window, systemd kills
# and restarts the process — detects hung/deadlocked main loops.
Type=notify
WatchdogSec=60
# NOTE(review): user name and /home/olafn paths are hardcoded; per the commit
# history pi-setup.sh substitutes the real user at install time.
User=olafn
Group=olafn
WorkingDirectory=/home/olafn/infoscreen-dev
# Load all client configuration from the local .env file.
# Keep .env mode 600; systemd reads it as root before dropping privileges.
EnvironmentFile=/home/olafn/infoscreen-dev/.env
# Start simclient
ExecStart=/home/olafn/infoscreen-dev/scripts/start-simclient.sh
# Restart on failure (non-zero exit or signal).
# This covers crash recovery AND the reboot-command lifecycle:
# 1. Server sends reboot_host command
# 2. Simclient publishes accepted + execution_started, then exits
# 3. Systemd restarts simclient within RestartSec seconds
# 4. On reconnect, heartbeat loop detects pending_recovery_command and
# publishes completed — closing the lifecycle cleanly.
# NOTE(review): Restart=on-failure does not restart the service after a clean
# exit (status 0). Confirm that the exit in step 2 is non-zero, or that the
# host reboot itself is what brings simclient back.
Restart=on-failure
RestartSec=10
StandardOutput=journal
StandardError=journal
SyslogIdentifier=infoscreen-simclient
# Security settings
NoNewPrivileges=true
PrivateTmp=true
# Resource limits
LimitNOFILE=65536
[Install]
# Simclient runs in multi-user mode — no graphical session required.
WantedBy=multi-user.target

View File

@@ -0,0 +1,24 @@
#!/usr/bin/env bash
set -euo pipefail

# Installs the privileged command helper and its sudoers drop-in.
# Usage: ./scripts/install-command-helper.sh [linux-user]
#
# linux-user defaults to the invoking user: SUDO_USER when this script is itself
# run through sudo (USER is then usually root), otherwise USER or `id -un`.
#
# The sudoers rule is written to a temporary file and syntax-checked with
# visudo BEFORE anything is placed in /etc/sudoers.d, so a rejected rule never
# reaches the live sudo configuration.

target_user="${1:-${SUDO_USER:-${USER:-$(id -un)}}}"
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
helper_src="$script_dir/infoscreen-cmd-helper.sh"
helper_dst="/usr/local/bin/infoscreen-cmd-helper.sh"
sudoers_file="/etc/sudoers.d/infoscreen-command-helper"

if [[ ! -f "$helper_src" ]]; then
    echo "helper source not found: $helper_src" >&2
    exit 1
fi

# Refuse to write a rule for an account that does not exist on this host.
if ! id -u "$target_user" >/dev/null 2>&1; then
    echo "unknown linux user: $target_user" >&2
    exit 1
fi

sudoers_tmp="$(mktemp)"
trap 'rm -f "$sudoers_tmp"' EXIT
printf '%s\n' "$target_user ALL=(ALL) NOPASSWD: $helper_dst" > "$sudoers_tmp"

# Validate first; install only a rule that visudo accepts.
if ! sudo visudo -cf "$sudoers_tmp" >/dev/null; then
    echo "generated sudoers rule failed validation; nothing was installed" >&2
    exit 1
fi

sudo install -m 0755 "$helper_src" "$helper_dst"
# install sets owner and mode in one step, so no separate chmod is needed.
sudo install -m 0440 -o root -g root "$sudoers_tmp" "$sudoers_file"

echo "Installed helper: $helper_dst"
echo "Installed sudoers: $sudoers_file (user: $target_user)"

34
scripts/mock-command-helper.sh Executable file
View File

@@ -0,0 +1,34 @@
#!/usr/bin/env bash
set -euo pipefail

# Non-destructive helper for command lifecycle canary tests.
# Use by starting simclient with:
# COMMAND_HELPER_PATH=/home/olafn/infoscreen-dev/scripts/mock-command-helper.sh
#
# Takes the same single argument as the real helper but never touches the host.
# Environment switches:
#   MOCK_COMMAND_HELPER_FORCE_FAIL=1   exit 1 after validation
#   MOCK_COMMAND_HELPER_SLEEP_SEC=<n>  sleep <n> before reporting success

if (( $# != 1 )); then
    echo "usage: mock-command-helper.sh <reboot_host|shutdown_host>" >&2
    exit 2
fi

requested="$1"

if [[ "$requested" != "reboot_host" && "$requested" != "shutdown_host" ]]; then
    echo "unsupported action: $requested" >&2
    exit 1
fi

force_fail="${MOCK_COMMAND_HELPER_FORCE_FAIL:-0}"
delay="${MOCK_COMMAND_HELPER_SLEEP_SEC:-0}"

if [[ "$force_fail" == "1" ]]; then
    echo "forced failure for canary test (action=$requested)" >&2
    exit 1
fi

# Any value other than the literal "0" is handed to sleep unchanged.
[[ "$delay" == "0" ]] || sleep "$delay"

echo "mock helper executed action=$requested"
exit 0

35
scripts/start-simclient.sh Executable file
View File

@@ -0,0 +1,35 @@
#!/bin/bash
# Launcher for the infoscreen Simclient (MQTT communication and event intake).
#
# Resolves the project root from this script's location, activates the venv
# found there and replaces itself with `python3 src/simclient.py`.
# Exits with status 1 when the venv directory or simclient.py is missing.
set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
VENV_PATH="$PROJECT_ROOT/venv"
SIMCLIENT="$PROJECT_ROOT/src/simclient.py"

printf '%s\n' "📡 Starting Simclient..." "Project root: $PROJECT_ROOT"

# The virtual environment must exist before it can be activated.
if ! [ -d "$VENV_PATH" ]; then
    printf '%s\n' \
        "❌ Virtual environment not found at: $VENV_PATH" \
        "Please create it with: python3 -m venv venv"
    exit 1
fi

. "$VENV_PATH/bin/activate"

# The entry point is checked only after activation, as before.
[ -f "$SIMCLIENT" ] || { echo "❌ Simclient not found at: $SIMCLIENT"; exit 1; }

# ENV is defaulted for the log line below; it is not exported by this script,
# so python3 sees it only if it was already part of the environment.
ENV="${ENV:-development}"
printf '%s\n' "Environment: $ENV" "Starting simclient..." "---"

# exec: python3 takes over this process instead of running as a child.
exec python3 "$SIMCLIENT"

View File

@@ -65,6 +65,8 @@ while true; do
echo " 4) Scan for devices" echo " 4) Scan for devices"
echo " 5) Test Display Manager CEC integration" echo " 5) Test Display Manager CEC integration"
echo " 6) View CEC logs from Display Manager" echo " 6) View CEC logs from Display Manager"
echo " 7) Show power intent/state runtime files"
echo " 8) Clear power intent/state runtime files"
echo " q) Quit" echo " q) Quit"
echo "" echo ""
read -p "Enter choice: " choice read -p "Enter choice: " choice
@@ -249,6 +251,35 @@ PYTEST
echo "" echo ""
read -p "Press Enter to continue..." read -p "Press Enter to continue..."
;; ;;
7)
echo -e "${YELLOW}Showing power intent/state runtime files...${NC}"
echo ""
INTENT_FILE="$PROJECT_ROOT/src/power_intent_state.json"
STATE_FILE="$PROJECT_ROOT/src/power_state.json"
if [ -f "$INTENT_FILE" ]; then
echo "power_intent_state.json:"
echo "-------------------------"
cat "$INTENT_FILE"
else
echo "power_intent_state.json not found"
fi
echo ""
if [ -f "$STATE_FILE" ]; then
echo "power_state.json:"
echo "-----------------"
cat "$STATE_FILE"
else
echo "power_state.json not found"
fi
echo ""
read -p "Press Enter to continue..."
;;
8)
echo -e "${YELLOW}Clearing power intent/state runtime files...${NC}"
rm -f "$PROJECT_ROOT/src/power_intent_state.json" "$PROJECT_ROOT/src/power_state.json"
echo -e "${GREEN}Removed runtime power files (if present).${NC}"
;;
q|Q) q|Q)
echo "Exiting..." echo "Exiting..."
exit 0 exit 0

View File

@@ -1,9 +1,27 @@
#!/bin/bash #!/bin/bash
source "$(dirname "$0")/../.env" source "$(dirname "$0")/../.env"
MQTT_AUTH_ARGS=()
MQTT_TLS_ARGS=()
if [[ -n "${MQTT_USERNAME:-}" ]]; then
MQTT_AUTH_ARGS+=( -u "$MQTT_USERNAME" )
fi
if [[ -n "${MQTT_PASSWORD:-}" ]]; then
MQTT_AUTH_ARGS+=( -P "$MQTT_PASSWORD" )
fi
if [[ "${MQTT_TLS_ENABLED:-0}" == "1" || "${MQTT_TLS_ENABLED:-0}" == "true" || "${MQTT_TLS_ENABLED:-0}" == "yes" ]]; then
[[ -n "${MQTT_TLS_CA_CERT:-}" ]] && MQTT_TLS_ARGS+=( --cafile "$MQTT_TLS_CA_CERT" )
[[ -n "${MQTT_TLS_CERT:-}" ]] && MQTT_TLS_ARGS+=( --cert "$MQTT_TLS_CERT" )
[[ -n "${MQTT_TLS_KEY:-}" ]] && MQTT_TLS_ARGS+=( --key "$MQTT_TLS_KEY" )
if [[ "${MQTT_TLS_INSECURE:-0}" == "1" || "${MQTT_TLS_INSECURE:-0}" == "true" || "${MQTT_TLS_INSECURE:-0}" == "yes" ]]; then
MQTT_TLS_ARGS+=( --insecure )
fi
fi
echo "Testing MQTT connection to $MQTT_BROKER:$MQTT_PORT" echo "Testing MQTT connection to $MQTT_BROKER:$MQTT_PORT"
echo "Publishing test message..." echo "Publishing test message..."
mosquitto_pub -h "$MQTT_BROKER" -p "$MQTT_PORT" -t "infoscreen/test" -m "Hello from Pi development setup" mosquitto_pub -h "$MQTT_BROKER" -p "$MQTT_PORT" "${MQTT_AUTH_ARGS[@]}" "${MQTT_TLS_ARGS[@]}" -t "infoscreen/test" -m "Hello from Pi development setup"
echo "Subscribing to test topic (press Ctrl+C to stop)..." echo "Subscribing to test topic (press Ctrl+C to stop)..."
mosquitto_sub -h "$MQTT_BROKER" -p "$MQTT_PORT" -t "infoscreen/test" mosquitto_sub -h "$MQTT_BROKER" -p "$MQTT_PORT" "${MQTT_AUTH_ARGS[@]}" "${MQTT_TLS_ARGS[@]}" -t "infoscreen/test"

271
scripts/test-power-intent.sh Executable file
View File

@@ -0,0 +1,271 @@
#!/bin/bash
# Test TV power intent MQTT message flow (Phase 1 contract v1)
# Requires: mosquitto_pub / mosquitto_sub
#
# This first section resolves paths, loads .env, builds the mosquitto auth/TLS
# argument arrays, reads the runtime group/client ids and prints a banner.
set -euo pipefail
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
# ANSI colour escapes, rendered by `echo -e`.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'
# ── Load .env ────────────────────────────────────────────────────────────────
# The file is parsed as KEY=VALUE text and never executed as shell code.
# Limitations of this parser:
#   - only keys matching ^[A-Z_][A-Z0-9_]*$ are exported
#   - everything from the first '#' in a value is dropped, so a value that
#     contains '#' is truncated
#   - surrounding quotes are not removed from values
#   - a final line without a trailing newline is not processed
ENV_FILE="$PROJECT_ROOT/.env"
if [ -f "$ENV_FILE" ]; then
# Strip inline comments and surrounding whitespace before export.
while IFS='=' read -r key value; do
key="${key//[$'\t\r\n']}"
key="$(echo "$key" | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')"
# Skip comments/empty lines/invalid keys
[[ -z "$key" ]] && continue
[[ "$key" =~ ^# ]] && continue
[[ "$key" =~ ^[A-Z_][A-Z0-9_]*$ ]] || continue
value="${value%%#*}" # strip inline comments
value="${value//[$'\t\r\n']}"
value="$(echo "$value" | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')"
export "$key=$value"
done < "$ENV_FILE"
fi
# Connection settings: values from .env / environment, with defaults.
BROKER="${MQTT_BROKER:-localhost}"
PORT="${MQTT_PORT:-1883}"
MQTT_USERNAME="${MQTT_USERNAME:-}"
MQTT_PASSWORD="${MQTT_PASSWORD:-}"
MQTT_TLS_ENABLED="${MQTT_TLS_ENABLED:-0}"
MQTT_TLS_CA_CERT="${MQTT_TLS_CA_CERT:-}"
MQTT_TLS_CERT="${MQTT_TLS_CERT:-}"
MQTT_TLS_KEY="${MQTT_TLS_KEY:-}"
MQTT_TLS_INSECURE="${MQTT_TLS_INSECURE:-0}"
# Optional argument arrays appended to every mosquitto_pub/mosquitto_sub call;
# they stay empty when no credentials / TLS settings are configured.
MQTT_AUTH_ARGS=()
MQTT_TLS_ARGS=()
if [[ -n "$MQTT_USERNAME" ]]; then
MQTT_AUTH_ARGS+=( -u "$MQTT_USERNAME" )
fi
if [[ -n "$MQTT_PASSWORD" ]]; then
MQTT_AUTH_ARGS+=( -P "$MQTT_PASSWORD" )
fi
# TLS options are added only when MQTT_TLS_ENABLED is 1, true or yes.
if [[ "$MQTT_TLS_ENABLED" == "1" || "$MQTT_TLS_ENABLED" == "true" || "$MQTT_TLS_ENABLED" == "yes" ]]; then
[[ -n "$MQTT_TLS_CA_CERT" ]] && MQTT_TLS_ARGS+=( --cafile "$MQTT_TLS_CA_CERT" )
[[ -n "$MQTT_TLS_CERT" ]] && MQTT_TLS_ARGS+=( --cert "$MQTT_TLS_CERT" )
[[ -n "$MQTT_TLS_KEY" ]] && MQTT_TLS_ARGS+=( --key "$MQTT_TLS_KEY" )
if [[ "$MQTT_TLS_INSECURE" == "1" || "$MQTT_TLS_INSECURE" == "true" || "$MQTT_TLS_INSECURE" == "yes" ]]; then
MQTT_TLS_ARGS+=( --insecure )
fi
fi
# ── Read runtime IDs ─────────────────────────────────────────────────────────
# Both ids are read from files under src/config; each stays empty when its
# file does not exist, and all whitespace is removed from the content.
GROUP_ID_FILE="$PROJECT_ROOT/src/config/last_group_id.txt"
CLIENT_UUID_FILE="$PROJECT_ROOT/src/config/client_uuid.txt"
GROUP_ID=""
CLIENT_UUID=""
[ -f "$GROUP_ID_FILE" ] && GROUP_ID="$(cat "$GROUP_ID_FILE" 2>/dev/null | tr -d '[:space:]')"
[ -f "$CLIENT_UUID_FILE" ] && CLIENT_UUID="$(cat "$CLIENT_UUID_FILE" 2>/dev/null | tr -d '[:space:]')"
echo -e "${BLUE}================================================${NC}"
echo -e "${BLUE}TV Power Intent Test (Phase 1 Contract)${NC}"
echo -e "${BLUE}================================================${NC}"
echo " Broker : $BROKER:$PORT"
echo " Group : ${GROUP_ID:-<not assigned yet>}"
echo " Client : ${CLIENT_UUID:-<unknown>}"
echo ""
# ── Check tools ──────────────────────────────────────────────────────────────
# Only mosquitto_pub is verified here; mosquitto_sub and python3 are used
# further down in this script without a prior check.
if ! command -v mosquitto_pub &>/dev/null; then
echo -e "${RED}mosquitto_pub not found. Install with: sudo apt-get install mosquitto-clients${NC}"
exit 1
fi
# ── Helpers ──────────────────────────────────────────────────────────────────
# Print the current UTC time as an ISO-8601 timestamp with a fixed ".000" millisecond part.
now_iso() {
    local fmt="%Y-%m-%dT%H:%M:%S.000Z"
    date -u "+${fmt}"
}
# Print the UTC timestamp that lies <seconds> after now (default 90),
# in the same format as now_iso. Relies on GNU date's "-d @epoch" syntax.
expires_iso() {
    local offset_sec="${1:-90}"
    local target_epoch
    target_epoch=$(( $(date +%s) + offset_sec ))
    date -u -d "@${target_epoch}" +"%Y-%m-%dT%H:%M:%S.000Z"
}
# Print the power-intent topic of the group held in the global GROUP_ID.
group_topic() {
    printf '%s\n' "infoscreen/groups/${GROUP_ID}/power/intent"
}
# Publish a retained, QoS-1 power intent on the current group's intent topic.
#   $1  desired state ("on" or "off")
#   $2  reason string
#   $3  issued_at timestamp   (default: now)
#   $4  expires_at timestamp  (default: now + 90 seconds)
# For state "on" the payload carries one active event id and an event window
# of now .. now + 3600 seconds; otherwise those fields are empty / null.
# Returns 1 without publishing when GROUP_ID is empty or not an integer.
publish_intent() {
    local state="$1"
    local reason="$2"

    # Check the group before doing any other work.
    if [ -z "$GROUP_ID" ]; then
        echo -e "${RED}No group_id found. Subscribe a client and assign a group first.${NC}"
        return 1
    fi
    # group_id is written as a bare JSON number; a non-numeric value would put
    # an unparseable retained payload on the broker.
    if ! [[ "$GROUP_ID" =~ ^-?[0-9]+$ ]]; then
        echo -e "${RED}group_id '$GROUP_ID' is not numeric; refusing to publish invalid JSON.${NC}"
        return 1
    fi

    local issued="${3:-$(now_iso)}"
    local expires="${4:-$(expires_iso 90)}"
    local intent_id topic
    intent_id="$(python3 -c 'import uuid; print(uuid.uuid4())')"
    topic="$(group_topic)"

    # Fields that depend on the desired state.
    local active_ids="" window_start="null" window_end="null"
    if [ "$state" = "on" ]; then
        active_ids="1"
        window_start="\"$(now_iso)\""
        window_end="\"$(expires_iso 3600)\""
    fi

    local payload
    payload=$(cat <<EOF
{
  "schema_version": "1.0",
  "intent_id": "$intent_id",
  "group_id": $GROUP_ID,
  "desired_state": "$state",
  "reason": "$reason",
  "issued_at": "$issued",
  "expires_at": "$expires",
  "poll_interval_sec": 15,
  "active_event_ids": [$active_ids],
  "event_window_start": $window_start,
  "event_window_end": $window_end
}
EOF
)
    echo -e "${YELLOW}Publishing to: $topic${NC}"
    # Pretty-print when possible; fall back to the raw payload.
    echo "$payload" | python3 -m json.tool 2>/dev/null || echo "$payload"
    echo ""
    mosquitto_pub -h "$BROKER" -p "$PORT" "${MQTT_AUTH_ARGS[@]}" "${MQTT_TLS_ARGS[@]}" -t "$topic" -q 1 --retain -m "$payload"
    echo -e "${GREEN}Published (retained, QoS 1)${NC}"
    echo "intent_id: $intent_id"
}
# Remove the retained power intent of the current group by publishing an
# empty retained message on its topic. Returns 1 when GROUP_ID is empty.
clear_intent() {
    [ -n "$GROUP_ID" ] || {
        echo -e "${RED}No group_id found.${NC}"
        return 1
    }

    local intent_topic
    intent_topic="$(group_topic)"

    mosquitto_pub -h "$BROKER" -p "$PORT" "${MQTT_AUTH_ARGS[@]}" "${MQTT_TLS_ARGS[@]}" \
        -t "$intent_topic" -q 1 --retain --null-message
    echo -e "${GREEN}Retained intent cleared from broker${NC}"
}
# Print the three runtime JSON files under src/ (power intent state, power
# state, process health). Each file is pretty-printed via json.tool, with a
# plain `cat` fallback; missing files are reported by name.
show_state_files() {
    local -a runtime_files=(
        "$PROJECT_ROOT/src/power_intent_state.json"
        "$PROJECT_ROOT/src/power_state.json"
        "$PROJECT_ROOT/src/current_process_health.json"
    )
    local path name

    echo ""
    for path in "${runtime_files[@]}"; do
        name="$(basename "$path")"
        if ! [ -f "$path" ]; then
            echo -e "${YELLOW}$name not found${NC}"
        else
            echo -e "${BLUE}── $name ──────────────────${NC}"
            python3 -m json.tool "$path" 2>/dev/null || cat "$path"
        fi
        echo ""
    done
}
# Follow the display_manager and simclient logs and show only power-related
# lines. Whichever of the two log files exist are followed; previously a lone
# simclient.log was wrongly reported as "No log files found".
# Blocks until interrupted with Ctrl-C.
watch_logs() {
    echo -e "${YELLOW}Following power-related log entries (Ctrl-C to stop)...${NC}"
    local dm_log="$PROJECT_ROOT/logs/display_manager.log"
    local sc_log="$PROJECT_ROOT/src/simclient.log"

    # Collect the logs that are actually present.
    local -a logs=()
    local candidate
    for candidate in "$dm_log" "$sc_log"; do
        if [ -f "$candidate" ]; then
            logs+=( "$candidate" )
        fi
    done

    if [ "${#logs[@]}" -eq 0 ]; then
        echo -e "${RED}No log files found. Have both processes run at least once?${NC}"
        return 0
    fi

    tail -f "${logs[@]}" | grep --line-buffered -i \
        -E "(power|intent|cec|turn|desired_state|mqtt_intent|local_fallback|POWER)"
}
# Subscribe to this client's power/state topic and pretty-print every message.
# Lines that parse as JSON are re-indented; anything else is echoed unchanged.
# Returns 1 when CLIENT_UUID is empty. Blocks until interrupted with Ctrl-C.
subscribe_power_state() {
    [ -n "$CLIENT_UUID" ] || {
        echo -e "${RED}No client_uuid found.${NC}"
        return 1
    }

    local state_topic="infoscreen/${CLIENT_UUID}/power/state"
    # Inline Python filter applied to each line received from mosquitto_sub.
    local pretty_printer='
import sys, json
for raw in sys.stdin:
    text = raw.strip()
    if text:
        try:
            print(json.dumps(json.loads(text), indent=2))
        except Exception:
            print(text)
'

    echo -e "${YELLOW}Subscribing to: $state_topic${NC}"
    echo "(Ctrl-C to stop)"
    echo ""
    mosquitto_sub -h "$BROKER" -p "$PORT" "${MQTT_AUTH_ARGS[@]}" "${MQTT_TLS_ARGS[@]}" -t "$state_topic" | \
        python3 -c "$pretty_printer"
}
# ── Menu ─────────────────────────────────────────────────────────────────────

# Run one menu action without letting its failure end the interactive session.
# The script runs under `set -e`, so a bare call that returns non-zero (no
# group id, broker unreachable, ...) would terminate the whole menu. The action
# runs in a subshell with errexit re-enabled, so it still stops at its first
# failing command; only the menu loop survives.
run_action() {
    local rc=0
    set +e
    ( set -e; "$@" )
    rc=$?
    set -e
    if [ "$rc" -ne 0 ]; then
        echo -e "${RED}Action failed (exit $rc) - returning to menu${NC}"
    fi
    return 0
}

# Publish a retained intent that lacks required fields (menu option 4).
publish_malformed_intent() {
    if [ -z "$GROUP_ID" ]; then
        echo -e "${RED}No group_id.${NC}"
        return 0
    fi
    local topic
    topic="$(group_topic)"
    mosquitto_pub -h "$BROKER" -p "$PORT" "${MQTT_AUTH_ARGS[@]}" "${MQTT_TLS_ARGS[@]}" -t "$topic" -q 1 --retain \
        -m '{"schema_version":"1.0","desired_state":"on"}'
    echo -e "${YELLOW}⚠ Malformed intent published - client must reject with 'missing required field' in log${NC}"
}

# Publish an intent whose issued_at and expires_at both lie in the past (menu option 3).
publish_stale_intent() {
    local stale_issued stale_expires
    stale_issued=$(date -u -d '5 minutes ago' +"%Y-%m-%dT%H:%M:%S.000Z")
    stale_expires=$(date -u -d '2 minutes ago' +"%Y-%m-%dT%H:%M:%S.000Z")
    publish_intent "on" "active_event" "$stale_issued" "$stale_expires"
    echo -e "${YELLOW}⚠ This intent is expired - client must reject it and show 'intent expired' in log${NC}"
}

while true; do
    echo -e "${BLUE}================================================${NC}"
    echo "Choose a test:"
    echo " 1) Publish ON intent (valid 90s, group ${GROUP_ID:-?})"
    echo " 2) Publish OFF intent (valid 90s, group ${GROUP_ID:-?})"
    echo " 3) Publish stale intent (already expired) — expect rejection"
    echo " 4) Publish malformed intent (missing fields) — expect rejection"
    echo " 5) Clear retained intent from broker (sends empty retained payload)"
    echo " 6) Show power_intent_state / power_state / health JSON files"
    echo " 7) Follow power-related log entries (display_manager + simclient)"
    echo " 8) Subscribe to infoscreen/{client}/power/state topic"
    echo " q) Quit"
    echo ""
    # On end of input `read` fails and `set -e` ends the script, which avoids
    # spinning forever on a closed stdin.
    read -rp "Enter choice: " choice
    echo ""
    case "$choice" in
        1)
            run_action publish_intent "on" "active_event"
            ;;
        2)
            run_action publish_intent "off" "no_active_event"
            ;;
        3)
            run_action publish_stale_intent
            ;;
        4)
            run_action publish_malformed_intent
            ;;
        5)
            run_action clear_intent
            ;;
        6)
            run_action show_state_files
            read -rp "Press Enter to continue..."
            ;;
        7)
            run_action watch_logs
            ;;
        8)
            run_action subscribe_power_state
            ;;
        q|Q)
            echo "Exiting."
            exit 0
            ;;
        *)
            echo -e "${RED}Invalid choice${NC}"
            ;;
    esac
    echo ""
done

244
scripts/test-reboot-command.sh Executable file
View File

@@ -0,0 +1,244 @@
#!/usr/bin/env bash
# Safe end-to-end command lifecycle canary for reboot/shutdown contract v1.
# Verifies ack flow: accepted -> execution_started -> completed/failed.
#
# Positional arguments (all optional):
#   $1 action      reboot_host | shutdown_host   (default reboot_host)
#   $2 mode        success | failed              (default success)
#   $3 topic mode  canonical | alias             (default canonical)
#   $4 wait        seconds to wait for acks      (default 25)
#
# This first section loads .env, builds the mosquitto argument arrays,
# validates the arguments, prepares the command payload and registers cleanup.
set -euo pipefail
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
# ANSI colour escapes, rendered by `echo -e`.
BLUE='\033[0;34m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
NC='\033[0m'
# Load .env as KEY=VALUE text (never executed as shell code).
# Only keys matching ^[A-Z_][A-Z0-9_]*$ are exported; everything from the first
# '#' in a value is dropped and surrounding quotes are not removed.
ENV_FILE="$PROJECT_ROOT/.env"
if [[ -f "$ENV_FILE" ]]; then
while IFS='=' read -r key value; do
key="${key//[$'\t\r\n']}"
key="$(echo "$key" | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')"
[[ -z "$key" ]] && continue
[[ "$key" =~ ^# ]] && continue
[[ "$key" =~ ^[A-Z_][A-Z0-9_]*$ ]] || continue
value="${value%%#*}"
value="${value//[$'\t\r\n']}"
value="$(echo "$value" | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')"
export "$key=$value"
done < "$ENV_FILE"
fi
# Connection settings: values from .env / environment, with defaults.
BROKER="${MQTT_BROKER:-localhost}"
PORT="${MQTT_PORT:-1883}"
MQTT_USERNAME="${MQTT_USERNAME:-}"
MQTT_PASSWORD="${MQTT_PASSWORD:-}"
MQTT_TLS_ENABLED="${MQTT_TLS_ENABLED:-0}"
MQTT_TLS_CA_CERT="${MQTT_TLS_CA_CERT:-}"
MQTT_TLS_CERT="${MQTT_TLS_CERT:-}"
MQTT_TLS_KEY="${MQTT_TLS_KEY:-}"
MQTT_TLS_INSECURE="${MQTT_TLS_INSECURE:-0}"
CLIENT_UUID_FILE="$PROJECT_ROOT/src/config/client_uuid.txt"
LAST_COMMAND_STATE_FILE="$PROJECT_ROOT/src/config/last_command_state.json"
# Hard requirements: a client UUID file and both mosquitto command-line tools.
if [[ ! -f "$CLIENT_UUID_FILE" ]]; then
echo -e "${RED}client UUID file missing: $CLIENT_UUID_FILE${NC}"
exit 1
fi
if ! command -v mosquitto_pub >/dev/null 2>&1 || ! command -v mosquitto_sub >/dev/null 2>&1; then
echo -e "${RED}mosquitto_pub/sub not found. Install mosquitto-clients.${NC}"
exit 1
fi
CLIENT_UUID="$(tr -d '[:space:]' < "$CLIENT_UUID_FILE")"
# Canonical topics plus the singular "command" alias topics.
COMMAND_TOPIC="infoscreen/${CLIENT_UUID}/commands"
COMMAND_TOPIC_ALIAS="infoscreen/${CLIENT_UUID}/command"
ACK_TOPIC="infoscreen/${CLIENT_UUID}/commands/ack"
ACK_TOPIC_ALIAS="infoscreen/${CLIENT_UUID}/command/ack"
# Optional argument arrays appended to every mosquitto call; empty when no
# credentials / TLS settings are configured.
MQTT_AUTH_ARGS=()
MQTT_TLS_ARGS=()
if [[ -n "$MQTT_USERNAME" ]]; then
MQTT_AUTH_ARGS+=( -u "$MQTT_USERNAME" )
fi
if [[ -n "$MQTT_PASSWORD" ]]; then
MQTT_AUTH_ARGS+=( -P "$MQTT_PASSWORD" )
fi
if [[ "$MQTT_TLS_ENABLED" == "1" || "$MQTT_TLS_ENABLED" == "true" || "$MQTT_TLS_ENABLED" == "yes" ]]; then
[[ -n "$MQTT_TLS_CA_CERT" ]] && MQTT_TLS_ARGS+=( --cafile "$MQTT_TLS_CA_CERT" )
[[ -n "$MQTT_TLS_CERT" ]] && MQTT_TLS_ARGS+=( --cert "$MQTT_TLS_CERT" )
[[ -n "$MQTT_TLS_KEY" ]] && MQTT_TLS_ARGS+=( --key "$MQTT_TLS_KEY" )
if [[ "$MQTT_TLS_INSECURE" == "1" || "$MQTT_TLS_INSECURE" == "true" || "$MQTT_TLS_INSECURE" == "yes" ]]; then
MQTT_TLS_ARGS+=( --insecure )
fi
fi
ACTION="${1:-reboot_host}"
MODE="${2:-success}" # success | failed
TOPIC_MODE="${3:-canonical}" # canonical | alias
WAIT_SEC="${4:-25}"
# Argument validation: every invalid value ends the script with exit status 1.
if [[ "$ACTION" != "reboot_host" && "$ACTION" != "shutdown_host" ]]; then
echo -e "${RED}invalid action '$ACTION' (expected reboot_host|shutdown_host)${NC}"
exit 1
fi
if [[ "$MODE" != "success" && "$MODE" != "failed" ]]; then
echo -e "${RED}invalid mode '$MODE' (expected success|failed)${NC}"
exit 1
fi
if [[ "$TOPIC_MODE" != "canonical" && "$TOPIC_MODE" != "alias" ]]; then
echo -e "${RED}invalid topic mode '$TOPIC_MODE' (expected canonical|alias)${NC}"
exit 1
fi
if ! [[ "$WAIT_SEC" =~ ^[0-9]+$ ]] || [[ "$WAIT_SEC" -lt 1 ]]; then
echo -e "${RED}invalid wait seconds '$WAIT_SEC' (expected positive integer)${NC}"
exit 1
fi
# In alias mode the command is published on the singular "command" topic.
if [[ "$TOPIC_MODE" == "alias" ]]; then
COMMAND_TOPIC="$COMMAND_TOPIC_ALIAS"
fi
# Fresh UUID4 used to correlate acks with this run.
COMMAND_ID="$(python3 - <<'PY'
import uuid
print(uuid.uuid4())
PY
)"
# issued_at is now; expires_at is now + 240 seconds (GNU date "-d @epoch").
ISSUED_AT="$(date -u +"%Y-%m-%dT%H:%M:%SZ")"
EXPIRES_EPOCH="$(( $(date +%s) + 240 ))"
EXPIRES_AT="$(date -u -d "@$EXPIRES_EPOCH" +"%Y-%m-%dT%H:%M:%SZ")"
PAYLOAD="$(cat <<EOF
{
"schema_version": "1.0",
"command_id": "$COMMAND_ID",
"client_uuid": "$CLIENT_UUID",
"action": "$ACTION",
"issued_at": "$ISSUED_AT",
"expires_at": "$EXPIRES_AT",
"requested_by": 1,
"reason": "canary_test"
}
EOF
)"
# Temp file that collects everything the background ack subscribers receive.
TMP_ACK_LOG="$(mktemp)"
# Stop the background subscribers (SUB_PID_1/2 are set further down, once they
# are started) and remove the temp file. Runs on every exit path via the trap.
cleanup() {
[[ -n "${SUB_PID_1:-}" ]] && kill "$SUB_PID_1" >/dev/null 2>&1 || true
[[ -n "${SUB_PID_2:-}" ]] && kill "$SUB_PID_2" >/dev/null 2>&1 || true
rm -f "$TMP_ACK_LOG"
}
trap cleanup EXIT
echo -e "${BLUE}================================================${NC}"
echo -e "${BLUE}Command Lifecycle Canary${NC}"
echo -e "${BLUE}================================================${NC}"
echo " Broker : $BROKER:$PORT"
echo " Client UUID : $CLIENT_UUID"
echo " Command ID : $COMMAND_ID"
echo " Action : $ACTION"
echo " Mode : $MODE"
echo " Cmd Topic : $COMMAND_TOPIC"
echo " Ack Topics : $ACK_TOPIC , $ACK_TOPIC_ALIAS"
echo ""
echo -e "${YELLOW}IMPORTANT${NC}: to avoid real reboot/shutdown, run simclient with"
echo " COMMAND_HELPER_PATH=$PROJECT_ROOT/scripts/mock-command-helper.sh"
echo ""

# Subscribe first to avoid missing retained/non-retained race windows.
mosquitto_sub -h "$BROKER" -p "$PORT" "${MQTT_AUTH_ARGS[@]}" "${MQTT_TLS_ARGS[@]}" -q 1 -v -t "$ACK_TOPIC" >> "$TMP_ACK_LOG" &
SUB_PID_1=$!
mosquitto_sub -h "$BROKER" -p "$PORT" "${MQTT_AUTH_ARGS[@]}" "${MQTT_TLS_ARGS[@]}" -q 1 -v -t "$ACK_TOPIC_ALIAS" >> "$TMP_ACK_LOG" &
SUB_PID_2=$!
# Give both subscribers a moment to connect before the command goes out.
sleep 0.5

if [[ "$MODE" == "failed" ]]; then
    echo -e "${YELLOW}If simclient was started with MOCK_COMMAND_HELPER_FORCE_FAIL=1, expected terminal status is failed.${NC}"
fi

echo -e "${YELLOW}Publishing command payload...${NC}"
mosquitto_pub -h "$BROKER" -p "$PORT" "${MQTT_AUTH_ARGS[@]}" "${MQTT_TLS_ARGS[@]}" -q 1 -t "$COMMAND_TOPIC" -m "$PAYLOAD"

EXPECTED_TERMINAL="completed"
if [[ "$MODE" == "failed" ]]; then
    EXPECTED_TERMINAL="failed"
fi
# A successful reboot_host has no terminal ack to wait for in this run; an
# empty EXPECTED_TERMINAL means "accepted + execution_started is enough".
EXPECT_RECOVERY_COMPLETION=0
if [[ "$ACTION" == "reboot_host" && "$MODE" == "success" ]]; then
    EXPECTED_TERMINAL=""
    EXPECT_RECOVERY_COMPLETION=1
fi

# Print one status per line for every logged ack whose command_id equals
# COMMAND_ID. Each log line is "<topic> <payload>" (mosquitto_sub -v); the
# payload is parsed as JSON, so the result does not depend on how the client
# spaces its JSON, and acks of other commands (including retained ones) are
# ignored. Lines that are not complete JSON yet are skipped.
ack_statuses() {
    python3 - "$TMP_ACK_LOG" "$COMMAND_ID" <<'PY'
import json
import sys

path, command_id = sys.argv[1], sys.argv[2]
with open(path, "r", encoding="utf-8", errors="ignore") as f:
    for line in f:
        parts = line.strip().split(" ", 1)
        payload = parts[1] if len(parts) == 2 else parts[0]
        try:
            obj = json.loads(payload)
        except ValueError:
            continue
        if isinstance(obj, dict) and obj.get("command_id") == command_id:
            print(obj.get("status", ""))
PY
}

DEADLINE=$(( $(date +%s) + WAIT_SEC ))
SEEN_ACCEPTED=0
SEEN_STARTED=0
SEEN_TERMINAL=0
while [[ $(date +%s) -lt $DEADLINE ]]; do
    ACK_STATUSES="$(ack_statuses)" || ACK_STATUSES=""
    if grep -qx 'accepted' <<<"$ACK_STATUSES"; then SEEN_ACCEPTED=1; fi
    if grep -qx 'execution_started' <<<"$ACK_STATUSES"; then SEEN_STARTED=1; fi
    if [[ -n "$EXPECTED_TERMINAL" ]] && grep -qx "$EXPECTED_TERMINAL" <<<"$ACK_STATUSES"; then
        SEEN_TERMINAL=1
    fi
    # Stop polling as soon as everything that is expected has been observed.
    if [[ $SEEN_ACCEPTED -eq 1 && $SEEN_STARTED -eq 1 ]]; then
        if [[ -z "$EXPECTED_TERMINAL" || $SEEN_TERMINAL -eq 1 ]]; then
            break
        fi
    fi
    sleep 1
done
# ── Report ───────────────────────────────────────────────────────────────────
# Prints every logged ack that belongs to this command, then the PASS/FAIL
# verdict derived from the SEEN_* flags, then the client's last command state
# file when it exists. Exits with status 1 on FAIL.
echo ""
echo -e "${BLUE}Ack stream (filtered by command_id):${NC}"
python3 - "$TMP_ACK_LOG" "$COMMAND_ID" <<'PY'
import json
import sys

log_path, wanted_id = sys.argv[1], sys.argv[2]
with open(log_path, "r", encoding="utf-8", errors="ignore") as handle:
    for raw in handle:
        entry = raw.strip()
        if not entry:
            continue
        # mosquitto_sub -v writes "<topic> <payload>"; keep the payload part.
        pieces = entry.split(" ", 1)
        body = pieces[1] if len(pieces) == 2 else pieces[0]
        try:
            ack = json.loads(body)
        except Exception:
            continue
        if ack.get("command_id") == wanted_id:
            print(json.dumps(ack, indent=2))
PY

# Without both accepted and execution_started the run has failed outright.
if [[ $SEEN_ACCEPTED -ne 1 || $SEEN_STARTED -ne 1 ]]; then
    echo -e "${RED}FAIL${NC}: missing expected lifecycle states for command_id=$COMMAND_ID"
    if [[ -z "$EXPECTED_TERMINAL" ]]; then
        echo " observed: accepted=$SEEN_ACCEPTED execution_started=$SEEN_STARTED"
    else
        echo " observed: accepted=$SEEN_ACCEPTED execution_started=$SEEN_STARTED terminal($EXPECTED_TERMINAL)=$SEEN_TERMINAL"
    fi
    exit 1
fi

if [[ -z "$EXPECTED_TERMINAL" ]]; then
    echo -e "${GREEN}PASS${NC}: observed accepted -> execution_started"
    echo -e "${YELLOW}NOTE${NC}: completed for reboot_host is expected only after client reconnect/recovery."
elif [[ $SEEN_TERMINAL -eq 1 ]]; then
    echo -e "${GREEN}PASS${NC}: observed accepted -> execution_started -> $EXPECTED_TERMINAL"
else
    echo -e "${RED}FAIL${NC}: missing expected terminal state $EXPECTED_TERMINAL for command_id=$COMMAND_ID"
    exit 1
fi

if [[ -f "$LAST_COMMAND_STATE_FILE" ]]; then
    echo ""
    echo -e "${BLUE}Last command state:${NC}"
    # Pretty-print when the file is valid JSON, otherwise dump it as-is.
    python3 -m json.tool "$LAST_COMMAND_STATE_FILE" || cat "$LAST_COMMAND_STATE_FILE"
fi

View File

@@ -8,6 +8,13 @@ VERSION=latest
# MQTT Broker # MQTT Broker
MQTT_BROKER=192.168.1.100 MQTT_BROKER=192.168.1.100
MQTT_PORT=1883 MQTT_PORT=1883
MQTT_USERNAME=infoscreen-client-<client-uuid-prefix>
MQTT_PASSWORD=<set-per-device-20-char-random-password>
MQTT_TLS_ENABLED=0
# MQTT_TLS_CA_CERT=/etc/infoscreen/mqtt/ca.crt
# MQTT_TLS_CERT=/etc/infoscreen/mqtt/client.crt
# MQTT_TLS_KEY=/etc/infoscreen/mqtt/client.key
# MQTT_TLS_INSECURE=0
# Timing (production values) # Timing (production values)
HEARTBEAT_INTERVAL=60 HEARTBEAT_INTERVAL=60

View File

@@ -9,6 +9,13 @@ LOG_LEVEL=DEBUG
# MQTT Broker Configuration # MQTT Broker Configuration
MQTT_BROKER=192.168.1.100 # Change to your MQTT server IP MQTT_BROKER=192.168.1.100 # Change to your MQTT server IP
MQTT_PORT=1883 MQTT_PORT=1883
MQTT_USERNAME=infoscreen-client-<client-uuid-prefix>
MQTT_PASSWORD=<set-per-device-20-char-random-password>
MQTT_TLS_ENABLED=0
# MQTT_TLS_CA_CERT=/etc/infoscreen/mqtt/ca.crt
# MQTT_TLS_CERT=/etc/infoscreen/mqtt/client.crt
# MQTT_TLS_KEY=/etc/infoscreen/mqtt/client.key
# MQTT_TLS_INSECURE=0
# Timing Configuration (shorter intervals for development) # Timing Configuration (shorter intervals for development)
HEARTBEAT_INTERVAL=10 # Heartbeat frequency in seconds HEARTBEAT_INTERVAL=10 # Heartbeat frequency in seconds

View File

@@ -1,274 +1,165 @@
# Infoscreen Client - Raspberry Pi Development # Developer Guide
A presentation system client for Raspberry Pi that communicates with a server via MQTT to display presentations, videos, and web content in kiosk mode. This document is the developer-facing companion to the root [README.md](../README.md). It focuses on code structure, runtime boundaries, MQTT flow, and debugging during implementation work.
## Features For installation, operator usage, and deployment, start at [README.md](../README.md).
- 📡 MQTT communication with server ## Architecture
- 📥 Automatic file downloads (presentations, videos)
- 🖥️ **Automated display management** with dedicated Display Manager
- 🎯 Event-driven content switching (presentations, videos, web pages)
- ⏰ Time-based event scheduling with automatic start/stop
- 🔄 Graceful application transitions (LibreOffice, Chromium, VLC)
- 📸 Screenshot capture for dashboard monitoring
- 👥 Group-based content management
- 💖 Heartbeat monitoring
## Quick Setup The client is split into two cooperating processes:
### 1. Flash Raspberry Pi OS - `simclient.py`: MQTT communication, discovery, group assignment, event intake, heartbeat, dashboard publishing, power-intent intake.
- Use **Raspberry Pi OS (64-bit) with Desktop** - `display_manager.py`: event polling, display orchestration, HDMI-CEC, screenshots, local process health state.
- Enable SSH and configure WiFi in Pi Imager
- Boot Pi and connect to network Primary runtime flow:
1. `simclient.py` receives group and event messages over MQTT.
2. It writes the active event into `current_event.json`.
3. `display_manager.py` polls that file and starts or stops the display process.
4. `display_manager.py` writes health, screenshot, and power telemetry files.
5. `simclient.py` publishes dashboard, health, and power-state messages.
## Key Files
- `display_manager.py`: display lifecycle, HDMI-CEC, screenshots, local fallback logic.
- `simclient.py`: MQTT callbacks, event persistence, dashboard publishing, power-intent validation, command intake.
- `current_event.json`: active event state consumed by the display manager.
- `current_process_health.json`: local health bridge for monitoring.
- `power_intent_state.json`: latest validated power intent from MQTT.
- `power_state.json`: latest applied power action telemetry.
- `screenshots/meta.json`: screenshot metadata used by the dashboard path.
- `../scripts/start-simclient.sh`: launcher for `simclient.py` (used by the systemd unit).
- `../scripts/start-display-manager.sh`: launcher for `display_manager.py`.
- `../scripts/infoscreen-simclient.service`: systemd unit for `simclient.py`.
- `../scripts/infoscreen-display.service`: systemd unit for `display_manager.py`.
- `../scripts/infoscreen-notify-failure@.service`: systemd template unit; fires on `OnFailure=`.
- `../scripts/infoscreen-notify-failure.sh`: publishes `service_failed` MQTT alert when a unit gives up.
## Developer Workflow
On deployed devices, both processes are managed by systemd:
### 2. Install Development Environment
```bash ```bash
# Run on your Raspberry Pi: # Start / stop / restart
curl -sSL https://raw.githubusercontent.com/RobbStarkAustria/infoscreen_client_2025/main/pi-dev-setup.sh | bash sudo systemctl start infoscreen-simclient infoscreen-display
sudo systemctl restart infoscreen-simclient infoscreen-display
# Follow logs
journalctl -u infoscreen-simclient -u infoscreen-display -f
``` ```
### 3. Configure MQTT Broker First-time systemd setup:
```bash
sudo cp scripts/infoscreen-simclient.service /etc/systemd/system/
sudo cp scripts/infoscreen-display.service /etc/systemd/system/
sudo cp scripts/infoscreen-notify-failure@.service /etc/systemd/system/
sudo systemctl daemon-reload
sudo systemctl enable infoscreen-simclient infoscreen-display
```
Or run `src/pi-setup.sh` which includes the above as step 14.
For local development without systemd:
```bash ```bash
cd ~/infoscreen-dev cd ~/infoscreen-dev
nano .env source venv/bin/activate
# Update MQTT_BROKER=your-server-ip
```
### 4. Test Setup # Terminal 1
```bash ./scripts/start-simclient.sh
./scripts/test-mqtt.sh # Test MQTT connection
./scripts/test-screenshot.sh # Test screenshot capture
./scripts/test-presentation.sh # Test presentation tools
```
### 5. Start Development # Terminal 2
```bash
# Terminal 1: Start MQTT client (receives events)
./scripts/start-dev.sh
# Terminal 2: Start Display Manager (controls screen)
./scripts/start-display-manager.sh ./scripts/start-display-manager.sh
# Or use interactive menu:
./dev-workflow.sh
``` ```
**Important**: You need **both** processes running: Useful helpers:
- `simclient.py` - Handles MQTT communication and writes events
- `display_manager.py` - Reads events and controls display software
See [DISPLAY_MANAGER.md](DISPLAY_MANAGER.md) for detailed documentation. - `./dev-workflow.sh`
- `./scripts/test-display-manager.sh`
## Development Workflow - `./scripts/test-mqtt.sh`
- `./scripts/test-screenshot.sh`
### Daily Development - `./scripts/test-power-intent.sh`
```bash - `./scripts/test-progress-bars.sh`
cd ~/infoscreen-dev
./dev-workflow.sh # Interactive menu with all options
```
**Menu Options:**
1. Start development client (MQTT)
2. Start Display Manager
3. View live logs
4. Test Display Manager
5. Test screenshot capture
6. Test MQTT connection
7. Test presentation tools
8. Git status and sync
9. Restart systemd services
10. Monitor system resources
11. Open tmux session
### Remote Development (Recommended)
```bash
# From your main computer:
# Add to ~/.ssh/config
Host pi-dev
HostName YOUR_PI_IP
User pi
# Connect with VS Code
code --remote ssh-remote+pi-dev ~/infoscreen-dev
```
## File Structure
```
~/infoscreen-dev/
├── .env # Configuration
├── src/ # Source code (this repository)
│ ├── simclient.py # MQTT client (event receiver)
│ ├── display_manager.py # Display controller (NEW!)
│ ├── current_event.json # Current active event
│ ├── DISPLAY_MANAGER.md # Display Manager documentation
│ └── config/ # Client UUID and group ID
├── venv/ # Python virtual environment
├── presentation/ # Downloaded presentation files
├── screenshots/ # Screenshot captures
├── logs/ # Application logs
│ ├── simclient.log # MQTT client logs
│ └── display_manager.log # Display Manager logs
└── scripts/ # Development helper scripts
├── start-dev.sh # Start MQTT client
├── start-display-manager.sh # Start Display Manager (NEW!)
├── test-display-manager.sh # Test display events (NEW!)
├── test-mqtt.sh # Test MQTT connection
├── test-screenshot.sh # Test screenshot capture
└── test-presentation.sh # Test presentation tools
```
## Configuration
### Environment Variables (.env)
```bash
# Development settings
ENV=development
DEBUG_MODE=1
LOG_LEVEL=DEBUG
# MQTT Configuration
MQTT_BROKER=192.168.1.100 # Your MQTT server IP
MQTT_PORT=1883
# Intervals (seconds)
HEARTBEAT_INTERVAL=10 # Heartbeat frequency
SCREENSHOT_INTERVAL=30 # Screenshot capture frequency
DISPLAY_CHECK_INTERVAL=5 # Display Manager event check frequency
```
## MQTT Topics ## MQTT Topics
### Client → Server ### Client → Server
- `infoscreen/discovery` - Client registration
- `infoscreen/{client_id}/heartbeat` - Regular heartbeat - `infoscreen/discovery`
- `infoscreen/{client_id}/dashboard` - Screenshot + status - `infoscreen/{client_id}/heartbeat`
- `infoscreen/{client_id}/dashboard`
- `infoscreen/{client_id}/health` — includes `broker_connection` block with `reconnect_count`, `last_disconnect_at`
- `infoscreen/{client_id}/power/state`
- `infoscreen/{client_id}/commands/ack` — command acknowledgement (states: `accepted`, `rejected`, `execution_started`, `completed`, `failed`)
- `infoscreen/{client_id}/command/ack` — legacy ack topic (also published for compatibility)
- `infoscreen/{client_id}/service_failed` — retained alert published by `infoscreen-notify-failure.sh` when systemd gives up restarting a unit
### Server → Client ### Server → Client
- `infoscreen/{client_id}/discovery_ack` - Registration acknowledgment
- `infoscreen/{client_id}/group_id` - Group assignment
- `infoscreen/events/{group_id}` - Event messages with content
## Event Format - `infoscreen/{client_id}/discovery_ack`
- `infoscreen/{client_id}/group_id`
- `infoscreen/events/{group_id}`
- `infoscreen/groups/{group_id}/power/intent`
- `infoscreen/{client_id}/commands` — remote command intake (`reboot`, `shutdown`)
The Display Manager supports three event types: ## Event and Display Notes
**Presentation Event:** Supported runtime content categories:
```json
{
"id": 1,
"title": "Company Overview",
"start": "2025-10-01 08:00:00",
"end": "2025-10-01 18:00:00",
"presentation": {
"files": [
{
"url": "https://server/presentations/slide.pptx",
"name": "slide.pptx"
}
],
"slide_interval": 10,
"auto_advance": true
}
}
```
**Web Page Event:** - presentation
```json - video
{ - web / webpage / website / webuntis
"id": 2,
"title": "Dashboard",
"start": "2025-10-01 08:00:00",
"end": "2025-10-01 18:00:00",
"web": {
"url": "https://dashboard.example.com"
}
}
```
**Video Event:** Presentation behavior is documented in [../IMPRESSIVE_INTEGRATION.md](../IMPRESSIVE_INTEGRATION.md).
```json
{
"id": 3,
"title": "Promo Video",
"start": "2025-10-01 08:00:00",
"end": "2025-10-01 18:00:00",
"video": {
"url": "https://server/videos/promo.mp4",
"loop": true
}
}
```
See [DISPLAY_MANAGER.md](DISPLAY_MANAGER.md) for complete event documentation. TV power coordination references:
- [../TV_POWER_INTENT_SERVER_CONTRACT_V1.md](../TV_POWER_INTENT_SERVER_CONTRACT_V1.md)
- [../TV_POWER_RUNBOOK.md](../TV_POWER_RUNBOOK.md)
## Debugging ## Debugging
### View Logs ### Logs
```bash ```bash
tail -f ~/infoscreen-dev/logs/simclient.log tail -f ~/infoscreen-dev/logs/display_manager.log ~/infoscreen-dev/src/simclient.log
``` ```
### MQTT Debugging ### Runtime Files
```bash
# Subscribe to all infoscreen topics
mosquitto_sub -h YOUR_BROKER_IP -t "infoscreen/+/+"
# Publish test event ```bash
mosquitto_pub -h YOUR_BROKER_IP -t "infoscreen/events/test-group" -m '{"web":{"url":"https://google.com"}}' cat ~/infoscreen-dev/src/current_event.json
cat ~/infoscreen-dev/src/current_process_health.json
cat ~/infoscreen-dev/src/power_intent_state.json
cat ~/infoscreen-dev/src/power_state.json
``` ```
### System Service (Optional) ### MQTT Inspection
```bash
# Enable automatic startup
sudo systemctl enable infoscreen-dev
sudo systemctl start infoscreen-dev
# View service logs ```bash
sudo journalctl -u infoscreen-dev -f mosquitto_sub -h YOUR_BROKER_IP -t 'infoscreen/#'
``` ```
## Hardware Requirements ### Screenshots
- **Raspberry Pi 4 or 5** (recommended Pi 5 for best performance)
- **SSD storage** (much faster than SD card)
- **Display** connected via HDMI
- **Network connection** (WiFi or Ethernet)
## Troubleshooting
### Display Issues
```bash ```bash
export DISPLAY=:0 ls -lh ~/infoscreen-dev/src/screenshots/
echo $DISPLAY cat ~/infoscreen-dev/src/screenshots/meta.json
``` ```
### Screenshot Issues ## Environment Notes
```bash
# Test screenshot manually
scrot ~/test.png
# Check permissions
sudo usermod -a -G video pi
```
### MQTT Connection Issues - `ENV=development` disables HDMI-CEC in the display manager.
```bash - `POWER_CONTROL_MODE` controls local vs hybrid vs mqtt power behavior.
# Test broker connectivity - `COMMAND_HELPER_PATH` points to the shell script that executes privileged commands (reboot/shutdown). Use `mock-command-helper.sh` for local testing.
telnet YOUR_BROKER_IP 1883 - `COMMAND_MOCK_REBOOT_IMMEDIATE_COMPLETE=1` makes a mock reboot complete immediately instead of waiting for process restart. Only works when the helper basename is `mock-command-helper.sh`.
# Check firewall - File download host rewriting is handled in `simclient.py` using `FILE_SERVER_*` settings.
sudo ufw status
```
## Development vs Production ## Related Documents
This setup is optimized for **development**: - [../README.md](../README.md)
- ✅ Fast iteration (edit → save → restart) - [DISPLAY_MANAGER.md](DISPLAY_MANAGER.md)
- ✅ Native debugging and logging - [IMPLEMENTATION_SUMMARY.md](IMPLEMENTATION_SUMMARY.md)
- ✅ Direct hardware access - [../CLIENT_MONITORING_SETUP.md](../CLIENT_MONITORING_SETUP.md)
- ✅ Remote development friendly - [../SCREENSHOT_MQTT_FIX.md](../SCREENSHOT_MQTT_FIX.md)
For **production deployment** with multiple clients, consider containerization for easier updates and management.
## License
This project is part of the infoscreen presentation system for educational/research purposes.

View File

@@ -79,6 +79,9 @@ CEC_DEVICE = os.getenv("CEC_DEVICE", "TV") # Target device name (TV, 0, etc.)
CEC_TURN_OFF_DELAY = int(os.getenv("CEC_TURN_OFF_DELAY", "30")) # seconds after last event ends CEC_TURN_OFF_DELAY = int(os.getenv("CEC_TURN_OFF_DELAY", "30")) # seconds after last event ends
CEC_POWER_ON_WAIT = int(os.getenv("CEC_POWER_ON_WAIT", "3")) # seconds to wait after turning TV on CEC_POWER_ON_WAIT = int(os.getenv("CEC_POWER_ON_WAIT", "3")) # seconds to wait after turning TV on
CEC_POWER_OFF_WAIT = int(os.getenv("CEC_POWER_OFF_WAIT", "2")) # seconds to wait after turning TV off CEC_POWER_OFF_WAIT = int(os.getenv("CEC_POWER_OFF_WAIT", "2")) # seconds to wait after turning TV off
POWER_CONTROL_MODE = os.getenv("POWER_CONTROL_MODE", "local").strip().lower()
POWER_INTENT_STATE_FILE = os.path.join(os.path.dirname(__file__), "power_intent_state.json")
POWER_STATE_FILE = os.path.join(os.path.dirname(__file__), "power_state.json")
# Setup logging # Setup logging
LOG_PATH = os.path.join(os.path.dirname(__file__), "..", "logs", "display_manager.log") LOG_PATH = os.path.join(os.path.dirname(__file__), "..", "logs", "display_manager.log")
@@ -131,6 +134,11 @@ class ProcessHealthState:
self.restart_count = 0 self.restart_count = 0
self.max_restarts = 3 self.max_restarts = 3
self.last_update = datetime.now(timezone.utc).isoformat() self.last_update = datetime.now(timezone.utc).isoformat()
self.power_control_mode = None
self.power_source = None
self.last_intent_id = None
self.last_power_action = None
self.last_power_at = None
def to_dict(self) -> Dict: def to_dict(self) -> Dict:
return { return {
@@ -140,7 +148,14 @@ class ProcessHealthState:
"process_pid": self.process_pid, "process_pid": self.process_pid,
"process_status": self.status, "process_status": self.status,
"restart_count": self.restart_count, "restart_count": self.restart_count,
"timestamp": datetime.now(timezone.utc).isoformat() "timestamp": datetime.now(timezone.utc).isoformat(),
"power_control": {
"mode": self.power_control_mode,
"source": self.power_source,
"last_intent_id": self.last_intent_id,
"last_action": self.last_power_action,
"last_power_at": self.last_power_at,
},
} }
def save(self): def save(self):
@@ -188,6 +203,14 @@ class ProcessHealthState:
self.save() self.save()
monitoring_logger.info("Process stopped (event ended or no active event)") monitoring_logger.info("Process stopped (event ended or no active event)")
def update_power_action(self, action: str, source: str, intent_id: Optional[str] = None):
    """Remember the most recent power action and persist the health state.

    Args:
        action: Power action that was taken (callers pass "on" or "off").
        source: Origin of the action (callers pass "mqtt_intent" or
            "local_fallback").
        intent_id: Identifier of the intent that triggered the action, if any.
    """
    stamp = datetime.now(timezone.utc).isoformat()
    self.last_power_action, self.power_source = action, source
    self.last_intent_id = intent_id
    self.last_power_at = stamp
    # Persist right away so the new values reach the saved health state.
    self.save()
class HDMICECController: class HDMICECController:
"""Controls HDMI-CEC to turn TV on/off automatically """Controls HDMI-CEC to turn TV on/off automatically
@@ -213,6 +236,7 @@ class HDMICECController:
self.power_off_wait = power_off_wait self.power_off_wait = power_off_wait
self.tv_state = None # None = unknown, True = on, False = off self.tv_state = None # None = unknown, True = on, False = off
self.turn_off_timer = None self.turn_off_timer = None
self.turn_off_guard = None
if not self.enabled: if not self.enabled:
logging.info("HDMI-CEC control disabled") logging.info("HDMI-CEC control disabled")
@@ -391,6 +415,16 @@ class HDMICECController:
def _turn_off_now(self) -> bool: def _turn_off_now(self) -> bool:
"""Internal method to turn TV off immediately""" """Internal method to turn TV off immediately"""
self.turn_off_timer = None
if callable(self.turn_off_guard):
try:
if not self.turn_off_guard():
logging.info("Skipping TV OFF due to runtime guard condition")
return True
except Exception as e:
logging.warning(f"Turn-off guard failed, continuing with OFF command: {e}")
# Skip if TV is already off # Skip if TV is already off
if self.tv_state is False: if self.tv_state is False:
logging.debug("TV already off, skipping CEC command") logging.debug("TV already off, skipping CEC command")
@@ -421,6 +455,10 @@ class HDMICECController:
self.turn_off_timer = None self.turn_off_timer = None
logging.debug("Cancelled TV turn-off timer") logging.debug("Cancelled TV turn-off timer")
def set_turn_off_guard(self, guard_fn):
    """Register the callback consulted before a delayed turn-off executes.

    Args:
        guard_fn: Callable taking no arguments; it must return True for the
            delayed turn-off to go ahead.
    """
    setattr(self, "turn_off_guard", guard_fn)
class DisplayProcess: class DisplayProcess:
"""Manages a running display application process""" """Manages a running display application process"""
@@ -598,9 +636,15 @@ class DisplayManager:
self.client_settings_mtime: Optional[float] = None self.client_settings_mtime: Optional[float] = None
self.client_volume_multiplier = 1.0 self.client_volume_multiplier = 1.0
self._video_duration_cache: Dict[str, float] = {} self._video_duration_cache: Dict[str, float] = {}
self.power_control_mode = POWER_CONTROL_MODE if POWER_CONTROL_MODE in ("local", "hybrid", "mqtt") else "local"
self.last_applied_intent_id: Optional[str] = None
self.last_seen_intent_id: Optional[str] = None
self.latest_valid_intent: Optional[Dict] = None
self.mqtt_mode_safe_off_armed = False
# Initialize health state tracking for process monitoring # Initialize health state tracking for process monitoring
self.health = ProcessHealthState() self.health = ProcessHealthState()
self.health.power_control_mode = self.power_control_mode
# Initialize HDMI-CEC controller # Initialize HDMI-CEC controller
self.cec = HDMICECController( self.cec = HDMICECController(
@@ -610,6 +654,8 @@ class DisplayManager:
power_on_wait=CEC_POWER_ON_WAIT, power_on_wait=CEC_POWER_ON_WAIT,
power_off_wait=CEC_POWER_OFF_WAIT power_off_wait=CEC_POWER_OFF_WAIT
) )
self.cec.set_turn_off_guard(self._allow_turn_off_now)
logging.info(f"Power control mode: {self.power_control_mode}")
# Setup signal handlers for graceful shutdown # Setup signal handlers for graceful shutdown
signal.signal(signal.SIGTERM, self._signal_handler) signal.signal(signal.SIGTERM, self._signal_handler)
@@ -811,6 +857,152 @@ class DisplayManager:
logging.error(f"Error reading event file: {e}") logging.error(f"Error reading event file: {e}")
return None return None
def _parse_utc_iso(self, value: str) -> datetime:
    """Parse an ISO-8601 timestamp string into a timezone-aware UTC datetime.

    A trailing UTC designator ("Z" or "z") is rewritten to "+00:00" before
    parsing, and a timestamp without any offset is treated as UTC.

    Args:
        value: Timestamp text; surrounding whitespace is ignored.

    Returns:
        The parsed instant converted to UTC.

    Raises:
        ValueError: If ``value`` is not a non-empty string or cannot be
            parsed by ``datetime.fromisoformat``.
    """
    if not isinstance(value, str) or not value.strip():
        raise ValueError("timestamp must be non-empty string")
    normalized = value.strip()
    # fromisoformat() only accepts the "Z" suffix on newer Python versions, so
    # normalise it here; the lowercase form is handled the same way.
    if normalized[-1] in ("Z", "z"):
        normalized = normalized[:-1] + "+00:00"
    parsed = datetime.fromisoformat(normalized)
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed.astimezone(timezone.utc)
def _get_power_intent_state(self) -> Optional[Dict]:
    """Load the current power intent payload from POWER_INTENT_STATE_FILE.

    The payload is accepted only when the file exists, holds a JSON object
    whose ``valid`` field is truthy, contains a dict ``payload`` and that
    payload's ``expires_at`` has not passed yet. The outcome is also stored
    in ``self.latest_valid_intent``.

    Returns:
        The intent payload dict, or None when no usable intent is available
        (including any read or parse error, which is logged as a warning).
    """
    usable = None
    try:
        if os.path.exists(POWER_INTENT_STATE_FILE):
            with open(POWER_INTENT_STATE_FILE, 'r', encoding='utf-8') as fh:
                state = json.load(fh)

            candidate = None
            if isinstance(state, dict) and state.get('valid'):
                candidate = state.get('payload')

            if isinstance(candidate, dict):
                expiry = self._parse_utc_iso(candidate.get('expires_at'))
                if datetime.now(timezone.utc) > expiry:
                    logging.warning(
                        "Ignoring stale power intent id=%s expires_at=%s",
                        candidate.get('intent_id'),
                        candidate.get('expires_at')
                    )
                else:
                    usable = candidate
    except Exception as e:
        # Best effort: an unreadable or malformed file counts as "no intent".
        logging.warning(f"Could not read power intent state: {e}")

    self.latest_valid_intent = usable
    return usable
def _write_power_state(self, applied_state: str, source: str, result: str, detail: str = "", intent_id: Optional[str] = None):
    """Store the latest power control action in POWER_STATE_FILE as JSON.

    The record is written to a ``.tmp`` sibling first and then moved over the
    target with ``os.replace``. Failures are logged at debug level and
    otherwise ignored.

    Args:
        applied_state: Power state that was applied.
        source: Origin of the action.
        result: Outcome label for the action.
        detail: Optional free-text detail.
        intent_id: Included in the record only when truthy.
    """
    try:
        power_section = {
            "applied_state": applied_state,
            "source": source,
            "result": result,
            "detail": detail,
        }
        record = {
            "schema_version": "1.0",
            "reported_at": datetime.now(timezone.utc).isoformat(),
            "power": power_section,
        }
        if intent_id:
            record["intent_id"] = intent_id

        staging_path = POWER_STATE_FILE + ".tmp"
        with open(staging_path, 'w', encoding='utf-8') as out:
            out.write(json.dumps(record, ensure_ascii=False, indent=2))
        os.replace(staging_path, POWER_STATE_FILE)
    except Exception as e:
        logging.debug(f"Could not write power state telemetry: {e}")
def _has_any_active_event_now(self) -> bool:
    """Report whether EVENT_FILE currently contains at least one active event.

    The file may hold a single event or a list of events; every dict entry is
    checked with ``self.is_event_active``. A missing file or any error while
    reading or evaluating yields False.
    """
    try:
        if not os.path.exists(EVENT_FILE):
            return False
        with open(EVENT_FILE, 'r', encoding='utf-8') as fh:
            loaded = json.load(fh)
        candidates = loaded if isinstance(loaded, list) else [loaded]
        return any(
            isinstance(entry, dict) and self.is_event_active(entry)
            for entry in candidates
        )
    except Exception:
        return False
def _allow_turn_off_now(self) -> bool:
    """Decide whether a delayed TV turn-off may run right now.

    Returns:
        False while an event is active or the current power intent asks for
        "on"; True otherwise.
    """
    if self._has_any_active_event_now():
        return False
    current_intent = self._get_power_intent_state()
    wants_on = bool(current_intent) and current_intent.get('desired_state') == 'on'
    return not wants_on
def _should_use_local_power_control(self, intent: Optional[Dict]) -> bool:
    """Tell whether TV power is driven locally for the current cycle.

    Returns:
        True in "local" mode; in "hybrid" mode True only when ``intent`` is
        None; False for every other mode (i.e. "mqtt").
    """
    mode = self.power_control_mode
    if mode == 'hybrid':
        # Hybrid falls back to local control only while no intent is available.
        return intent is None
    return mode == 'local'
def _apply_mqtt_power_intent(self, intent: Optional[Dict]):
    """Apply a power intent to the TV when running in "hybrid" or "mqtt" mode.

    Behaviour by case:
      * other modes: nothing happens.
      * no intent: in "mqtt" mode a delayed OFF is scheduled once (tracked by
        ``mqtt_mode_safe_off_armed``); in "hybrid" mode nothing happens.
      * intent whose id equals the last applied id: recorded as "skipped".
      * desired_state "on": cancel a pending OFF, turn the TV on, record it.
      * desired_state "off": schedule a delayed OFF and record it.
      * any other desired_state: ignored.

    Args:
        intent: Intent payload dict, or None when no usable intent exists.
    """
    mode = self.power_control_mode
    if mode != 'hybrid' and mode != 'mqtt':
        return

    if intent is None:
        if mode != 'mqtt' or self.mqtt_mode_safe_off_armed:
            return
        logging.warning("No valid MQTT power intent in mqtt mode - scheduling safe delayed OFF")
        self.cec.turn_off(delayed=True)
        self._write_power_state("off", "mqtt_intent", "ok", "mqtt_mode_no_valid_intent_safe_off")
        self.health.update_power_action("off", "mqtt_intent")
        self.mqtt_mode_safe_off_armed = True
        return

    intent_id = str(intent.get('intent_id', ''))
    desired_state = intent.get('desired_state')
    reason = intent.get('reason')

    already_applied = bool(intent_id) and intent_id == self.last_applied_intent_id
    if intent_id:
        self.last_seen_intent_id = intent_id

    if already_applied:
        # NOTE(review): this rewrites the power state record with "skipped" on
        # every call for an already-applied intent - confirm that is intended.
        self._write_power_state(desired_state or "unknown", "mqtt_intent", "skipped", "duplicate_intent_id", intent_id=intent_id)
        return

    if desired_state == 'on':
        logging.info("Applying MQTT power intent ON id=%s reason=%s", intent_id, reason)
        self.cec.cancel_turn_off()
        outcome = "ok" if self.cec.turn_on() else "error"
        self._write_power_state("on", "mqtt_intent", outcome, reason or "", intent_id=intent_id)
        self.health.update_power_action("on", "mqtt_intent", intent_id)
        self.last_applied_intent_id = intent_id
        self.mqtt_mode_safe_off_armed = False
    elif desired_state == 'off':
        logging.info("Applying MQTT power intent OFF id=%s reason=%s", intent_id, reason)
        self.cec.turn_off(delayed=True)
        self._write_power_state("off", "mqtt_intent", "ok", reason or "", intent_id=intent_id)
        self.health.update_power_action("off", "mqtt_intent", intent_id)
        self.last_applied_intent_id = intent_id
        self.mqtt_mode_safe_off_armed = True
def is_event_active(self, event: Dict) -> bool: def is_event_active(self, event: Dict) -> bool:
"""Check if event should be displayed based on start/end times """Check if event should be displayed based on start/end times
@@ -922,6 +1114,7 @@ class DisplayManager:
# Turn off TV when display stops (with configurable delay) # Turn off TV when display stops (with configurable delay)
if turn_off_tv: if turn_off_tv:
self.cec.turn_off(delayed=True) self.cec.turn_off(delayed=True)
self.health.update_power_action("off", "local_fallback")
def start_presentation(self, event: Dict) -> Optional[DisplayProcess]: def start_presentation(self, event: Dict) -> Optional[DisplayProcess]:
"""Start presentation display (PDF/PowerPoint/LibreOffice) using Impressive """Start presentation display (PDF/PowerPoint/LibreOffice) using Impressive
@@ -1658,6 +1851,9 @@ class DisplayManager:
def process_events(self): def process_events(self):
"""Main processing loop - check for event changes and manage display""" """Main processing loop - check for event changes and manage display"""
power_intent = self._get_power_intent_state()
self._apply_mqtt_power_intent(power_intent)
local_power_control = self._should_use_local_power_control(power_intent)
event_data = self.read_event_file() event_data = self.read_event_file()
@@ -1665,7 +1861,7 @@ class DisplayManager:
if not event_data: if not event_data:
if self.current_process: if self.current_process:
logging.info("No active event - stopping current display") logging.info("No active event - stopping current display")
self.stop_current_display() self.stop_current_display(turn_off_tv=local_power_control)
return return
# Handle event arrays (take first event) # Handle event arrays (take first event)
@@ -1674,7 +1870,7 @@ class DisplayManager:
if not events_to_process: if not events_to_process:
if self.current_process: if self.current_process:
logging.info("Empty event list - stopping current display") logging.info("Empty event list - stopping current display")
self.stop_current_display() self.stop_current_display(turn_off_tv=local_power_control)
return return
# Process first active event # Process first active event
@@ -1687,7 +1883,7 @@ class DisplayManager:
if not active_event: if not active_event:
if self.current_process: if self.current_process:
logging.info("No active events in time window - stopping current display") logging.info("No active events in time window - stopping current display")
self.stop_current_display() self.stop_current_display(turn_off_tv=local_power_control)
return return
# Get event identifier # Get event identifier
@@ -1755,7 +1951,8 @@ class DisplayManager:
else: else:
# Everything is fine, continue # Everything is fine, continue
# Cancel any pending TV turn-off since event is still active # Cancel any pending TV turn-off since event is still active
self.cec.cancel_turn_off() if local_power_control:
self.cec.cancel_turn_off()
self._apply_runtime_video_settings(active_event) self._apply_runtime_video_settings(active_event)
return return
else: else:
@@ -1773,7 +1970,9 @@ class DisplayManager:
logging.info(f" Event end time (UTC): {active_event['end']}") logging.info(f" Event end time (UTC): {active_event['end']}")
# Turn on TV before starting display # Turn on TV before starting display
self.cec.turn_on() if local_power_control:
self.cec.turn_on()
self.health.update_power_action("on", "local_fallback")
new_process = self.start_display_for_event(active_event) new_process = self.start_display_for_event(active_event)

View File

@@ -18,7 +18,11 @@ log_warn() { echo -e "${YELLOW}⚠️ $1${NC}"; }
log_err() { echo -e "${RED}$1${NC}"; } log_err() { echo -e "${RED}$1${NC}"; }
# Configuration # Configuration
PROJECT_DIR="$HOME/infoscreen-dev" # Resolve the actual unprivileged user even when the script is invoked via sudo.
ACTUAL_USER="${SUDO_USER:-$USER}"
# Look the home directory up in the passwd database rather than eval-expanding
# "~user", so the user name is never re-interpreted by the shell.
ACTUAL_HOME="$(getent passwd "$ACTUAL_USER" 2>/dev/null | cut -d: -f6 || true)"
# Fall back to the current HOME when the lookup yields nothing.
ACTUAL_HOME="${ACTUAL_HOME:-$HOME}"
PROJECT_DIR="$ACTUAL_HOME/infoscreen-dev"
REPO_URL="https://github.com/RobbStarkAustria/infoscreen-client-dev-2025.git" # Public HTTPS clone REPO_URL="https://github.com/RobbStarkAustria/infoscreen-client-dev-2025.git" # Public HTTPS clone
VENV_DIR="$PROJECT_DIR/venv" VENV_DIR="$PROJECT_DIR/venv"
REQ_FILE="$PROJECT_DIR/src/requirements.txt" REQ_FILE="$PROJECT_DIR/src/requirements.txt"
@@ -125,13 +129,57 @@ EOF
chmod +x "$PROJECT_DIR/scripts/start-dev.sh" chmod +x "$PROJECT_DIR/scripts/start-dev.sh"
log_ok "start-dev.sh created" log_ok "start-dev.sh created"
# 13. SSH enable (for remote dev) # 13. Install command helper + sudoers rule for reboot/shutdown command execution
log_step "Installing command helper and sudoers policy..."
HELPER_SRC="$PROJECT_DIR/scripts/infoscreen-cmd-helper.sh"
HELPER_DST="/usr/local/bin/infoscreen-cmd-helper.sh"
SUDOERS_FILE="/etc/sudoers.d/infoscreen-command-helper"
# The rule must name the unprivileged account, not whoever the script currently
# runs as: when invoked through sudo, $USER is root.
HELPER_USER="${ACTUAL_USER:-${SUDO_USER:-$USER}}"

if [ -f "$HELPER_SRC" ]; then
  sudo install -m 0755 "$HELPER_SRC" "$HELPER_DST"
  # Validate the rule in a scratch file first so a malformed rule is never
  # placed in /etc/sudoers.d.
  SUDOERS_TMP="$(mktemp)"
  echo "$HELPER_USER ALL=(ALL) NOPASSWD: $HELPER_DST" > "$SUDOERS_TMP"
  if sudo visudo -cf "$SUDOERS_TMP" >/dev/null; then
    sudo install -m 0440 -o root -g root "$SUDOERS_TMP" "$SUDOERS_FILE"
    log_ok "Command helper installed at $HELPER_DST and sudoers rule validated"
  else
    log_err "Generated sudoers rule failed validation; $SUDOERS_FILE not installed"
  fi
  rm -f "$SUDOERS_TMP"
else
  log_warn "Command helper source not found: $HELPER_SRC"
fi
# 14. Systemd service units (simclient + display manager)
log_step "Installing systemd service units..."
SERVICES_SRC="$PROJECT_DIR/scripts"
SYSTEMD_DIR="/etc/systemd/system"

for unit in infoscreen-simclient.service infoscreen-display.service "infoscreen-notify-failure@.service"; do
  if [ -f "$SERVICES_SRC/$unit" ]; then
    # Substitute the dev-machine username/home with the actual target user.
    # Order matters: the user name is replaced first, then the resulting
    # /home/<user> prefix is mapped to the real home directory.
    sed -e "s|olafn|$ACTUAL_USER|g" \
        -e "s|/home/$ACTUAL_USER|$ACTUAL_HOME|g" \
        "$SERVICES_SRC/$unit" | sudo tee "$SYSTEMD_DIR/$unit" > /dev/null
    log_ok "Installed $SYSTEMD_DIR/$unit (user=$ACTUAL_USER)"
  else
    log_warn "Service unit not found: $SERVICES_SRC/$unit"
  fi
done

sudo systemctl daemon-reload

# The notify-failure template unit is not enabled here; only the two services are.
for unit in infoscreen-simclient.service infoscreen-display.service; do
  if [ -f "$SYSTEMD_DIR/$unit" ]; then
    # Report the real outcome instead of logging success unconditionally.
    if sudo systemctl enable "$unit" >/dev/null 2>&1; then
      log_ok "Enabled: $unit"
    else
      log_warn "Could not enable: $unit"
    fi
  fi
done

log_ok "Systemd units installed. Start with: sudo systemctl start infoscreen-simclient infoscreen-display"
# 15. SSH enable (for remote dev)
log_step "Ensuring SSH service enabled..." log_step "Ensuring SSH service enabled..."
sudo systemctl enable ssh >/dev/null 2>&1 || true sudo systemctl enable ssh >/dev/null 2>&1 || true
sudo systemctl start ssh >/dev/null 2>&1 || true sudo systemctl start ssh >/dev/null 2>&1 || true
log_ok "SSH service active" log_ok "SSH service active"
# 14. Summary # 16. Summary
echo "" echo ""
echo -e "${GREEN}🎉 Setup complete!${NC}" echo -e "${GREEN}🎉 Setup complete!${NC}"
echo "Project: $PROJECT_DIR" echo "Project: $PROJECT_DIR"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,287 @@
"""
Unit tests for reboot/shutdown command intake primitives.
Run from project root (venv activated):
python -m pytest tests/test_command_intake.py -v
"""
import os
import sys
import json
import tempfile
import unittest
from datetime import datetime, timezone, timedelta
from unittest.mock import patch
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src"))
from simclient import ( # noqa: E402
NIL_COMMAND_ID,
command_requires_recovery_completion,
command_mock_reboot_immediate_complete_enabled,
configure_mqtt_security,
mqtt,
validate_command_payload,
publish_command_ack,
_prune_processed_commands,
load_processed_commands,
persist_processed_commands,
)
class FakePublishResult:
    """Bare-bones substitute for the value returned by a client's ``publish``.

    Exposes only the ``rc`` attribute that the fakes in this module set.
    """

    def __init__(self, rc):
        # Result code to report for the publish attempt.
        self.rc = rc
class FakeMqttClient:
    """MQTT client double: records every publish and answers with a fixed rc."""

    def __init__(self, rc=0):
        # One dict per publish() invocation, in call order.
        self.calls = []
        # Result code handed back by every publish().
        self.rc = rc

    def publish(self, topic, payload, qos=0, retain=False):
        """Log the arguments and return a ``FakePublishResult`` with ``self.rc``."""
        record = dict(topic=topic, payload=payload, qos=qos, retain=retain)
        self.calls.append(record)
        return FakePublishResult(self.rc)
class SequencedMqttClient:
    """MQTT client double whose publishes return a scripted series of rcs.

    Result codes are consumed front-to-back; once the script is exhausted
    every further publish reports ``0``.
    """

    def __init__(self, rc_sequence):
        self.calls = []
        # Private copy so the caller's iterable is never mutated.
        self._rc_sequence = list(rc_sequence)

    def publish(self, topic, payload, qos=0, retain=False):
        """Record the call (including the rc used) and return that rc."""
        if self._rc_sequence:
            next_rc = self._rc_sequence.pop(0)
        else:
            next_rc = 0
        self.calls.append(
            dict(topic=topic, payload=payload, qos=qos, retain=retain, rc=next_rc)
        )
        return FakePublishResult(next_rc)
class FakeSecurityClient:
    """Client double that captures the credential and TLS settings applied to it."""

    def __init__(self):
        # Nothing configured yet: all captured settings start as None.
        self.username = self.password = None
        self.tls_kwargs = self.tls_insecure = None

    def username_pw_set(self, username, password=None):
        """Remember the credentials passed in."""
        self.username, self.password = username, password

    def tls_set(self, **kwargs):
        """Remember the keyword arguments of the TLS setup call."""
        self.tls_kwargs = kwargs

    def tls_insecure_set(self, enabled):
        """Remember the value passed to the insecure-TLS switch."""
        self.tls_insecure = enabled
def _valid_payload(seconds_valid=240):
now = datetime.now(timezone.utc)
exp = now + timedelta(seconds=seconds_valid)
return {
"schema_version": "1.0",
"command_id": "5d1f8b4b-7e85-44fb-8f38-3f5d5da5e2e4",
"client_uuid": "9b8d1856-ff34-4864-a726-12de072d0f77",
"action": "reboot_host",
"issued_at": now.strftime("%Y-%m-%dT%H:%M:%SZ"),
"expires_at": exp.strftime("%Y-%m-%dT%H:%M:%SZ"),
"requested_by": 1,
"reason": "operator_request",
}
class TestValidateCommandPayload(unittest.TestCase):
    """Acceptance and rejection cases for ``validate_command_payload``.

    Every case starts from ``_valid_payload()`` and alters one aspect. The
    validator's result is unpacked as
    ``(accepted, normalized, error_code, error_message)``.
    """

    def test_accepts_valid_payload(self):
        """An unmodified payload addressed to its own client passes."""
        command = _valid_payload()
        accepted, normalized, error_code, error_message = validate_command_payload(
            command, command["client_uuid"]
        )
        self.assertTrue(accepted)
        self.assertIsNone(error_code)
        self.assertIsNone(error_message)
        self.assertEqual(normalized["action"], "reboot_host")

    def test_rejects_extra_fields(self):
        """A key outside the schema is reported as ``invalid_schema``."""
        command = _valid_payload()
        command["extra"] = "x"
        accepted, _, error_code, error_message = validate_command_payload(
            command, command["client_uuid"]
        )
        self.assertFalse(accepted)
        self.assertEqual(error_code, "invalid_schema")
        self.assertIn("unexpected fields", error_message)

    def test_rejects_stale_command(self):
        """A command whose validity window ended hours ago is ``stale_command``."""
        stamp = "%Y-%m-%dT%H:%M:%SZ"
        command = _valid_payload()
        issued_long_ago = datetime.now(timezone.utc) - timedelta(hours=3)
        expired_long_ago = datetime.now(timezone.utc) - timedelta(hours=2)
        command.update(
            issued_at=issued_long_ago.strftime(stamp),
            expires_at=expired_long_ago.strftime(stamp),
        )
        accepted, _, error_code, _ = validate_command_payload(
            command, command["client_uuid"]
        )
        self.assertFalse(accepted)
        self.assertEqual(error_code, "stale_command")

    def test_rejects_action_outside_enum(self):
        """An action name that is not in the allowed set is ``invalid_schema``."""
        command = _valid_payload()
        command["action"] = "restart_service"
        accepted, _, error_code, error_message = validate_command_payload(
            command, command["client_uuid"]
        )
        self.assertFalse(accepted)
        self.assertEqual(error_code, "invalid_schema")
        self.assertIn("action must be one of", error_message)

    def test_rejects_client_uuid_mismatch(self):
        """A payload addressed to a different client UUID is refused."""
        foreign_uuid = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"
        command = _valid_payload()
        accepted, _, error_code, error_message = validate_command_payload(
            command, foreign_uuid
        )
        self.assertFalse(accepted)
        self.assertEqual(error_code, "invalid_schema")
        self.assertIn("client_uuid", error_message)
class TestCommandLifecyclePolicy(unittest.TestCase):
    """Policy helpers deciding how a command's lifecycle is completed."""

    def test_reboot_requires_recovery_completion(self):
        """Only ``reboot_host`` is flagged as needing recovery completion."""
        needs_recovery = command_requires_recovery_completion
        self.assertTrue(needs_recovery("reboot_host"))
        self.assertFalse(needs_recovery("shutdown_host"))

    def test_mock_reboot_immediate_completion_enabled_for_mock_helper(self):
        """With the flag on and the mock helper configured, the shortcut applies."""
        overrides = {
            "COMMAND_MOCK_REBOOT_IMMEDIATE_COMPLETE": True,
            "COMMAND_HELPER_PATH": "/home/pi/scripts/mock-command-helper.sh",
        }
        with patch.multiple("simclient", **overrides):
            enabled = command_mock_reboot_immediate_complete_enabled("reboot_host")
            self.assertTrue(enabled)

    def test_mock_reboot_immediate_completion_disabled_for_live_helper(self):
        """With the flag on but the live helper configured, the shortcut is off."""
        overrides = {
            "COMMAND_MOCK_REBOOT_IMMEDIATE_COMPLETE": True,
            "COMMAND_HELPER_PATH": "/usr/local/bin/infoscreen-cmd-helper.sh",
        }
        with patch.multiple("simclient", **overrides):
            enabled = command_mock_reboot_immediate_complete_enabled("reboot_host")
            self.assertFalse(enabled)
class TestMqttSecurityConfiguration(unittest.TestCase):
    """``configure_mqtt_security`` applied to a recording client double."""

    def test_configure_username_password(self):
        """With the broker-specific pair empty, the client pair is applied."""
        settings = dict(
            MQTT_USER="",
            MQTT_PASSWORD_BROKER="",
            MQTT_USERNAME="client-user",
            MQTT_PASSWORD="client-pass",
            MQTT_TLS_ENABLED=False,
        )
        recorder = FakeSecurityClient()
        with patch.multiple("simclient", **settings):
            summary = configure_mqtt_security(recorder)
        self.assertEqual(recorder.username, "client-user")
        self.assertEqual(recorder.password, "client-pass")
        self.assertFalse(summary["tls"])

    def test_configure_tls(self):
        """TLS paths and the insecure switch are forwarded to the client."""
        settings = dict(
            MQTT_USER="",
            MQTT_PASSWORD_BROKER="",
            MQTT_USERNAME="",
            MQTT_PASSWORD="",
            MQTT_TLS_ENABLED=True,
            MQTT_TLS_CA_CERT="/tmp/ca.pem",
            MQTT_TLS_CERT="/tmp/client.pem",
            MQTT_TLS_KEY="/tmp/client.key",
            MQTT_TLS_INSECURE=True,
        )
        recorder = FakeSecurityClient()
        with patch.multiple("simclient", **settings):
            summary = configure_mqtt_security(recorder)
        self.assertTrue(summary["tls"])
        tls_args = recorder.tls_kwargs
        self.assertEqual(tls_args["ca_certs"], "/tmp/ca.pem")
        self.assertEqual(tls_args["certfile"], "/tmp/client.pem")
        self.assertEqual(tls_args["keyfile"], "/tmp/client.key")
        self.assertTrue(recorder.tls_insecure)
class TestAckPublish(unittest.TestCase):
    """Payload content and retry behaviour of ``publish_command_ack``."""

    # Identifiers shared by the cases below.
    CLIENT_UUID = "9b8d1856-ff34-4864-a726-12de072d0f77"
    COMMAND_ID = "5d1f8b4b-7e85-44fb-8f38-3f5d5da5e2e4"
    STAMP = "%Y-%m-%dT%H:%M:%SZ"

    def test_failed_ack_forces_non_null_error_fields(self):
        """A ``failed`` ack sent without error details still carries non-empty ones."""
        broker = FakeMqttClient(rc=0)
        sent = publish_command_ack(
            broker,
            self.CLIENT_UUID,
            NIL_COMMAND_ID,
            "failed",
            error_code=None,
            error_message=None,
        )
        self.assertTrue(sent)
        self.assertEqual(len(broker.calls), 2)
        body = json.loads(broker.calls[0]["payload"])
        self.assertEqual(body["status"], "failed")
        for field in ("error_code", "error_message"):
            self.assertIsInstance(body[field], str)
            self.assertTrue(body[field])

    def test_retry_on_broker_disconnect_then_success(self):
        """One failed round followed by a clean round yields success after one sleep."""
        # First loop (2 topics): NO_CONN, NO_CONN. Second loop: success, success.
        scripted = [mqtt.MQTT_ERR_NO_CONN] * 2 + [mqtt.MQTT_ERR_SUCCESS] * 2
        broker = SequencedMqttClient(scripted)
        deadline = datetime.now(timezone.utc) + timedelta(seconds=30)
        with patch("simclient.time.sleep", return_value=None) as sleep_mock:
            sent = publish_command_ack(
                broker,
                self.CLIENT_UUID,
                self.COMMAND_ID,
                "accepted",
                expires_at=deadline.strftime(self.STAMP),
            )
        self.assertTrue(sent)
        self.assertEqual(len(broker.calls), 4)
        sleep_mock.assert_called_once()

    def test_stop_retry_when_expired(self):
        """With an already-elapsed expiry, a failed round is not retried."""
        broker = SequencedMqttClient([mqtt.MQTT_ERR_NO_CONN] * 2)
        deadline = datetime.now(timezone.utc) - timedelta(seconds=30)
        with patch("simclient.time.sleep", return_value=None) as sleep_mock:
            sent = publish_command_ack(
                broker,
                self.CLIENT_UUID,
                self.COMMAND_ID,
                "accepted",
                expires_at=deadline.strftime(self.STAMP),
            )
        self.assertFalse(sent)
        self.assertEqual(len(broker.calls), 2)
        sleep_mock.assert_not_called()
class TestProcessedCommandsState(unittest.TestCase):
    """Pruning and on-disk persistence of the processed-commands record."""

    def test_prune_keeps_recent_only(self):
        """An entry processed just now survives; one from 30 hours ago is dropped."""
        stamp = "%Y-%m-%dT%H:%M:%SZ"
        fresh_ts = datetime.now(timezone.utc).strftime(stamp)
        aged_ts = (datetime.now(timezone.utc) - timedelta(hours=30)).strftime(stamp)
        history = {
            "a": {"processed_at": fresh_ts, "status": "completed"},
            "b": {"processed_at": aged_ts, "status": "completed"},
        }
        remaining = _prune_processed_commands(history)
        self.assertIn("a", remaining)
        self.assertNotIn("b", remaining)

    def test_load_and_persist_round_trip(self):
        """A persisted entry is returned by a subsequent load."""
        entry = {
            "status": "completed",
            "processed_at": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
        }
        with tempfile.TemporaryDirectory() as workdir:
            target = os.path.join(workdir, "processed_commands.json")
            # Point the module at a throwaway file for the duration of the test.
            with patch("simclient.PROCESSED_COMMANDS_FILE", target):
                persist_processed_commands({"x": entry})
                restored = load_processed_commands()
            self.assertIn("x", restored)
# Allow running this test module directly as a script, in addition to pytest.
if __name__ == "__main__":
    unittest.main()

313
tests/test_power_intent.py Normal file
View File

@@ -0,0 +1,313 @@
"""
Unit tests for the TV power intent validation and state management.
Run from project root (venv activated):
python -m pytest tests/test_power_intent.py -v
"""
import sys
import os
import json
import tempfile
import unittest
from datetime import datetime, timezone, timedelta
from unittest.mock import patch, MagicMock
# Ensure src/ is importable without running MQTT code
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src"))
from simclient import (
validate_power_intent_payload,
write_power_intent_state,
_parse_utc_iso,
POWER_INTENT_STATE_FILE,
)
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _make_intent(
desired: str = "on",
seconds_valid: int = 90,
group_id: int = 2,
intent_id: str = "test-intent-001",
poll_interval: int = 15,
offset_issued: timedelta = timedelta(0),
) -> dict:
"""Build a valid v1 power-intent payload."""
now = datetime.now(timezone.utc) + offset_issued
exp = now + timedelta(seconds=seconds_valid)
return {
"schema_version": "1.0",
"intent_id": intent_id,
"group_id": group_id,
"desired_state": desired,
"reason": "active_event" if desired == "on" else "no_active_event",
"issued_at": now.strftime("%Y-%m-%dT%H:%M:%SZ"),
"expires_at": exp.strftime("%Y-%m-%dT%H:%M:%SZ"),
"poll_interval_sec": poll_interval,
"active_event_ids": [1] if desired == "on" else [],
"event_window_start": now.strftime("%Y-%m-%dT%H:%M:%SZ") if desired == "on" else None,
"event_window_end": exp.strftime("%Y-%m-%dT%H:%M:%SZ") if desired == "on" else None,
}
def _make_stale_intent(desired: str = "on") -> dict:
"""Build an expired v1 payload (issued_at and expires_at both in the past)."""
return {
"schema_version": "1.0",
"intent_id": "stale-001",
"group_id": 2,
"desired_state": desired,
"reason": "no_active_event",
"issued_at": "2026-01-01T00:00:00Z",
"expires_at": "2026-01-01T01:30:00Z",
"poll_interval_sec": 15,
"active_event_ids": [],
"event_window_start": None,
"event_window_end": None,
}
# ---------------------------------------------------------------------------
# Tests: _parse_utc_iso
# ---------------------------------------------------------------------------
class TestParseUtcIso(unittest.TestCase):
    """Parsing of ISO-8601 timestamps by ``_parse_utc_iso``."""

    def test_z_suffix(self):
        """A ``Z``-terminated timestamp parses to a UTC-aware datetime."""
        parsed = _parse_utc_iso("2026-01-15T10:30:00Z")
        self.assertEqual(parsed.tzinfo, timezone.utc)
        self.assertEqual(parsed.year, 2026)
        self.assertEqual(parsed.second, 0)

    def test_plus00_suffix(self):
        """An explicit ``+00:00`` offset is likewise reported as UTC."""
        parsed = _parse_utc_iso("2026-01-15T10:30:00+00:00")
        self.assertEqual(parsed.tzinfo, timezone.utc)

    def test_none_raises(self):
        """``None`` is not silently accepted."""
        self.assertRaises(Exception, _parse_utc_iso, None)

    def test_garbage_raises(self):
        """A string that is not a timestamp is not silently accepted."""
        self.assertRaises(Exception, _parse_utc_iso, "not-a-date")
# ---------------------------------------------------------------------------
# Tests: validate_power_intent_payload — accepted paths
# ---------------------------------------------------------------------------
class TestValidateAccepted(unittest.TestCase):
    """Payloads that ``validate_power_intent_payload`` must accept.

    The validator's result is unpacked as ``(ok, normalized, error)``; the
    error text doubles as the assertion message when acceptance fails.
    """

    # Keys every normalized intent is expected to expose.
    NORMALIZED_KEYS = (
        "intent_id", "desired_state", "issued_at", "expires_at",
        "group_id", "poll_interval_sec", "active_event_ids",
        "event_window_start", "event_window_end",
    )

    def test_valid_on_no_group_check(self):
        """An "on" intent passes when no expected group is supplied."""
        accepted, normalized, error = validate_power_intent_payload(_make_intent("on"))
        self.assertTrue(accepted, error)
        self.assertIsNotNone(normalized)
        self.assertEqual(normalized["desired_state"], "on")
        self.assertEqual(normalized["group_id"], 2)

    def test_valid_off_no_group_check(self):
        """An "off" intent passes when no expected group is supplied."""
        accepted, normalized, error = validate_power_intent_payload(_make_intent("off"))
        self.assertTrue(accepted, error)
        self.assertEqual(normalized["desired_state"], "off")

    def test_valid_on_with_matching_group_id_str(self):
        """The expected group may be given as a string of the same number."""
        payload = _make_intent("on", group_id=5)
        accepted, _normalized, error = validate_power_intent_payload(
            payload, expected_group_id="5"
        )
        self.assertTrue(accepted, error)

    def test_valid_on_with_matching_group_id_int(self):
        """The expected group may be given as an int."""
        payload = _make_intent("on", group_id=7)
        accepted, _normalized, error = validate_power_intent_payload(
            payload, expected_group_id=7
        )
        self.assertTrue(accepted, error)

    def test_normalized_output_contains_required_keys(self):
        """The normalized result carries every key listed in NORMALIZED_KEYS."""
        accepted, normalized, _ = validate_power_intent_payload(_make_intent("on"))
        self.assertTrue(accepted)
        for key in self.NORMALIZED_KEYS:
            self.assertIn(key, normalized, f"missing key: {key}")
# ---------------------------------------------------------------------------
# Tests: validate_power_intent_payload — rejected paths
# ---------------------------------------------------------------------------
class TestValidateRejected(unittest.TestCase):
    """Payloads that ``validate_power_intent_payload`` must refuse."""

    def _rejection_reason(self, payload, **validator_kwargs):
        """Validate ``payload``, assert it was refused, and return the error text."""
        accepted, _, error = validate_power_intent_payload(payload, **validator_kwargs)
        self.assertFalse(accepted)
        return error

    def test_missing_required_fields(self):
        """A payload holding only the schema version lacks required fields."""
        error = self._rejection_reason({"schema_version": "1.0"})
        self.assertIn("missing required field", error)

    def test_wrong_schema_version(self):
        """Schema version "2.0" is refused."""
        payload = _make_intent("on")
        payload["schema_version"] = "2.0"
        self.assertIn("schema_version", self._rejection_reason(payload))

    def test_invalid_desired_state(self):
        """A desired state of "standby" is refused."""
        payload = _make_intent("on")
        payload["desired_state"] = "standby"
        self.assertIn("desired_state", self._rejection_reason(payload))

    def test_group_id_mismatch_string(self):
        """Group 2 does not satisfy an expected group given as "99"."""
        error = self._rejection_reason(
            _make_intent("on", group_id=2), expected_group_id="99"
        )
        self.assertIn("group_id mismatch", error)

    def test_group_id_mismatch_int(self):
        """Group 2 does not satisfy an expected group given as 99."""
        error = self._rejection_reason(
            _make_intent("on", group_id=2), expected_group_id=99
        )
        self.assertIn("group_id mismatch", error)

    def test_expired_intent(self):
        """The fixed-date stale payload is reported as expired."""
        self.assertIn("expired", self._rejection_reason(_make_stale_intent("on")))

    def test_expires_before_issued(self):
        """An intent with no validity window left is refused."""
        payload = _make_intent("on")
        # Collapse the window: expires_at is set equal to issued_at.
        payload["expires_at"] = payload["issued_at"]
        self._rejection_reason(payload)

    def test_zero_poll_interval(self):
        """A poll interval of 0 is refused."""
        error = self._rejection_reason(_make_intent("on", poll_interval=0))
        self.assertIn("poll_interval_sec", error)

    def test_negative_poll_interval(self):
        """A negative poll interval is refused."""
        self._rejection_reason(_make_intent("on", poll_interval=-5))

    def test_active_event_ids_not_list(self):
        """``active_event_ids`` given as a string instead of a list is refused."""
        payload = _make_intent("on")
        payload["active_event_ids"] = "not-a-list"
        self.assertIn("active_event_ids", self._rejection_reason(payload))

    def test_missing_intent_id(self):
        """Dropping ``intent_id`` triggers the missing-field error."""
        payload = _make_intent("on")
        payload.pop("intent_id")
        self.assertIn("missing required field", self._rejection_reason(payload))

    def test_invalid_issued_at_format(self):
        """An unparseable ``issued_at`` is reported as a timestamp problem."""
        payload = _make_intent("on")
        payload["issued_at"] = "not-a-timestamp"
        self.assertIn("timestamp", self._rejection_reason(payload))
# ---------------------------------------------------------------------------
# Tests: write_power_intent_state atomic write
# ---------------------------------------------------------------------------
class TestWritePowerIntentState(unittest.TestCase):
    """``write_power_intent_state`` writing to a patched state-file path."""

    @staticmethod
    def _write_then_read(path, state):
        """Write ``state`` with the state file redirected to ``path``; return the parsed file."""
        with patch("simclient.POWER_INTENT_STATE_FILE", path):
            write_power_intent_state(state)
        with open(path) as handle:
            return json.load(handle)

    def test_writes_valid_json(self):
        """The written file parses as JSON and holds the given values."""
        state = {"intent_id": "abc", "desired_state": "on", "group_id": 2}
        # delete=False: only the path is needed once the handle is closed.
        scratch = tempfile.NamedTemporaryFile(suffix=".json", delete=False)
        scratch.close()
        target = scratch.name
        try:
            on_disk = self._write_then_read(target, state)
            self.assertEqual(on_disk["intent_id"], "abc")
            self.assertEqual(on_disk["desired_state"], "on")
        finally:
            os.unlink(target)

    def test_atomic_write_replaces_existing(self):
        """Content already present in the state file is superseded by the new state."""
        previous = {"intent_id": "old"}
        replacement = {"intent_id": "new", "desired_state": "off"}
        scratch = tempfile.NamedTemporaryFile(suffix=".json", delete=False, mode="w")
        with scratch:
            json.dump(previous, scratch)
        target = scratch.name
        try:
            on_disk = self._write_then_read(target, replacement)
            self.assertEqual(on_disk["intent_id"], "new")
        finally:
            os.unlink(target)
# ---------------------------------------------------------------------------
# Tests: display_manager.ProcessHealthState power fields
# ---------------------------------------------------------------------------
class TestProcessHealthStatePowerFields(unittest.TestCase):
    """Power-control attributes of ``display_manager.ProcessHealthState``."""

    def setUp(self):
        # Import here to avoid triggering display_manager side effects at module level
        import display_manager

        self.ProcessHealthState = display_manager.ProcessHealthState

    def test_initial_power_fields_are_none(self):
        """A fresh instance has every power-related attribute set to None."""
        state = self.ProcessHealthState()
        for attribute in (
            "power_control_mode",
            "power_source",
            "last_intent_id",
            "last_power_action",
            "last_power_at",
        ):
            self.assertIsNone(getattr(state, attribute))

    def test_to_dict_contains_power_control(self):
        """``to_dict`` exposes a ``power_control`` section with the expected keys."""
        exported = self.ProcessHealthState().to_dict()
        self.assertIn("power_control", exported)
        section = exported["power_control"]
        for key in ("mode", "source", "last_intent_id", "last_action", "last_power_at"):
            self.assertIn(key, section)

    def test_update_power_action_sets_fields(self):
        """``update_power_action`` stores action, source, intent id and a timestamp."""
        state = self.ProcessHealthState()
        state.power_control_mode = "hybrid"
        state.update_power_action("on", "mqtt_intent", "intent-xyz")
        self.assertEqual(state.last_power_action, "on")
        self.assertEqual(state.power_source, "mqtt_intent")
        self.assertEqual(state.last_intent_id, "intent-xyz")
        self.assertIsNotNone(state.last_power_at)

    def test_update_power_action_without_intent_id(self):
        """Omitting the intent id leaves ``last_intent_id`` as None."""
        state = self.ProcessHealthState()
        state.update_power_action("off", "local_fallback")
        self.assertEqual(state.last_power_action, "off")
        self.assertEqual(state.power_source, "local_fallback")
        self.assertIsNone(state.last_intent_id)

    def test_to_dict_reflects_update(self):
        """Values set on the instance appear in the exported ``power_control`` section."""
        state = self.ProcessHealthState()
        state.power_control_mode = "mqtt"
        state.update_power_action("off", "mqtt_intent", "intent-abc")
        section = state.to_dict()["power_control"]
        self.assertEqual(section["mode"], "mqtt")
        self.assertEqual(section["source"], "mqtt_intent")
        self.assertEqual(section["last_intent_id"], "intent-abc")
        self.assertEqual(section["last_action"], "off")
# Allow running this test module directly as a script, in addition to pytest.
if __name__ == "__main__":
    unittest.main()