fix(screenshots): harden event-triggered MQTT screenshot flow and cleanup docs
- fix race where periodic captures could overwrite pending event_start and event_stop metadata before simclient published - keep latest.jpg and meta.json synchronized so triggered screenshots are not lost - add stale pending trigger self-healing to recover from old or invalid metadata states - improve non-interactive capture reliability with DISPLAY and XAUTHORITY fallbacks - allow periodic idle captures in development mode so dashboard previews stay fresh without active events - add deeper simclient screenshot diagnostics for trigger and metadata handling - add regression test script for metadata preservation and trigger delivery - add root-cause and fix documentation for the screenshot MQTT issue - align and deduplicate README screenshot and troubleshooting sections; update release notes to March 2026 - fix scripts/start-dev.sh .env loading to ignore comments safely and remove export invalid identifier warnings
This commit is contained in:
@@ -39,6 +39,19 @@ for env_path in env_paths:
|
||||
load_dotenv(env_path)
|
||||
break
|
||||
|
||||
# Best-effort display env bootstrap for non-interactive starts (nohup/systemd/ssh).
|
||||
# If both Wayland and X11 vars are missing, default to X11 :0 which is the
|
||||
# common kiosk display on Raspberry Pi deployments.
|
||||
if not os.environ.get("WAYLAND_DISPLAY") and not os.environ.get("DISPLAY"):
|
||||
os.environ["DISPLAY"] = os.getenv("DISPLAY", ":0")
|
||||
|
||||
# X11 capture tools may also require XAUTHORITY when started outside a desktop
|
||||
# session shell; default to ~/.Xauthority when available.
|
||||
if os.environ.get("DISPLAY") and not os.environ.get("XAUTHORITY"):
|
||||
xauth_default = os.path.join(os.path.expanduser("~"), ".Xauthority")
|
||||
if os.path.exists(xauth_default):
|
||||
os.environ["XAUTHORITY"] = xauth_default
|
||||
|
||||
# Configuration
|
||||
ENV = os.getenv("ENV", "development")
|
||||
LOG_LEVEL = os.getenv("LOG_LEVEL", "DEBUG" if ENV == "development" else "INFO")
|
||||
@@ -48,6 +61,10 @@ SCREENSHOT_MAX_WIDTH = int(os.getenv("SCREENSHOT_MAX_WIDTH", "800")) # Width to
|
||||
SCREENSHOT_JPEG_QUALITY = int(os.getenv("SCREENSHOT_JPEG_QUALITY", "70")) # JPEG quality 1-95
|
||||
SCREENSHOT_MAX_FILES = int(os.getenv("SCREENSHOT_MAX_FILES", "20")) # Rotate old screenshots
|
||||
SCREENSHOT_ALWAYS = os.getenv("SCREENSHOT_ALWAYS", "0").lower() in ("1","true","yes")
|
||||
# Delay (seconds) before triggered screenshot fires after event start/stop
|
||||
SCREENSHOT_TRIGGER_DELAY_PRESENTATION = int(os.getenv("SCREENSHOT_TRIGGER_DELAY_PRESENTATION", "4"))
|
||||
SCREENSHOT_TRIGGER_DELAY_VIDEO = int(os.getenv("SCREENSHOT_TRIGGER_DELAY_VIDEO", "2"))
|
||||
SCREENSHOT_TRIGGER_DELAY_WEB = int(os.getenv("SCREENSHOT_TRIGGER_DELAY_WEB", "5"))
|
||||
CHECK_INTERVAL = int(os.getenv("DISPLAY_CHECK_INTERVAL", "5")) # seconds
|
||||
PRESENTATION_DIR = os.path.join(os.path.dirname(__file__), "presentation")
|
||||
EVENT_FILE = os.path.join(os.path.dirname(__file__), "current_event.json")
|
||||
@@ -590,6 +607,9 @@ class DisplayManager:
|
||||
self._screenshot_thread = threading.Thread(target=self._screenshot_loop, daemon=True)
|
||||
self._screenshot_thread.start()
|
||||
|
||||
# Pending one-shot timer for event-triggered screenshots (event_start / event_stop)
|
||||
self._pending_trigger_timer: Optional[threading.Timer] = None
|
||||
|
||||
self._load_client_settings(force=True)
|
||||
|
||||
def _normalize_volume_level(self, value, default: float = 1.0) -> float:
|
||||
@@ -880,7 +900,9 @@ class DisplayManager:
|
||||
self.health.update_stopped()
|
||||
self.current_process = None
|
||||
self.current_event_data = None
|
||||
|
||||
# Capture a screenshot ~1s after stop so the dashboard shows the cleared screen
|
||||
self._trigger_event_screenshot("event_stop", 1.0)
|
||||
|
||||
# Turn off TV when display stops (with configurable delay)
|
||||
if turn_off_tv:
|
||||
self.cec.turn_off(delayed=True)
|
||||
@@ -1431,13 +1453,17 @@ class DisplayManager:
|
||||
|
||||
def start_display_for_event(self, event: Dict) -> Optional[DisplayProcess]:
|
||||
"""Start appropriate display software for the given event"""
|
||||
process = None
|
||||
handled = False
|
||||
|
||||
# First, respect explicit event_type if provided by scheduler
|
||||
etype = event.get('event_type')
|
||||
if etype:
|
||||
etype = etype.lower()
|
||||
if etype == 'presentation':
|
||||
return self.start_presentation(event)
|
||||
if etype in ('webuntis', 'webpage', 'website'):
|
||||
process = self.start_presentation(event)
|
||||
handled = True
|
||||
elif etype in ('webuntis', 'webpage', 'website'):
|
||||
# webuntis and webpage both show a browser kiosk
|
||||
# Ensure the URL is taken from 'website.url' or 'web.url'
|
||||
# Normalize event to include a 'web' key so start_webpage can use it
|
||||
@@ -1448,18 +1474,25 @@ class DisplayManager:
|
||||
event['web']['url'] = event['website'].get('url')
|
||||
# Only enable autoscroll for explicit scheduler event_type 'website'
|
||||
autoscroll_flag = (etype == 'website')
|
||||
return self.start_webpage(event, autoscroll_enabled=autoscroll_flag)
|
||||
process = self.start_webpage(event, autoscroll_enabled=autoscroll_flag)
|
||||
handled = True
|
||||
|
||||
# Fallback to legacy keys
|
||||
if 'presentation' in event:
|
||||
return self.start_presentation(event)
|
||||
elif 'video' in event:
|
||||
return self.start_video(event)
|
||||
elif 'web' in event:
|
||||
return self.start_webpage(event)
|
||||
else:
|
||||
logging.error(f"Unknown event type/structure: {list(event.keys())}")
|
||||
return None
|
||||
if not handled:
|
||||
# Fallback to legacy keys
|
||||
if 'presentation' in event:
|
||||
process = self.start_presentation(event)
|
||||
elif 'video' in event:
|
||||
process = self.start_video(event)
|
||||
elif 'web' in event:
|
||||
process = self.start_webpage(event)
|
||||
else:
|
||||
logging.error(f"Unknown event type/structure: {list(event.keys())}")
|
||||
|
||||
if process is not None:
|
||||
delay = self._get_trigger_delay(event)
|
||||
self._trigger_event_screenshot("event_start", delay)
|
||||
|
||||
return process
|
||||
|
||||
def _command_exists(self, command: str) -> bool:
|
||||
"""Check if a command exists in PATH"""
|
||||
@@ -1718,6 +1751,130 @@ class DisplayManager:
|
||||
# -------------------------------------------------------------
|
||||
# Screenshot capture subsystem
|
||||
# -------------------------------------------------------------
|
||||
|
||||
def _write_screenshot_meta(self, capture_type: str, final_path: str, send_immediately: bool = False):
|
||||
"""Write screenshots/meta.json atomically so simclient can detect new captures.
|
||||
|
||||
IMPORTANT: Protect event-triggered metadata from being overwritten by periodic captures.
|
||||
If a periodic screenshot is captured while an event-triggered one is still pending
|
||||
transmission (send_immediately=True), skip writing meta.json to preserve the event's metadata.
|
||||
|
||||
Args:
|
||||
capture_type: 'periodic', 'event_start', or 'event_stop'
|
||||
final_path: absolute path of the just-written screenshot file
|
||||
send_immediately: True for triggered (event) captures, False for periodic ones
|
||||
"""
|
||||
try:
|
||||
def _pending_trigger_is_valid(meta: Dict) -> bool:
|
||||
"""Return True only for fresh, actionable pending trigger metadata.
|
||||
|
||||
This prevents a stale/corrupt pending flag from permanently blocking
|
||||
periodic updates (meta.json/latest.jpg) if simclient was down or test
|
||||
data left send_immediately=True behind.
|
||||
"""
|
||||
try:
|
||||
if not meta.get('send_immediately'):
|
||||
return False
|
||||
mtype = str(meta.get('type') or '')
|
||||
if mtype not in ('event_start', 'event_stop'):
|
||||
return False
|
||||
mfile = str(meta.get('file') or '').strip()
|
||||
if not mfile:
|
||||
return False
|
||||
file_path = os.path.join(self.screenshot_dir, mfile)
|
||||
if not os.path.exists(file_path):
|
||||
logging.warning(
|
||||
f"Ignoring stale pending screenshot meta: missing file '{mfile}'"
|
||||
)
|
||||
return False
|
||||
|
||||
captured_at_raw = meta.get('captured_at')
|
||||
if not captured_at_raw:
|
||||
return False
|
||||
captured_at = datetime.fromisoformat(str(captured_at_raw).replace('Z', '+00:00'))
|
||||
age_s = (datetime.now(timezone.utc) - captured_at.astimezone(timezone.utc)).total_seconds()
|
||||
|
||||
# Guard against malformed/future timestamps that could lock
|
||||
# the pipeline by appearing permanently "fresh".
|
||||
if age_s < -5:
|
||||
logging.warning(
|
||||
f"Ignoring invalid pending screenshot meta: future captured_at (age={age_s:.1f}s)"
|
||||
)
|
||||
return False
|
||||
|
||||
# Triggered screenshots should be consumed quickly (<= 1s). Use a
|
||||
# generous safety window to avoid false negatives under load.
|
||||
if age_s > 30:
|
||||
logging.warning(
|
||||
f"Ignoring stale pending screenshot meta: type={mtype}, age={age_s:.1f}s"
|
||||
)
|
||||
return False
|
||||
|
||||
return True
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
meta_path = os.path.join(self.screenshot_dir, 'meta.json')
|
||||
|
||||
# PROTECTION: Don't overwrite pending event-triggered metadata with periodic capture
|
||||
if not send_immediately and capture_type == "periodic":
|
||||
try:
|
||||
if os.path.exists(meta_path):
|
||||
with open(meta_path, 'r', encoding='utf-8') as f:
|
||||
existing_meta = json.load(f)
|
||||
# If there's a pending event-triggered capture, skip this periodic write
|
||||
if _pending_trigger_is_valid(existing_meta):
|
||||
logging.debug(f"Skipping periodic meta.json to preserve pending {existing_meta.get('type')} (send_immediately=True)")
|
||||
return
|
||||
except Exception:
|
||||
pass # If we can't read existing meta, proceed with writing new one
|
||||
|
||||
meta = {
|
||||
"captured_at": datetime.now(timezone.utc).isoformat(),
|
||||
"file": os.path.basename(final_path),
|
||||
"type": capture_type,
|
||||
"send_immediately": send_immediately,
|
||||
}
|
||||
|
||||
tmp_path = meta_path + '.tmp'
|
||||
with open(tmp_path, 'w', encoding='utf-8') as f:
|
||||
json.dump(meta, f)
|
||||
os.replace(tmp_path, meta_path)
|
||||
logging.debug(f"Screenshot meta written: type={capture_type}, send_immediately={send_immediately}")
|
||||
except Exception as e:
|
||||
logging.debug(f"Could not write screenshot meta: {e}")
|
||||
|
||||
def _get_trigger_delay(self, event: Dict) -> float:
|
||||
"""Return the post-launch capture delay in seconds appropriate for the event type."""
|
||||
etype = (event.get('event_type') or '').lower()
|
||||
if etype == 'presentation' or 'presentation' in event:
|
||||
return float(SCREENSHOT_TRIGGER_DELAY_PRESENTATION)
|
||||
if etype in ('webuntis', 'webpage', 'website') or 'web' in event:
|
||||
return float(SCREENSHOT_TRIGGER_DELAY_WEB)
|
||||
if 'video' in event:
|
||||
return float(SCREENSHOT_TRIGGER_DELAY_VIDEO)
|
||||
return float(SCREENSHOT_TRIGGER_DELAY_PRESENTATION) # safe default
|
||||
|
||||
def _trigger_event_screenshot(self, capture_type: str, delay: float):
|
||||
"""Arm a one-shot timer to capture a triggered screenshot after *delay* seconds.
|
||||
|
||||
Cancels any already-pending trigger so rapid event switches only produce
|
||||
one screenshot after the final transition settles, not one per intermediate state.
|
||||
"""
|
||||
if self._pending_trigger_timer is not None:
|
||||
self._pending_trigger_timer.cancel()
|
||||
self._pending_trigger_timer = None
|
||||
|
||||
def _do_capture():
|
||||
self._pending_trigger_timer = None
|
||||
self._capture_screenshot(capture_type)
|
||||
|
||||
t = threading.Timer(delay, _do_capture)
|
||||
t.daemon = True
|
||||
t.start()
|
||||
self._pending_trigger_timer = t
|
||||
logging.debug(f"Screenshot trigger armed: type={capture_type}, delay={delay}s")
|
||||
|
||||
def _screenshot_loop(self):
|
||||
"""Background loop that captures screenshots periodically while an event is active.
|
||||
|
||||
@@ -1731,7 +1888,12 @@ class DisplayManager:
|
||||
continue
|
||||
now = time.time()
|
||||
if now - last_capture >= SCREENSHOT_CAPTURE_INTERVAL:
|
||||
if SCREENSHOT_ALWAYS or (self.current_process and self.current_process.is_running()):
|
||||
process_active = bool(self.current_process and self.current_process.is_running())
|
||||
# In development we keep dashboard screenshots fresh even when idle,
|
||||
# otherwise dashboards can look "dead" with stale images.
|
||||
capture_idle_in_dev = (ENV == "development")
|
||||
|
||||
if SCREENSHOT_ALWAYS or process_active or capture_idle_in_dev:
|
||||
self._capture_screenshot()
|
||||
last_capture = now
|
||||
else:
|
||||
@@ -1743,7 +1905,7 @@ class DisplayManager:
|
||||
logging.debug(f"Screenshot loop error: {e}")
|
||||
time.sleep(5)
|
||||
|
||||
def _capture_screenshot(self):
|
||||
def _capture_screenshot(self, capture_type: str = "periodic"):
|
||||
"""Capture a screenshot of the current display and store it in the shared screenshots directory.
|
||||
|
||||
Strategy:
|
||||
@@ -1841,8 +2003,15 @@ class DisplayManager:
|
||||
logging.debug(f"xwd/convert pipeline failed: {e}")
|
||||
|
||||
if not captured:
|
||||
# Warn only occasionally
|
||||
logging.warning("No screenshot tool available for current session. For X11, install 'scrot' or ImageMagick. For Wayland, install 'grim' or 'gnome-screenshot'.")
|
||||
# Capture can fail in headless/TTY sessions even when tools exist.
|
||||
logging.warning(
|
||||
"Screenshot capture failed for current session "
|
||||
f"(DISPLAY={os.environ.get('DISPLAY')}, "
|
||||
f"WAYLAND_DISPLAY={os.environ.get('WAYLAND_DISPLAY')}, "
|
||||
f"XDG_SESSION_TYPE={os.environ.get('XDG_SESSION_TYPE')}). "
|
||||
"Ensure display-manager runs in a desktop session or exports DISPLAY/XAUTHORITY. "
|
||||
"For X11 install/use 'scrot' or ImageMagick; for Wayland use 'grim' or 'gnome-screenshot'."
|
||||
)
|
||||
return
|
||||
|
||||
# Open image and downscale/compress
|
||||
@@ -1868,13 +2037,29 @@ class DisplayManager:
|
||||
|
||||
# Maintain latest.jpg as an atomic copy so readers never see a missing
|
||||
# or broken pointer while a new screenshot is being published.
|
||||
latest_link = os.path.join(self.screenshot_dir, 'latest.jpg')
|
||||
try:
|
||||
latest_tmp = os.path.join(self.screenshot_dir, 'latest.jpg.tmp')
|
||||
shutil.copyfile(final_path, latest_tmp)
|
||||
os.replace(latest_tmp, latest_link)
|
||||
except Exception as e:
|
||||
logging.debug(f"Could not update latest.jpg: {e}")
|
||||
# PROTECTION: Don't update latest.jpg for periodic captures if event-triggered is pending
|
||||
should_update_latest = True
|
||||
if capture_type == "periodic":
|
||||
try:
|
||||
meta_path = os.path.join(self.screenshot_dir, 'meta.json')
|
||||
if os.path.exists(meta_path):
|
||||
with open(meta_path, 'r', encoding='utf-8') as f:
|
||||
existing_meta = json.load(f)
|
||||
# If there's a pending event-triggered capture, don't update latest.jpg
|
||||
if _pending_trigger_is_valid(existing_meta):
|
||||
should_update_latest = False
|
||||
logging.debug(f"Skipping latest.jpg update to preserve pending {existing_meta.get('type')} screenshot")
|
||||
except Exception:
|
||||
pass # If we can't read meta, proceed with updating latest.jpg
|
||||
|
||||
latest_link = os.path.join(self.screenshot_dir, 'latest.jpg')
|
||||
if should_update_latest:
|
||||
try:
|
||||
latest_tmp = os.path.join(self.screenshot_dir, 'latest.jpg.tmp')
|
||||
shutil.copyfile(final_path, latest_tmp)
|
||||
os.replace(latest_tmp, latest_link)
|
||||
except Exception as e:
|
||||
logging.debug(f"Could not update latest.jpg: {e}")
|
||||
|
||||
# Rotate old screenshots
|
||||
try:
|
||||
@@ -1894,7 +2079,8 @@ class DisplayManager:
|
||||
except Exception:
|
||||
pass
|
||||
logged_size = size if size is not None else 'unknown'
|
||||
logging.info(f"Screenshot captured: {os.path.basename(final_path)} ({logged_size} bytes)")
|
||||
self._write_screenshot_meta(capture_type, final_path, send_immediately=(capture_type != "periodic"))
|
||||
logging.info(f"Screenshot captured: {os.path.basename(final_path)} ({logged_size} bytes) type={capture_type}")
|
||||
except Exception as e:
|
||||
logging.debug(f"Screenshot capture failure: {e}")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user