fix(screenshots): harden event-triggered MQTT screenshot flow and clean up docs

- fix race where periodic captures could overwrite pending event_start and event_stop metadata before simclient published them
- keep latest.jpg and meta.json synchronized so triggered screenshots are not lost
- add stale pending trigger self-healing to recover from old or invalid metadata states
- improve non-interactive capture reliability with DISPLAY and XAUTHORITY fallbacks
- allow periodic idle captures in development mode so dashboard previews stay fresh without active events
- add deeper simclient screenshot diagnostics for trigger and metadata handling
- add regression test script for metadata preservation and trigger delivery
- add root-cause and fix documentation for the screenshot MQTT issue
- align and deduplicate README screenshot and troubleshooting sections; update release notes to March 2026
- fix scripts/start-dev.sh .env loading to skip comment lines safely and eliminate the shell's "export: not a valid identifier" warnings
This commit is contained in:
RobbStarkAustria
2026-03-29 10:38:29 +02:00
parent cda126018f
commit d6090a6179
7 changed files with 556 additions and 244 deletions

View File

@@ -144,6 +144,9 @@ logging.info(f"Monitoring logger initialized: {MONITORING_LOG_PATH}")
# Health state file (written by display_manager, read by simclient)
HEALTH_STATE_FILE = os.path.join(os.path.dirname(__file__), "current_process_health.json")
# Client settings JSON, located in the config/ directory next to this module
CLIENT_SETTINGS_FILE = os.path.join(os.path.dirname(__file__), "config", "client_settings.json")
# Screenshot IPC (written by display_manager, polled by simclient)
# SCREENSHOT_DIR is the screenshots/ directory next to this module; meta.json
# inside it carries capture metadata (keys read in this file: 'type', 'file',
# 'send_immediately').
SCREENSHOT_DIR = os.path.join(os.path.dirname(__file__), "screenshots")
SCREENSHOT_META_FILE = os.path.join(SCREENSHOT_DIR, "meta.json")
# Module-level flag, declared `global` inside main()
discovered = False
@@ -635,19 +638,56 @@ def publish_log_message(client, client_id, level: str, message: str, context: di
logging.debug(f"Error publishing log: {e}")
def send_screenshot_heartbeat(client, client_id):
def _read_and_clear_meta():
    """Read screenshots/meta.json and clear its send_immediately flag on disk.

    Returns:
        The parsed dict, still carrying the *original* send_immediately value,
        when the file exists and holds a JSON object. None when the file is
        missing, is not valid JSON, is not a JSON object, or when reading or
        clearing fails for any other reason.

    When the flag is set, a copy with send_immediately=False is written to a
    sibling ".tmp" file and moved over meta.json with os.replace before this
    function returns, so a crash between read and publish does not re-send on
    the next tick.
    """
    try:
        with open(SCREENSHOT_META_FILE, 'r', encoding='utf-8') as f:
            meta = json.load(f)
    except FileNotFoundError:
        # No metadata written yet (or the directory is absent): nothing to do.
        return None
    except Exception as e:
        logging.debug(f"Could not read screenshot meta: {e}")
        return None

    # Anything but a JSON object cannot carry the flag; reject it explicitly
    # instead of relying on an AttributeError from .get() further down.
    if not isinstance(meta, dict):
        logging.debug(
            f"Could not read screenshot meta: expected JSON object, got {type(meta).__name__}"
        )
        return None

    if meta.get('send_immediately'):
        # Write the cleared copy next to the target and swap it in with a
        # single rename so readers never observe a partially written file.
        tmp_path = SCREENSHOT_META_FILE + '.tmp'
        try:
            with open(tmp_path, 'w', encoding='utf-8') as f:
                json.dump({**meta, 'send_immediately': False}, f)
            os.replace(tmp_path, SCREENSHOT_META_FILE)
        except Exception as e:
            logging.debug(f"Could not read screenshot meta: {e}")
            # Best-effort cleanup: do not leave a half-written temp file behind.
            try:
                os.remove(tmp_path)
            except OSError:
                pass
            return None

    return meta  # original dict; send_immediately is True if it was set
def send_screenshot_heartbeat(client, client_id, capture_type: str = "periodic"):
"""Send heartbeat with screenshot to server for dashboard monitoring"""
try:
screenshot_info = get_latest_screenshot()
# Also read health state and include in heartbeat
health = read_health_state()
# Compute screenshot age so the server can flag stale images
screenshot_age_s = None
if screenshot_info:
try:
ts = datetime.fromisoformat(screenshot_info["timestamp"])
screenshot_age_s = round((datetime.now(timezone.utc) - ts).total_seconds(), 1)
except Exception:
pass
heartbeat_data = {
"timestamp": datetime.now(timezone.utc).isoformat(),
"client_id": client_id,
"status": "alive",
"screenshot_type": capture_type,
"screenshot": screenshot_info,
"screenshot_age_s": screenshot_age_s,
"system_info": {
"hostname": socket.gethostname(),
"ip": get_ip(),
@@ -685,18 +725,46 @@ def send_screenshot_heartbeat(client, client_id):
def screenshot_service_thread(client, client_id):
    """Background thread for screenshot monitoring and transmission.

    Runs on a 1-second tick. A heartbeat is sent when either:

    - display_manager set send_immediately=True in screenshots/meta.json
      (event_start / event_stop triggered captures); fired within <=1 second, OR
    - the periodic SCREENSHOT_INTERVAL has elapsed since the last send
      (always the case on the first tick).

    The interval timer resets on every send, so a triggered send pushes out the
    next periodic heartbeat rather than causing a double-send shortly after.

    Args:
        client: Passed through unchanged to send_screenshot_heartbeat
            (presumably the MQTT client; confirm against the caller).
        client_id: Passed through unchanged to send_screenshot_heartbeat.

    This function never returns. Any exception raised inside a tick is logged
    and followed by a 60-second back-off before the loop resumes.
    """
    logging.info(f"Screenshot service started with {SCREENSHOT_INTERVAL}s periodic interval")
    last_sent = None  # monotonic time of the last send; None until the first one
    last_meta_type = None
    while True:
        try:
            time.sleep(1)
            # Elapsed time is measured on the monotonic clock so a wall-clock
            # step (NTP sync, manual change) cannot stall or spam periodic sends.
            now = time.monotonic()
            meta = _read_and_clear_meta()
            triggered = bool(meta and meta.get('send_immediately'))
            interval_due = last_sent is None or (now - last_sent) >= SCREENSHOT_INTERVAL

            if meta:
                # Log only when the capture type changes to keep the 1 s tick quiet.
                current_type = meta.get('type', 'unknown')
                if current_type != last_meta_type:
                    logging.debug(f"Meta.json detected: type={current_type}, send_immediately={meta.get('send_immediately')}, file={meta.get('file')}")
                    last_meta_type = current_type

            if triggered or interval_due:
                # .get() instead of ['type']: the flag has already been cleared
                # on disk, so a KeyError here would drop the trigger for good.
                capture_type = meta.get('type', 'unknown') if triggered else "periodic"
                if triggered:
                    logging.info(f"Sending triggered screenshot: type={capture_type}")
                send_screenshot_heartbeat(client, client_id, capture_type)
                last_sent = now
        except Exception as e:
            logging.error(f"Screenshot service error: {e}")
            time.sleep(60)  # Wait a minute before retrying
def main():
global discovered
print(f"[{datetime.now(timezone.utc).isoformat()}] simclient.py: program started")