fix(screenshots): harden event-triggered MQTT screenshot flow and cleanup docs
- fix race where periodic captures could overwrite pending event_start and event_stop metadata before simclient published
- keep latest.jpg and meta.json synchronized so triggered screenshots are not lost
- add stale pending trigger self-healing to recover from old or invalid metadata states
- improve non-interactive capture reliability with DISPLAY and XAUTHORITY fallbacks
- allow periodic idle captures in development mode so dashboard previews stay fresh without active events
- add deeper simclient screenshot diagnostics for trigger and metadata handling
- add regression test script for metadata preservation and trigger delivery
- add root-cause and fix documentation for the screenshot MQTT issue
- align and deduplicate README screenshot and troubleshooting sections; update release notes to March 2026
- fix scripts/start-dev.sh .env loading to ignore comments safely and remove "export: invalid identifier" warnings
This commit is contained in:
@@ -144,6 +144,9 @@ logging.info(f"Monitoring logger initialized: {MONITORING_LOG_PATH}")
|
||||
# Health state file (written by display_manager, read by simclient)
|
||||
HEALTH_STATE_FILE = os.path.join(os.path.dirname(__file__), "current_process_health.json")
|
||||
CLIENT_SETTINGS_FILE = os.path.join(os.path.dirname(__file__), "config", "client_settings.json")
|
||||
# Screenshot IPC (written by display_manager, polled by simclient)
|
||||
SCREENSHOT_DIR = os.path.join(os.path.dirname(__file__), "screenshots")
|
||||
SCREENSHOT_META_FILE = os.path.join(SCREENSHOT_DIR, "meta.json")
|
||||
|
||||
|
||||
discovered = False
|
||||
@@ -635,19 +638,56 @@ def publish_log_message(client, client_id, level: str, message: str, context: di
|
||||
logging.debug(f"Error publishing log: {e}")
|
||||
|
||||
|
||||
def send_screenshot_heartbeat(client, client_id):
|
||||
def _read_and_clear_meta():
    """Read screenshots/meta.json and clear its send_immediately flag on disk.

    When the flag is set, a copy with ``send_immediately=False`` is written to
    a temporary file and moved over meta.json with ``os.replace()`` before this
    function returns, so a crash between read and publish does not re-send on
    the next tick.

    Returns:
        The parsed dict, still carrying the *original* ``send_immediately``
        value, or ``None`` when the file is missing, unreadable, not a JSON
        object, or the cleared copy could not be written.
    """
    # EAFP: open directly instead of checking os.path.exists() first, so the
    # file disappearing between check and open is handled the same way as a
    # file that never existed.
    try:
        with open(SCREENSHOT_META_FILE, 'r', encoding='utf-8') as f:
            meta = json.load(f)
    except FileNotFoundError:
        return None
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        logging.debug(f"Could not read screenshot meta: {e}")
        return None

    if not isinstance(meta, dict):
        # Valid JSON but not an object; callers rely on dict access.
        logging.debug(
            f"Could not read screenshot meta: expected JSON object, got {type(meta).__name__}"
        )
        return None

    if not meta.get('send_immediately'):
        return meta

    # Write the cleared copy first, then swap it in with a single rename.
    cleared = dict(meta)
    cleared['send_immediately'] = False
    tmp_path = SCREENSHOT_META_FILE + '.tmp'
    try:
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(cleared, f)
        os.replace(tmp_path, SCREENSHOT_META_FILE)
    except OSError as e:
        logging.debug(f"Could not clear screenshot trigger flag: {e}")
        # Do not leave a half-written temp file behind.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        # The flag is still set on disk, so the next call will see it again.
        return None

    return meta  # original dict; send_immediately is True if it was set
|
||||
|
||||
|
||||
def send_screenshot_heartbeat(client, client_id, capture_type: str = "periodic"):
|
||||
"""Send heartbeat with screenshot to server for dashboard monitoring"""
|
||||
try:
|
||||
screenshot_info = get_latest_screenshot()
|
||||
|
||||
|
||||
# Also read health state and include in heartbeat
|
||||
health = read_health_state()
|
||||
|
||||
|
||||
# Compute screenshot age so the server can flag stale images
|
||||
screenshot_age_s = None
|
||||
if screenshot_info:
|
||||
try:
|
||||
ts = datetime.fromisoformat(screenshot_info["timestamp"])
|
||||
screenshot_age_s = round((datetime.now(timezone.utc) - ts).total_seconds(), 1)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
heartbeat_data = {
|
||||
"timestamp": datetime.now(timezone.utc).isoformat(),
|
||||
"client_id": client_id,
|
||||
"status": "alive",
|
||||
"screenshot_type": capture_type,
|
||||
"screenshot": screenshot_info,
|
||||
"screenshot_age_s": screenshot_age_s,
|
||||
"system_info": {
|
||||
"hostname": socket.gethostname(),
|
||||
"ip": get_ip(),
|
||||
@@ -685,18 +725,46 @@ def send_screenshot_heartbeat(client, client_id):
|
||||
|
||||
|
||||
def screenshot_service_thread(client, client_id):
    """Background thread for screenshot monitoring and transmission.

    Runs on a 1-second tick. A heartbeat is sent when either:
      - display_manager set send_immediately=True in screenshots/meta.json
        (event_start / event_stop triggered captures); fired within <=1 second, OR
      - the periodic SCREENSHOT_INTERVAL has elapsed since the last send.

    The interval timer resets on every send, so a triggered send pushes out the
    next periodic heartbeat rather than causing a double-send shortly after.

    Args:
        client: Passed through unchanged to send_screenshot_heartbeat().
        client_id: Passed through unchanged to send_screenshot_heartbeat().

    This function never returns; any exception raised inside a tick is logged
    and followed by a 60-second pause before the loop resumes.
    """
    logging.info(f"Screenshot service started with {SCREENSHOT_INTERVAL}s periodic interval")

    # -inf makes the very first tick count as "interval elapsed" regardless of
    # where the monotonic clock happens to start.
    last_sent = float("-inf")
    last_meta_type = None

    while True:
        try:
            time.sleep(1)

            # Monotonic clock: the interval must not be affected by wall-clock
            # adjustments made while the thread is running.
            now = time.monotonic()
            meta = _read_and_clear_meta()
            triggered = bool(meta and meta.get('send_immediately'))
            interval_due = (now - last_sent) >= SCREENSHOT_INTERVAL

            if meta:
                current_type = meta.get('type', 'unknown')
                # Log only when the capture type changes to avoid one debug
                # line per tick.
                if current_type != last_meta_type:
                    logging.debug(f"Meta.json detected: type={current_type}, send_immediately={meta.get('send_immediately')}, file={meta.get('file')}")
                    last_meta_type = current_type

            if triggered or interval_due:
                # The trigger flag has already been cleared on disk at this
                # point, so a missing 'type' key must not raise and drop the
                # send; fall back to the same default used for logging above.
                capture_type = meta.get('type', 'unknown') if triggered else "periodic"
                if triggered:
                    logging.info(f"Sending triggered screenshot: type={capture_type}")
                send_screenshot_heartbeat(client, client_id, capture_type)
                last_sent = now
        except Exception as e:
            logging.error(f"Screenshot service error: {e}")
            time.sleep(60)  # Wait a minute before retrying
|
||||
|
||||
|
||||
|
||||
def main():
|
||||
global discovered
|
||||
print(f"[{datetime.now(timezone.utc).isoformat()}] simclient.py: program started")
|
||||
|
||||
Reference in New Issue
Block a user