feat: crash recovery, service_failed monitoring, broker health fields, command expiry sweep

- Add GET /api/clients/crashed endpoint (process_status=crashed or stale heartbeat)
- Add restart_app command action with same lifecycle + lockout as reboot_host
- Scheduler: crash auto-recovery loop (CRASH_RECOVERY_ENABLED flag, lockout, MQTT publish)
- Scheduler: unconditional command expiry sweep per poll cycle (sweep_expired_commands)
- Listener: subscribe to infoscreen/+/service_failed; persist service_failed_at + unit
- Listener: extract broker_connection block from health payload; persist reconnect_count + last_disconnect_at
- DB migration b1c2d3e4f5a6: service_failed_at, service_failed_unit, mqtt_reconnect_count, mqtt_last_disconnect_at on clients
- Add GET /api/clients/service_failed and POST /api/clients/<uuid>/clear_service_failed
- Monitoring overview API: include mqtt_reconnect_count + mqtt_last_disconnect_at per client
- Frontend: orange service-failed alert panel (hidden when empty, auto-refreshing, with a "Quittieren" (acknowledge) action)
- Frontend: MQTT reconnect count + last disconnect in client detail panel
- MQTT auth hardening: listener/scheduler/server use env credentials; broker enforces allow_anonymous false
- Client command lifecycle foundation: ClientCommand model, reboot_host/shutdown_host, full ACK lifecycle
- Docs: TECH-CHANGELOG, DEV-CHANGELOG, MQTT_EVENT_PAYLOAD_GUIDE, copilot-instructions updated
- Add implementation-plans/, RESTART_VALIDATION_CHECKLIST.md, TODO.md
This commit is contained in:
2026-04-05 10:17:56 +00:00
parent 4d652f0554
commit 03e3c11e90
35 changed files with 2511 additions and 80 deletions

View File

@@ -2,9 +2,9 @@
<html lang="en">
<head>
<meta charset="UTF-8" />
<link rel="icon" type="image/svg+xml" href="/vite.svg" />
<link rel="icon" type="image/png" href="/favicon.png" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Vite + React + TS</title>
<title>Infoscreen</title>
</head>
<body>
<div id="root"></div>

Binary file not shown.

After

Width:  |  Height:  |  Size: 225 KiB

View File

@@ -25,10 +25,6 @@
{ "name": "Alembic", "license": "MIT" }
]
},
"buildInfo": {
"buildDate": "2025-12-29T12:00:00Z",
"commitId": "9f2ae8b44c3a"
},
"changelog": [
{
"version": "2026.1.0-alpha.16",

View File

@@ -39,6 +39,8 @@ export interface MonitoringClient {
};
latestLog?: MonitoringLogEntry | null;
latestError?: MonitoringLogEntry | null;
mqttReconnectCount?: number | null;
mqttLastDisconnectAt?: string | null;
}
export interface MonitoringOverview {

View File

@@ -24,6 +24,62 @@ export interface Group {
is_active?: boolean;
clients: Client[];
}
/**
 * One row of the crashed-clients listing (the `clients` array of
 * `CrashedClientsResponse`, fetched from `/api/clients/crashed`).
 */
export interface CrashedClient {
/** Client identifier; also used as React key and as the target of the restart request. */
uuid: string;
/** First choice for the display name in the dashboard (falls back to hostname, then uuid). */
description?: string | null;
/** Second choice for the display name. */
hostname?: string | null;
/** Shown in parentheses next to the display name when present. */
ip?: string | null;
/** Rendered as-is in the "Gruppe" column; a dash is shown when missing. */
group_id?: number | null;
is_alive: boolean;
/** Raw process status as reported by the API; rendered verbatim in the dashboard. */
process_status?: string | null;
screen_health_status?: string | null;
/** Timestamp of the last signal; rendered as a relative time. Presumably ISO-8601 — confirm against the backend. */
last_alive?: string | null;
/** Why the client is listed: its process crashed, or its heartbeat went stale. */
crash_reason: 'process_crashed' | 'heartbeat_stale';
}
/** Response body of `/api/clients/crashed` as consumed by `fetchCrashedClients`. */
export interface CrashedClientsResponse {
/** Number of listed clients; the dashboard hides the crash panel unless this is greater than 0. */
crashed_count: number;
/** NOTE(review): presumably the heartbeat staleness threshold applied by the server — confirm against the backend. */
grace_period_seconds: number;
/** The affected clients, one table row each. */
clients: CrashedClient[];
}
/**
 * One row of the service-failed listing (the `clients` array of
 * `ServiceFailedClientsResponse`, fetched from `/api/clients/service_failed`).
 */
export interface ServiceFailedClient {
/** Client identifier; also used as React key and as the target of the acknowledge request. */
uuid: string;
/** First choice for the display name in the dashboard (falls back to hostname, then uuid). */
description?: string | null;
/** Second choice for the display name. */
hostname?: string | null;
/** Shown in parentheses next to the display name when present. */
ip?: string | null;
/** Rendered as-is in the "Gruppe" column; a dash is shown when missing. */
group_id?: number | null;
is_alive: boolean;
/** Timestamp of the last signal; rendered as a relative time. */
last_alive?: string | null;
/**
 * When the service failure was recorded. The dashboard appends `Z` before parsing
 * when the value does not end in `Z`, i.e. it treats zone-less values as UTC.
 */
service_failed_at: string;
/** Name of the failed unit; shown in the "Unit" column, with a dash when missing. */
service_failed_unit?: string | null;
}
/** Response body of `/api/clients/service_failed` as consumed by `fetchServiceFailedClients`. */
export interface ServiceFailedClientsResponse {
/** Number of listed clients; the dashboard hides the service-failed panel unless this is greater than 0. */
service_failed_count: number;
/** The affected clients, one table row each. */
clients: ServiceFailedClient[];
}
/**
 * A command issued to a client, as returned by `fetchClientCommandStatus` and
 * optionally embedded in the restart/shutdown responses.
 *
 * NOTE(review): the `*At` fields are presumably ISO-8601 timestamp strings marking
 * the individual lifecycle steps — confirm against the backend serializer.
 */
export interface ClientCommand {
/** Identifier used to poll the command via `/api/clients/commands/{commandId}`. */
commandId: string;
clientUuid: string;
/** The operation requested from the client. */
action: 'reboot_host' | 'shutdown_host' | 'restart_app';
/** Current lifecycle state; the set of possible values is not visible in this file. */
status: string;
/** Optional free-text reason supplied with the request. */
reason?: string | null;
/** NOTE(review): looks like the numeric id of the requesting user — confirm. */
requestedBy?: number | null;
issuedAt?: string | null;
expiresAt?: string | null;
publishedAt?: string | null;
ackedAt?: string | null;
executionStartedAt?: string | null;
completedAt?: string | null;
failedAt?: string | null;
errorCode?: string | null;
errorMessage?: string | null;
createdAt?: string | null;
updatedAt?: string | null;
}
// Liefert alle Gruppen mit zugehörigen Clients
export async function fetchGroupsWithClients(): Promise<Group[]> {
const response = await fetch('/api/groups/with_clients');
@@ -79,9 +135,11 @@ export async function updateClient(uuid: string, data: { description?: string; m
return await res.json();
}
export async function restartClient(uuid: string): Promise<{ success: boolean; message?: string }> {
export async function restartClient(uuid: string, reason?: string): Promise<{ success: boolean; message?: string; command?: ClientCommand }> {
const response = await fetch(`/api/clients/${uuid}/restart`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ reason: reason || null }),
});
if (!response.ok) {
const error = await response.json();
@@ -90,6 +148,58 @@ export async function restartClient(uuid: string): Promise<{ success: boolean; m
return await response.json();
}
/**
 * Sends a shutdown request for a client via `POST /api/clients/{uuid}/shutdown`.
 *
 * @param uuid - Client identifier inserted into the URL path.
 * @param reason - Optional free-text reason; a missing or empty value is sent as `null`.
 * @returns The parsed JSON response body.
 * @throws Error carrying the server-supplied `error` text, or a German fallback
 *   message, when the response status is not OK — also when the error body is
 *   not valid JSON.
 */
export async function shutdownClient(uuid: string, reason?: string): Promise<{ success: boolean; message?: string; command?: ClientCommand }> {
  const response = await fetch(`/api/clients/${uuid}/shutdown`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ reason: reason || null }),
  });
  if (!response.ok) {
    // Error bodies are not guaranteed to be JSON (e.g. an HTML page from a proxy).
    // Swallow the parse failure so the caller sees the HTTP failure, not a SyntaxError.
    const error = await response.json().catch(() => ({}));
    throw new Error(error.error || 'Fehler beim Herunterfahren des Clients');
  }
  return await response.json();
}
/**
 * Loads the current state of a previously issued client command from
 * `GET /api/clients/commands/{commandId}`.
 *
 * @param commandId - Identifier of the command to look up.
 * @returns The parsed JSON response body.
 * @throws Error carrying the server-supplied `error` text, or a German fallback
 *   message, when the response status is not OK — also when the error body is
 *   not valid JSON.
 */
export async function fetchClientCommandStatus(commandId: string): Promise<ClientCommand> {
  const response = await fetch(`/api/clients/commands/${commandId}`);
  if (!response.ok) {
    // Error bodies are not guaranteed to be JSON; keep the HTTP failure visible
    // instead of letting a parse error replace it.
    const error = await response.json().catch(() => ({}));
    throw new Error(error.error || 'Fehler beim Laden des Command-Status');
  }
  return await response.json();
}
/**
 * Retrieves the list of crashed clients from `/api/clients/crashed`,
 * sending credentials with the request.
 *
 * @returns The parsed JSON response body.
 * @throws Error with the server's `error` text when available, otherwise a
 *   German fallback message, if the response status is not OK.
 */
export async function fetchCrashedClients(): Promise<CrashedClientsResponse> {
  const res = await fetch('/api/clients/crashed', { credentials: 'include' });
  if (res.ok) {
    return res.json();
  }
  // An unparsable error body is treated like an empty one.
  const details = await res.json().catch(() => ({}));
  throw new Error(details.error || 'Fehler beim Laden der abgestürzten Clients');
}
/**
 * Retrieves the clients currently flagged as service-failed from
 * `/api/clients/service_failed`, sending credentials with the request.
 *
 * @returns The parsed JSON response body.
 * @throws Error with the server's `error` text when available, otherwise a
 *   German fallback message, if the response status is not OK.
 */
export async function fetchServiceFailedClients(): Promise<ServiceFailedClientsResponse> {
  const res = await fetch('/api/clients/service_failed', { credentials: 'include' });
  if (res.ok) {
    return res.json();
  }
  // An unparsable error body is treated like an empty one.
  const details = await res.json().catch(() => ({}));
  throw new Error(details.error || 'Fehler beim Laden der service_failed Clients');
}
/**
 * Acknowledges (clears) the service-failed flag of one client via
 * `POST /api/clients/{uuid}/clear_service_failed`, sending credentials.
 *
 * @param uuid - Client identifier inserted into the URL path.
 * @returns The parsed JSON response body.
 * @throws Error with the server's `error` text when available, otherwise a
 *   German fallback message, if the response status is not OK.
 */
export async function clearServiceFailed(uuid: string): Promise<{ success: boolean; message?: string }> {
  const endpoint = `/api/clients/${uuid}/clear_service_failed`;
  const res = await fetch(endpoint, { method: 'POST', credentials: 'include' });
  if (res.ok) {
    return res.json();
  }
  // An unparsable error body is treated like an empty one.
  const details = await res.json().catch(() => ({}));
  throw new Error(details.error || 'Fehler beim Quittieren des service_failed Flags');
}
export async function deleteClient(uuid: string) {
const res = await fetch(`/api/clients/${uuid}`, {
method: 'DELETE',

View File

@@ -370,4 +370,49 @@
.monitoring-log-dialog-actions {
padding: 0 0.2rem 0.4rem;
}
}
/* Crash recovery panel: red accent bar on the left edge */
.monitoring-crash-panel {
border-left: 4px solid #dc2626;
margin-bottom: 1.5rem;
}
/* Service-failed panel: same layout as the crash panel, orange accent instead of red */
.monitoring-service-failed-panel {
border-left: 4px solid #ea580c;
margin-bottom: 1.5rem;
}
/* Table styling shared by the crash panel and the service-failed panel */
.monitoring-crash-table {
width: 100%;
border-collapse: collapse;
font-size: 0.875rem;
}
/* Header cells: muted text on a light background */
.monitoring-crash-table th {
text-align: left;
padding: 0.5rem 0.75rem;
font-weight: 600;
color: #64748b;
border-bottom: 1px solid #e2e8f0;
background: #f8fafc;
}
/* Body cells: thin separator line between rows */
.monitoring-crash-table td {
padding: 0.55rem 0.75rem;
border-bottom: 1px solid #f1f5f9;
vertical-align: middle;
}
/* No separator under the last row */
.monitoring-crash-table tr:last-child td {
border-bottom: none;
}
/* Row hover highlight; only td cells are targeted, so the th header row is unaffected.
   NOTE(review): the light-red tint also applies inside the orange service-failed panel. */
.monitoring-crash-table tr:hover td {
background: #fef2f2;
}
/* Small, muted secondary text (e.g. the IP next to a client name, panel hints) */
.monitoring-meta-hint {
color: #94a3b8;
font-size: 0.8rem;
}

View File

@@ -7,6 +7,16 @@ import {
type MonitoringLogEntry,
type MonitoringOverview,
} from './apiClientMonitoring';
import {
fetchCrashedClients,
fetchServiceFailedClients,
clearServiceFailed,
restartClient,
type CrashedClient,
type CrashedClientsResponse,
type ServiceFailedClient,
type ServiceFailedClientsResponse,
} from './apiClients';
import { useAuth } from './useAuth';
import { ButtonComponent } from '@syncfusion/ej2-react-buttons';
import { DropDownListComponent } from '@syncfusion/ej2-react-dropdowns';
@@ -156,6 +166,12 @@ const MonitoringDashboard: React.FC = () => {
const [screenshotErrored, setScreenshotErrored] = React.useState<boolean>(false);
const selectedClientUuidRef = React.useRef<string | null>(null);
const [selectedLogEntry, setSelectedLogEntry] = React.useState<MonitoringLogEntry | null>(null);
// Last successfully loaded crashed-clients listing; null until the first load succeeds.
const [crashedClients, setCrashedClients] = React.useState<CrashedClientsResponse | null>(null);
// Per-client state of the "Neustart" action, keyed by client UUID (missing key is treated as 'idle').
const [restartStates, setRestartStates] = React.useState<Record<string, 'idle' | 'loading' | 'success' | 'failed'>>({});
// Error message of the last failed restart request per client UUID; shown as tooltip.
const [restartErrors, setRestartErrors] = React.useState<Record<string, string>>({});
// Last successfully loaded service-failed listing; null until the first load succeeds.
const [serviceFailedClients, setServiceFailedClients] = React.useState<ServiceFailedClientsResponse | null>(null);
// Per-client state of the "Quittieren" action, keyed by client UUID (missing key is treated as 'idle').
const [clearStates, setClearStates] = React.useState<Record<string, 'idle' | 'loading' | 'success' | 'failed'>>({});
// Error message of the last failed acknowledge request per client UUID; shown as tooltip.
const [clearErrors, setClearErrors] = React.useState<Record<string, string>>({});
const selectedClient = React.useMemo<MonitoringClient | null>(() => {
if (!overview || !selectedClientUuid) return null;
@@ -197,9 +213,37 @@ const MonitoringDashboard: React.FC = () => {
}
}, []);
// Refreshes the crashed-clients panel data. Best effort: when the request
// fails, the previously loaded listing is simply kept.
const loadCrashedClients = React.useCallback(async () => {
  try {
    setCrashedClients(await fetchCrashedClients());
  } catch {
    // Deliberately ignored — a failed refresh must not break the dashboard.
  }
}, []);
// Refreshes the service-failed panel data. Best effort: when the request
// fails, the previously loaded listing is simply kept.
const loadServiceFailedClients = React.useCallback(async () => {
  try {
    setServiceFailedClients(await fetchServiceFailedClients());
  } catch {
    // Deliberately ignored — a failed refresh must not break the dashboard.
  }
}, []);
React.useEffect(() => {
loadOverview(hours, false);
}, [hours, loadOverview]);
loadCrashedClients();
loadServiceFailedClients();
}, [hours, loadOverview, loadCrashedClients, loadServiceFailedClients]);
// Polls both alert panels every REFRESH_INTERVAL_MS; the timer is cancelled
// on unmount or whenever one of the loader callbacks changes.
React.useEffect(() => {
  const refreshPanels = () => {
    loadCrashedClients();
    loadServiceFailedClients();
  };
  const timerId = window.setInterval(refreshPanels, REFRESH_INTERVAL_MS);
  return () => {
    window.clearInterval(timerId);
  };
}, [loadCrashedClients, loadServiceFailedClients]);
React.useEffect(() => {
const hasActivePriorityScreenshots = (overview?.summary.activePriorityScreenshots || 0) > 0;
@@ -308,6 +352,194 @@ const MonitoringDashboard: React.FC = () => {
{renderMetricCard('Fehler-Logs', overview?.summary.errorLogs || 0, 'Im gewählten Zeitraum', '#b91c1c')}
</div>
{crashedClients && crashedClients.crashed_count > 0 && (
<div className="monitoring-panel monitoring-crash-panel">
<div className="monitoring-panel-header">
<h3 style={{ color: '#dc2626' }}>
Abgestürzte / Nicht erreichbare Clients
</h3>
<span
style={{
background: '#fee2e2',
color: '#991b1b',
padding: '2px 10px',
borderRadius: '12px',
fontWeight: 600,
fontSize: '0.85rem',
}}
>
{crashedClients.crashed_count}
</span>
</div>
<table className="monitoring-crash-table">
<thead>
<tr>
<th>Client</th>
<th>Gruppe</th>
<th>Ursache</th>
<th>Prozessstatus</th>
<th>Letztes Signal</th>
<th>Aktion</th>
</tr>
</thead>
<tbody>
{crashedClients.clients.map((c: CrashedClient) => {
// Renders one table row per crashed client.
// Restart state and last error are tracked per client UUID; an unknown UUID counts as 'idle'.
const state = restartStates[c.uuid] || 'idle';
const errMsg = restartErrors[c.uuid];
// Display name preference: description, then hostname, then the raw UUID.
const displayName = c.description || c.hostname || c.uuid;
return (
<tr key={c.uuid}>
<td>
<span title={c.uuid}>{displayName}</span>
{c.ip && <span className="monitoring-meta-hint"> ({c.ip})</span>}
</td>
<td>{c.group_id ?? '—'}</td>
<td>
<span
className="monitoring-status-badge"
style={
c.crash_reason === 'process_crashed'
? { color: '#991b1b', backgroundColor: '#fee2e2' }
: { color: '#78350f', backgroundColor: '#fef3c7' }
}
>
{c.crash_reason === 'process_crashed' ? 'Prozess abgestürzt' : 'Heartbeat veraltet'}
</span>
</td>
<td>{c.process_status || '—'}</td>
<td>{formatRelative(c.last_alive)}</td>
<td>
{state === 'loading' && <span style={{ color: '#6b7280', fontSize: '0.85rem' }}>Wird gesendet</span>}
{state === 'success' && <span style={{ color: '#15803d', fontSize: '0.85rem' }}> Neustart gesendet</span>}
{state === 'failed' && (
<span style={{ color: '#dc2626', fontSize: '0.85rem' }} title={errMsg}>
Fehler
</span>
)}
{(state === 'idle' || state === 'failed') && (
<ButtonComponent
cssClass="e-small e-danger"
disabled={state === 'loading'}
onClick={async () => {
// Mark the row as busy and drop any error left over from a previous attempt.
setRestartStates(prev => ({ ...prev, [c.uuid]: 'loading' }));
setRestartErrors(prev => { const n = { ...prev }; delete n[c.uuid]; return n; });
try {
// The crash reason is passed along as the reason of the restart request.
await restartClient(c.uuid, c.crash_reason);
setRestartStates(prev => ({ ...prev, [c.uuid]: 'success' }));
// Keep the success note visible for 8 s, then reset the row and reload the listing.
// NOTE(review): this timer is not cancelled when the component unmounts.
setTimeout(() => {
setRestartStates(prev => ({ ...prev, [c.uuid]: 'idle' }));
loadCrashedClients();
}, 8000);
} catch (e) {
// Keep the message for the tooltip of the "Fehler" label; the button stays available for a retry.
const msg = e instanceof Error ? e.message : 'Unbekannter Fehler';
setRestartStates(prev => ({ ...prev, [c.uuid]: 'failed' }));
setRestartErrors(prev => ({ ...prev, [c.uuid]: msg }));
}
}}
>
Neustart
</ButtonComponent>
)}
</td>
</tr>
);
})}
</tbody>
</table>
</div>
)}
{serviceFailedClients && serviceFailedClients.service_failed_count > 0 && (
<div className="monitoring-panel monitoring-service-failed-panel">
<div className="monitoring-panel-header">
<h3 style={{ color: '#7c2d12' }}>
Service dauerhaft ausgefallen (systemd hat aufgegeben)
</h3>
<span
style={{
background: '#ffedd5',
color: '#7c2d12',
padding: '2px 10px',
borderRadius: '12px',
fontWeight: 600,
fontSize: '0.85rem',
}}
>
{serviceFailedClients.service_failed_count}
</span>
</div>
<p className="monitoring-meta-hint" style={{ marginBottom: '0.75rem' }}>
Diese Clients konnten von systemd nicht mehr automatisch neugestartet werden.
Manuelle Intervention erforderlich. Nach Behebung bitte quittieren.
</p>
<table className="monitoring-crash-table">
<thead>
<tr>
<th>Client</th>
<th>Gruppe</th>
<th>Unit</th>
<th>Ausgefallen am</th>
<th>Letztes Signal</th>
<th>Aktion</th>
</tr>
</thead>
<tbody>
{serviceFailedClients.clients.map((c: ServiceFailedClient) => {
  // Renders one table row per service-failed client.
  // Acknowledge state and last error are tracked per client UUID; an unknown UUID counts as 'idle'.
  const state = clearStates[c.uuid] || 'idle';
  const errMsg = clearErrors[c.uuid];
  // Display name preference: description, then hostname, then the raw UUID.
  const displayName = c.description || c.hostname || c.uuid;
  // Zone-less timestamps are interpreted as UTC. Append 'Z' only when the value has
  // no designator at all: a value with an explicit offset such as "...+00:00" must
  // not become "...+00:00Z", which parses to an Invalid Date.
  let failedAt = '—';
  if (c.service_failed_at) {
    const hasZone = /(?:Z|[+-]\d{2}:?\d{2})$/.test(c.service_failed_at);
    const parsed = new Date(hasZone ? c.service_failed_at : `${c.service_failed_at}Z`);
    // If the value still cannot be parsed, show it verbatim instead of "Invalid Date".
    failedAt = Number.isNaN(parsed.getTime()) ? c.service_failed_at : parsed.toLocaleString('de-DE');
  }
  return (
    <tr key={c.uuid}>
      <td>
        <span title={c.uuid}>{displayName}</span>
        {c.ip && <span className="monitoring-meta-hint"> ({c.ip})</span>}
      </td>
      <td>{c.group_id ?? '—'}</td>
      <td><code style={{ fontSize: '0.8rem' }}>{c.service_failed_unit || '—'}</code></td>
      <td>{failedAt}</td>
      <td>{formatRelative(c.last_alive)}</td>
      <td>
        {state === 'loading' && <span style={{ color: '#6b7280', fontSize: '0.85rem' }}>Wird quittiert</span>}
        {state === 'success' && <span style={{ color: '#15803d', fontSize: '0.85rem' }}> Quittiert</span>}
        {state === 'failed' && (
          <span style={{ color: '#dc2626', fontSize: '0.85rem' }} title={errMsg}> Fehler</span>
        )}
        {(state === 'idle' || state === 'failed') && (
          <ButtonComponent
            cssClass="e-small e-warning"
            disabled={state === 'loading'}
            onClick={async () => {
              // Mark the row as busy and drop any error left over from a previous attempt.
              setClearStates(prev => ({ ...prev, [c.uuid]: 'loading' }));
              setClearErrors(prev => { const n = { ...prev }; delete n[c.uuid]; return n; });
              try {
                await clearServiceFailed(c.uuid);
                setClearStates(prev => ({ ...prev, [c.uuid]: 'success' }));
                // Keep the success note visible for 4 s, then reset the row and reload the listing.
                setTimeout(() => {
                  setClearStates(prev => ({ ...prev, [c.uuid]: 'idle' }));
                  loadServiceFailedClients();
                }, 4000);
              } catch (e) {
                // Keep the message for the tooltip of the "Fehler" label; the button stays available for a retry.
                const msg = e instanceof Error ? e.message : 'Unbekannter Fehler';
                setClearStates(prev => ({ ...prev, [c.uuid]: 'failed' }));
                setClearErrors(prev => ({ ...prev, [c.uuid]: msg }));
              }
            }}
          >
            Quittieren
          </ButtonComponent>
        )}
      </td>
    </tr>
  );
})}
</tbody>
</table>
</div>
)}
{loading && !overview ? (
<MessageComponent severity="Info" content="Monitoring-Daten werden geladen ..." />
) : (
@@ -393,6 +625,16 @@ const MonitoringDashboard: React.FC = () => {
<span>Bildschirmstatus</span>
<strong>{selectedClient.screenHealthStatus || 'UNKNOWN'}</strong>
</div>
<div className="monitoring-detail-row">
<span>MQTT Reconnects</span>
<strong>{selectedClient.mqttReconnectCount != null ? selectedClient.mqttReconnectCount : '—'}</strong>
</div>
{selectedClient.mqttLastDisconnectAt && (
<div className="monitoring-detail-row">
<span>Letzter Disconnect</span>
<strong>{formatTimestamp(selectedClient.mqttLastDisconnectAt)}</strong>
</div>
)}
<div className="monitoring-detail-row">
<span>Letzte Analyse</span>
<strong>{formatTimestamp(selectedClient.lastScreenshotAnalyzed)}</strong>

View File

@@ -12,10 +12,6 @@ interface ProgramInfo {
frontend: { name: string; license: string }[];
backend: { name: string; license: string }[];
};
buildInfo: {
buildDate: string;
commitId: string;
};
changelog: {
version: string;
date: string;
@@ -85,30 +81,30 @@ const Programminfo: React.FC = () => {
</div>
</div>
<div className="e-card-content">
<div style={{ display: 'flex', flexDirection: 'column', gap: '0.5rem' }}>
<p>
<strong>Version:</strong> {info.version}
</p>
<p>
<strong>Copyright:</strong> {info.copyright}
</p>
<p>
<div style={{ display: 'flex', flexDirection: 'column', gap: '0.25rem' }}>
<div><strong>Version:</strong> {info.version}</div>
<div><strong>Copyright:</strong> {info.copyright}</div>
<div>
<strong>Support:</strong>{' '}
<a href={`mailto:${info.supportContact}`} style={{ color: '#2563eb', textDecoration: 'none' }}>
{info.supportContact}
</a>
</p>
<hr style={{ margin: '1rem 0' }} />
<h4 style={{ fontWeight: 600 }}>Build-Informationen</h4>
<p>
<strong>Build-Datum:</strong> {new Date(info.buildInfo.buildDate).toLocaleString('de-DE')}
</p>
<p>
<strong>Commit-ID:</strong>{' '}
</div>
<hr style={{ margin: '0.5rem 0' }} />
<div style={{ fontWeight: 600, fontSize: '0.875rem', marginBottom: '0.125rem' }}>Build-Informationen</div>
<div><strong>Build-Datum:</strong> {new Date(__BUILD_DATE__).toLocaleString('de-DE')}</div>
<div>
<strong>Umgebung:</strong>{' '}
<span style={{ fontFamily: monoFont, fontSize: '0.875rem', background: '#f3f4f6', padding: '0.125rem 0.25rem', borderRadius: '0.25rem' }}>
{info.buildInfo.commitId}
{__BUILD_ENV__}
</span>
</p>
</div>
<div>
<strong>Node.js:</strong>{' '}
<span style={{ fontFamily: monoFont, fontSize: '0.875rem', background: '#f3f4f6', padding: '0.125rem 0.25rem', borderRadius: '0.25rem' }}>
{__NODE_VERSION__}
</span>
</div>
</div>
</div>
</div>

View File

@@ -1 +1,5 @@
/// <reference types="vite/client" />
// Ambient declarations for the build-time constants injected through the
// `define` option of the Vite config.
/** ISO-8601 timestamp taken when the Vite config was evaluated. */
declare const __BUILD_DATE__: string;
/** Version string of the Node.js process that evaluated the Vite config (`process.version`). */
declare const __NODE_VERSION__: string;
/** Value of `NODE_ENV` when the Vite config was evaluated; 'development' when it was unset. */
declare const __BUILD_ENV__: string;

View File

@@ -6,6 +6,11 @@ import react from '@vitejs/plugin-react';
export default defineConfig({
cacheDir: './.vite',
plugins: [react()],
// Global constants made available to the frontend code. Each value is
// JSON-stringified so it is inserted as a string literal. The expressions are
// evaluated once, when this config file is loaded.
define: {
// Timestamp of config evaluation, shown as "Build-Datum" on the program info page.
__BUILD_DATE__: JSON.stringify(new Date().toISOString()),
// Node.js version of the process running Vite.
__NODE_VERSION__: JSON.stringify(process.version),
// NODE_ENV of the process running Vite, defaulting to 'development' when unset.
__BUILD_ENV__: JSON.stringify(process.env.NODE_ENV ?? 'development'),
},
resolve: {
// 🔧 KORRIGIERT: Entferne die problematischen Aliases komplett
// Diese verursachen das "not an absolute path" Problem