stage3_worker: v2.2 — absolute sudo/systemctl paths, error logging, reset failure counter on recovery failure

Mirrors stage2_worker v2.1 (da98019) resilience fixes:
- Absolute paths for /usr/bin/sudo and /bin/systemctl
- Log the return code, stdout, and stderr when the sidecar restart fails
- Reset consecutive_failures even when wedge recovery fails (prevents a
  permanently stuck state if the restart itself is broken)
This commit is contained in:
2026-05-01 18:40:25 +00:00
parent da980193dd
commit 1a8e0353f5
+9 -5
View File
@@ -44,7 +44,7 @@ HEARTBEAT_FILE = Path("/var/log/aaronai/stage3-heartbeat")
RETRY_ATTEMPTS = 2
POLL_INTERVAL = 5
INGEST_TIMEOUT = 600
WORKER_VERSION = "2.1"
WORKER_VERSION = "2.2"
# Match Stage 1 chunking parameters
CHUNK_SIZE_WORDS = 500
@@ -73,10 +73,13 @@ def recover_wedge():
Mirrors Stage 2's recover_wedge() for ollama. Requires passwordless sudo
for `systemctl restart aaronai-graphiti.service` for the worker's user."""
log.warning("Graphiti wedge detected — restarting sidecar")
subprocess.run(
["sudo", "systemctl", "restart", "aaronai-graphiti.service"],
capture_output=True
result = subprocess.run(
["/usr/bin/sudo", "/bin/systemctl", "restart", "aaronai-graphiti.service"],
capture_output=True, text=True
)
if result.returncode != 0:
log.error(f"Sidecar restart failed (rc={result.returncode}): stdout={result.stdout!r} stderr={result.stderr!r}")
return False
# Sidecar needs longer than ollama for model loading (sentence-transformers
# + BGE reranker + Graphiti library init)
time.sleep(45)
@@ -256,7 +259,8 @@ def run():
if consecutive_failures >= 2:
log.warning("Multiple consecutive failures — checking for Graphiti wedge")
recovered = recover_wedge()
if recovered:
if not recovered:
log.error("Wedge recovery failed — continuing anyway")
consecutive_failures = 0
time.sleep(10)
else: