From b9369316687604834889c5cc55f500e9f0a5f74a Mon Sep 17 00:00:00 2001
From: Aaron Nelson <aaron@aaronnelson.studio>
Date: Fri, 1 May 2026 05:18:09 +0000
Subject: [PATCH] =?UTF-8?q?Stage=203=20worker=20v2.1=20=E2=80=94=20saga-si?=
 =?UTF-8?q?ze=20limit=20+=20wedge=20detection=20+=20sudoers=20fixes?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Production incident 2026-05-01: F14 re-cascade attempt surfaced three
compounding issues in cascade resilience.

stage3_worker.py changes:
- MAX_CHUNKS_PER_SAGA=10 — large documents are now split into multiple
  bulk commits, all sharing the same saga tag for Graphiti document
  linking. The original implementation sent all chunks as one saga;
  17-19 chunk sagas deadlocked the sidecar's Python-side coordination.
- recover_wedge() function — restarts aaronai-graphiti.service when
  consecutive_failures hits threshold. Mirrors Stage 2 pattern.
- run() loop adds consecutive_failures counter with threshold-2
  escalation. Resolves F28 + F29 from code review.
- Worker version bumped 2.0 -> 2.1.
- post_bulk() helper extracts shared HTTP POST + error handling.

Outside-repo changes (system config, separately documented):
- WatchdogSec=600 commented out in the stage2 + stage3 systemd unit
  files. The workers have no sd_notify support, so they can never send
  the watchdog keep-alive; per-request timeouts in code handle the
  actual failure modes.
- /etc/sudoers.d/aaron-aaronai created with NOPASSWD entries for
  `systemctl restart ollama` and
  `systemctl restart aaronai-graphiti.service`. Stage 2's existing
  recover_wedge() had been silently broken since deployment due to
  this gap.

.gitignore — added rules for *.bak files, runtime artifacts
(watcher_heartbeat, dreamer_state.json, corpus_integrity_report.json,
watcher_state.json, watcher_status.json), Python cruft, virtual env,
.env, editor/OS files, and Aaron AI runtime data (conversations.db,
sessions.db, memory.md, settings.json). Note that the rewrite also
drops the previous *.env, Admin/, and migration_progress.json rules,
so files matching those patterns are no longer ignored.

Untracked 12 files that shouldn't have been committed in 465f2f7
(this morning): backup files and runtime artifacts.

Re-cascading Shop Class (414KB) and BirdAI-Experiments-Log.md (192KB)
through the patched worker after re-extracting full text from disk.
Cascade in progress at commit time.
---
 .gitignore                                    |   59 +-
 corpus_integrity_report.json                  |  161 ---
 dreamer_state.json                            |   46 -
 scripts/api.py.bak.20260501-001427            | 1287 -----------------
 scripts/consolidator_v0_1.py.bak              |  442 ------
 .../corpus_integrity.py.bak.20260501-021703   |  245 ----
 scripts/dream.py.bak                          |  554 -------
 scripts/dream.py.bak.20260501-002209          |  668 ---------
 scripts/graphiti_service.py.bak               |  171 ---
 scripts/ingest.py.bak.20260501-004131         |  182 ---
 scripts/stage3_worker.py                      |  135 +-
 scripts/watcher.py.bak                        |  210 ---
 scripts/watcher.py.bak.20260501-004131        |  448 ------
 watcher_heartbeat                             |    1 -
 14 files changed, 150 insertions(+), 4459 deletions(-)
 delete mode 100644 corpus_integrity_report.json
 delete mode 100644 dreamer_state.json
 delete mode 100644 scripts/api.py.bak.20260501-001427
 delete mode 100644 scripts/consolidator_v0_1.py.bak
 delete mode 100644 scripts/corpus_integrity.py.bak.20260501-021703
 delete mode 100644 scripts/dream.py.bak
 delete mode 100644 scripts/dream.py.bak.20260501-002209
 delete mode 100644 scripts/graphiti_service.py.bak
 delete mode 100644 scripts/ingest.py.bak.20260501-004131
 delete mode 100644 scripts/watcher.py.bak
 delete mode 100644 scripts/watcher.py.bak.20260501-004131
 delete mode 100644 watcher_heartbeat

diff --git a/.gitignore b/.gitignore
index 6640409..eef432d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,34 +1,49 @@
-# Environment and secrets
-.env
-*.env
+# Backup files (rely on git history instead)
+*.bak
+*.bak.*
 
-# Databases
-db/
-conversations.db
-sessions.db
+# Runtime artifacts
+watcher_heartbeat
+dreamer_state.json
+corpus_integrity_report.json
 watcher_state.json
 watcher_status.json
 
-# Python
+# Logs (these belong in /var/log/)
+*.log
+
+# Python artifacts
 __pycache__/
 *.pyc
 *.pyo
+*.pyd
+.pytest_cache/
+*.egg-info/
+
+# Virtual environment
 venv/
+.venv/
 
-# Logs
-*.log
+# Environment and secrets
+.env
+.env.local
+.env.*.local
 
-# Memory and settings (personal data)
-memory.md
-settings.json
-
-# Backups
-Admin/
-
-# OS
+# Editor and OS cruft
+.vscode/
+.idea/
+*.swp
+*.swo
 .DS_Store
 Thumbs.db
-dreamer_state.json
-migration_progress.json
-dreamer_state.json
-migration_progress.json
+
+# Local data not for repo
+db/
+embeddings/
+experiments/summary_embeddings_cache.json
+
+# Aaron AI runtime data (personal, do not commit)
+conversations.db
+sessions.db
+memory.md
+settings.json
diff --git a/corpus_integrity_report.json b/corpus_integrity_report.json
deleted file mode 100644
index b50c1eb..0000000
--- a/corpus_integrity_report.json
+++ /dev/null
@@ -1,161 +0,0 @@
-{
-  "timestamp": "2026-04-30T21:58:06.498354",
-  "summary": {
-    "filesystem_total": 1399,
-    "pgvector_total": 1215,
-    "graphiti_total": 1205,
-    "both": 998,
-    "pgvector_only": 10,
-    "neither": 129,
-    "graphiti_only": 0,
-    "failures": 0,
-    "orphans_pgvector": 207,
-    "orphans_graphiti": 207
-  },
-  "gaps": [
-    "test.md",
-    "test-watcher-trigger.md",
-    "Renders.pptx",
-    "Ribbon Cutting Slideshow.pptx",
-    "PRINTERS.docx",
-    "Course Calender.docx",
-    "Attendance.docx",
-    "GH Slicer Notes [Autosaved].pptx",
-    "GH Slicer Notes.pptx",
-    "06_Surface Commands.docx",
-    "03_Precision_1 Homework.docx",
-    "02_2D Geometry Toolbars.docx",
-    "00_Roster.docx",
-    "Homeworks.docx",
-    "02_Osnap_ModelingAids.docx",
-    "06_Solids.docx",
-    "04_Precision_2 Homework.docx",
-    "~\ufffdMod08_Post_Processing_2023.pptx",
-    "~\ufffdMod02_Industries_and_Applications_2023.pptx",
-    "~\ufffdMod04_FDM_Materials_2023.pptx",
-    "Byron_V Independent Study.pdf",
-    "IanC.Lorber_Final_2dsketches..pdf",
-    "Irene_Raptopoulos_Final.pdf",
-    "rcolon-final-schematic.pdf",
-    "kellogg_schematic_schem.pdf",
-    "Visa Soccer Goal Frame Drawing.pdf",
-    "4_13_2017_0_22_14.pdf",
-    "4_13_2017_0_50_29.pdf",
-    "4_13_2017_0_49_25.pdf",
-    "4_13_2017_12_27_19.pdf",
-    "4_13_2017_12_26_54.pdf",
-    "4_12_2017_21_7_14.pdf",
-    "4_13_2017_12_25_49.pdf",
-    "4_13_2017_12_26_5.pdf",
-    "4_13_2017_12_27_8.pdf",
-    "IanC.Lorber_Final_2dsketches  - 2016.pdf",
-    "MidtermLayout1.pdf",
-    "MidtermLayout2.pdf",
-    "pascar_finalSchematic.pdf",
-    "Week 7.pdf",
-    "EXAMPLE Art Education IM Rubric and Scaffold UPDATED copy.docx",
-    "DDF - IM Rubric and Scaffold UPDATED.docx",
-    "DI_9.pdf",
-    "DI_6.pdf",
-    "DI_4.pdf",
-    "DI_7.pdf",
-    "DI_8.pdf",
-    "DI_2.pdf",
-    "DI_1.pdf",
-    "DI_3.pdf",
-    "DI_5.pdf",
-    "JN EVAL.pdf",
-    "Amazon.com_ Iwata Eclipse HP-CS Airbrush - Gravity Feed Dual Action, High-Flow Atomization for Fine Detail to Wide Coverage \u2013 E3 Nozzle, 0.pdf",
-    "RCPA1.pdf",
-    "image2022-01-07-133846 - CAryn.pdf",
-    "image2022-01-07-134536 - Conference.pdf",
-    "image2022-01-07-131439.pdf",
-    "image2022-01-07-135248 - BSC Revision.pdf",
-    "image2022-01-07-141250- LAI.pdf",
-    "image2022-01-07-134938 - Maker in Residence.pdf",
-    "image2022-01-07-133157 - Teching Evals.pdf",
-    "image2022-01-07-135538 PRS Medal.pdf",
-    "image2022-01-07-131217.pdf",
-    "image2022-01-07-135911 - HVAMC.pdf",
-    "image2022-01-07-133504 - Sarah and OLiva.pdf",
-    "image2022-01-07-131903 - Annual Reports.pdf",
-    "image2022-01-07-132917 - Mastery.pdf",
-    "Annual Report - 2016.pdf",
-    "Annual Report - 2018.pdf",
-    "Annual Report - 2019.pdf",
-    "Annual Report - 2017.pdf",
-    "MOU 2018 - 19.pdf",
-    "Appointment 2016.pdf",
-    "Appointment 2019 - 2021.pdf",
-    "MOU 2017.pdf",
-    "SKMBT_55220060909245.pdf",
-    "Dean SSE Letter 2018.pdf",
-    "Dean FPA Letter 2018.pdf",
-    "Sarah Thesis Article.pdf",
-    "Caryn Bylott Thesis.pdf",
-    "Olivia Thesis.pdf",
-    "Teching Evals.pdf",
-    "Maker in Residence - Dalles v2.pdf",
-    "Modela Gallery - Ripples from Stillwater.pdf",
-    "Maker in Residence - Dalles.pdf",
-    "Makerbot Innovation Center Director.pdf",
-    "Design Week 2019.pdf",
-    "DW Workshops.pdf",
-    "Ron Rael Lecture.pdf",
-    "BSC Revision.pdf",
-    "President's Medal.pdf",
-    "CNC Workshop.pdf",
-    "Dorksy.pdf",
-    "FULL SCAN.pdf",
-    "DDF Course Fee Rev 2017.pdf",
-    "SlideSlam 2019.pdf",
-    "Candy and Cold Cases.pdf",
-    "Murder ID.pdf",
-    "HVAMC SuperLab.pdf",
-    "Certifications.pdf",
-    "Brazkem Letter.pdf",
-    "Havana 2017.pdf",
-    "CAA Conference - 2020.pdf",
-    "CAA Conference 2019.pdf",
-    "Central Hudson Grant.pdf",
-    "3D Printing Updates - Council of Industry.pdf",
-    "k12 Training.pdf",
-    "Hudson Valley Futures Summit.pdf",
-    "StateAssemblymanTour.pdf",
-    "Best of New Paltz.pdf",
-    "HVAMC - ColorPage.pdf",
-    "HVAMC - Daily Freeman.pdf",
-    "Chocolate Skulls.pdf",
-    "10_13_2016_21_19_43.pdf",
-    "10_13_2016_21_19_46.pdf",
-    "10_13_2016_21_19_1.pdf",
-    "10_13_2016_21_18_55.pdf",
-    "161012_102427.pdf",
-    "161012_102737.pdf",
-    "161012_114217.pdf",
-    "Eto Forms.txt",
-    "AaronNelsonUndergraduteTranscript Unsecured.pdf",
-    "Aaron Nelson Transcript Undergradute Unsecured.pdf",
-    "AP023.pdf",
-    "How Buildings Learn_ What Happens After They are Built -- Stewart Brand .pdf",
-    "Occupying and connecting _ thoughts on territories and -- Frei Otto; Berthold Burkhardt.pdf",
-    "SilkwormManual.pdf",
-    "NELSON commitment letter.pdf",
-    "i-9.pdf"
-  ],
-  "failures": [],
-  "auto_queued": [],
-  "pgvector_only_sample": [
-    "dreamer_changelog.md",
-    "experiments-log-additions-2026-04-30.md",
-    "2026-04-30-15-59-voice.md",
-    "2026-04-30-16-59-voice.md",
-    "2026-04-30-16-53-voice.md",
-    "2026-04-30-17-06-voice.md",
-    "2026-04-30-16-23-voice.md",
-    "2026-04-30-late-rem.md",
-    "2026-04-30-synthesis.md",
-    "2026-04-30-nrem.md"
-  ],
-  "graphiti_only": []
-}
\ No newline at end of file
diff --git a/dreamer_state.json b/dreamer_state.json
deleted file mode 100644
index ffcda6f..0000000
--- a/dreamer_state.json
+++ /dev/null
@@ -1,46 +0,0 @@
-{
-  "last_dream_timestamp": 1777536047.392913,
-  "last_dream_mode": "pipeline",
-  "last_dream_file": "Journal/Dreams/2026-04-30-synthesis.md",
-  "retrieved_sources": [
-    "ChatGPT: CV Summary Request",
-    "ChatGPT: Program response drafting",
-    "Cognition in the Wild (A Bradford Book) -- Hutchins, Edwin.pdf",
-    "ChatGPT: Digital Fabrication Cultural Project",
-    "Dossier Narrative.pdf",
-    "E1_8-taxonomy-free-cascade-protocol.md",
-    "References.docx",
-    "ChatGPT: Career change anxiety",
-    "Dossier Narrative.docx",
-    "Dossier Narrative Kill Me PLS_REV_HOME.docx",
-    "Aaron Nelson Tenure Dossier Narrative.pdf",
-    "Claude: Importing chat history from ChatGPT",
-    "aaronai-architecture.md",
-    "Aaron AI: What should I be the most excited about right now?",
-    "ChatGPT: Dean Position Evaluation",
-    "2026-04-27-early-rem-1.md",
-    "The Poetics of Space -- Gaston Bachelard translated from the French by Maria Jolas -- First Edition, 1994.pdf",
-    "Dossier Narrative Kill Me PLS.docx",
-    "Advances in Architectural Geometry 2023 -- Kathrin D\u00f6rfler (editor); Jan Knippers (editor); Achim.pdf",
-    "Claude: I filling out my annual report...",
-    "References.pdf",
-    "2026-04-28-early-rem.md",
-    "Claude: Law enforcement career options",
-    "Dossier Narrative Kill Me PLS_REV_2.docx",
-    "2026-04-29-late-rem.md",
-    "Dossier Narrative Kill Me PLS_REV.docx",
-    "Utah MDD - Aaron Nelson - Copy.pptx",
-    "Dossier Narrative Kill Me PLS_REV_3.docx",
-    "Aaron AI: Who's covering for me on sabbatical?",
-    "ChatGPT: GA Proposal Revision Guide",
-    "BirdAI-Experiments-Log.md",
-    "Mod06_GrabCAD_Print_and _Advanced_FDM_2023.pptx",
-    "The Extended Mind _ The Power of Thinking Outside the Brain -- Annie Murphy Paul.pdf",
-    "Company of One -- Paul Jarvis.pdf",
-    "Fabrication Processes_Syllabus.docx",
-    "ChatGPT: Digital fabrication education",
-    "Fabrication Processes_Syllabus DDF710 V3.docx",
-    "Claude: Setting up a custom OpenClaw instance",
-    "Claude: Weighing Utah versus Oklahoma"
-  ]
-}
\ No newline at end of file
diff --git a/scripts/api.py.bak.20260501-001427 b/scripts/api.py.bak.20260501-001427
deleted file mode 100644
index 85b5336..0000000
--- a/scripts/api.py.bak.20260501-001427
+++ /dev/null
@@ -1,1287 +0,0 @@
-import os
-import json
-import sqlite3
-import subprocess
-import hashlib
-from pathlib import Path
-from datetime import datetime
-from dotenv import load_dotenv
-from sentence_transformers import SentenceTransformer
-import anthropic
-from fastapi import FastAPI, Request, Response, Depends, HTTPException, BackgroundTasks
-import psycopg2
-import psycopg2.extras
-from fastapi import UploadFile, File, Form
-import tempfile
-import os
-try:
-    from faster_whisper import WhisperModel
-    HAS_WHISPER = True
-except ImportError:
-    HAS_WHISPER = False
-from fastapi.responses import FileResponse, JSONResponse
-import secrets
-import hashlib
-from fastapi.responses import FileResponse, JSONResponse
-from fastapi.staticfiles import StaticFiles
-from fastapi.middleware.cors import CORSMiddleware
-import uvicorn
-import asyncio
-from fastapi.responses import StreamingResponse
-from apscheduler.schedulers.background import BackgroundScheduler
-from apscheduler.triggers.cron import CronTrigger
-
-load_dotenv(Path.home() / "aaronai" / ".env")
-
-MEMORY_PATH = Path.home() / "aaronai" / "memory.md"
-DB_PATH = str(Path.home() / "aaronai" / "db")
-CONVERSATIONS_DB = str(Path.home() / "aaronai" / "conversations.db")
-SETTINGS_PATH = Path.home() / "aaronai" / "settings.json"
-WATCHER_LOG = str(Path.home() / "aaronai" / "watcher.log")
-WATCHER_STATE = str(Path.home() / "aaronai" / "watcher_state.json")
-NEXTCLOUD_PATH = "/home/aaron/nextcloud/data/data/aaron/files"
-INGEST_SCRIPT = str(Path.home() / "aaronai" / "scripts" / "ingest.py")
-PYTHON = str(Path.home() / "aaronai" / "venv" / "bin" / "python3")
-
-DEFAULT_SETTINGS = {
-    "theme": "light",
-    "font_size": "medium",
-    "web_search": True,
-    "show_sources": True,
-    "dream_hour_utc": 8,
-    "dream_minute_utc": 0,
-    "dream_mode": "nrem",
-    "ingest_hour_utc": 2,
-    "ingest_minute_utc": 30,
-    "share_time": True,
-}
-
-print("Loading Aaron AI...")
-PG_DSN = os.getenv("PG_DSN")
-
-def get_pg():
-    return psycopg2.connect(PG_DSN)
-WHISPER_PROMPT = (
-    "Grasshopper, Rhino, PolyJet, SLA, FDM, DMLS, "
-    "HVAMC, FWN3D, Mossygear, Nextcloud, Gitea, "
-    "computational design, additive manufacturing, fabrication, "
-    "Graphiti, FalkorDB, pgvector, BirdAI, Active Inference, "
-    "dreamer, consolidator, Extended Mind, "
-    "Aaron Nelson, SUNY New Paltz, University of Utah, MDD"
-)
-whisper_model = None
-if HAS_WHISPER:
-    try:
-        whisper_model = WhisperModel("large-v3", device="cpu", compute_type="int8", cpu_threads=8)
-        print("Whisper model loaded")
-    except Exception as e:
-        print(f"Whisper not available: {e}")
-embedder = SentenceTransformer("all-MiniLM-L6-v2")
-# ChromaDB removed — using pgvector
-anthropic_client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
-
-SYSTEM_PROMPT = """You are the personal AI assistant of Aaron Nelson — computational
-designer, fabrication researcher, program builder, and creative
-practitioner based in the Hudson Valley.
-
-Aaron's work sits at the intersection of computational geometry,
-additive manufacturing, and physical making. He resolves complex
-systems into physical reality — from Grasshopper definitions to
-large-scale steel structures, from archival photographs to 3D-printed
-architectural restorations, from product concepts to manufactured
-goods. This throughline — computation resolving into physical form —
-defines his practice across academic, consulting, and creative contexts.
-
-He built the Hudson Valley Additive Manufacturing Center (HVAMC) from
-nothing and has directed it since 2016, alongside the DDF academic
-program at SUNY New Paltz. He has the skills of a founder — equipment
-selection, policy creation, client development, grant writing,
-curriculum design — operating within an academic structure. He
-consults with IBM, Braskem, Selux and others through FWN3D. He runs
-Mossygear as a product business. He makes large-scale fabricated art.
-
-His communication style is direct, precise, and intolerant of padding
-or overclaiming. He flags inaccuracies immediately and expects the
-same standard from you. When helping him write, match his voice —
-economical, specific, never performative. When answering questions,
-cite sources and acknowledge uncertainty rather than filling gaps with
-plausible-sounding content.
-
-You have access to his complete document corpus, conversation history,
-and a persistent memory file that carries his current context. Treat
-the memory file as ground truth for his present situation. Use web
-search automatically when current information is needed. Never
-re-brief on context that's already in memory or documents.
-
-When making factual claims about Aaron — his history, credentials, locations, dates, relationships, projects, or any specific event — you must ground the claim in a specific retrieved document or the memory file. Cite the source by name inline. If no source supports the claim, say so explicitly rather than filling the gap with plausible-sounding content. Do not confabulate. If you are inferring rather than citing, mark it as inference."""
-
-# Auth configuration
-import os
-SESSION_PASSWORD = os.getenv("AARON_AI_PASSWORD", "changeme")
-SESSIONS_DB = str(Path.home() / "aaronai" / "sessions.db")
-
-def _init_sessions():
-    conn = sqlite3.connect(SESSIONS_DB)
-    conn.execute("CREATE TABLE IF NOT EXISTS sessions (token TEXT PRIMARY KEY, created_at TEXT)")
-    conn.commit()
-    conn.close()
-
-_init_sessions()
-
-def make_session_token() -> str:
-    return secrets.token_urlsafe(32)
-
-def hash_password(password: str) -> str:
-    return hashlib.sha256(password.encode()).hexdigest()
-
-def save_session(token: str):
-    conn = sqlite3.connect(SESSIONS_DB)
-    conn.execute("INSERT OR REPLACE INTO sessions VALUES (?, ?)", (token, datetime.now().isoformat()))
-    conn.commit()
-    conn.close()
-
-def delete_session(token: str):
-    conn = sqlite3.connect(SESSIONS_DB)
-    conn.execute("DELETE FROM sessions WHERE token = ?", (token,))
-    conn.commit()
-    conn.close()
-
-def session_exists(token: str) -> bool:
-    conn = sqlite3.connect(SESSIONS_DB)
-    row = conn.execute("SELECT 1 FROM sessions WHERE token = ?", (token,)).fetchone()
-    conn.close()
-    return row is not None
-
-def get_session(request: Request) -> str | None:
-    return request.cookies.get("aaronai_session")
-
-def require_auth(request: Request):
-    token = get_session(request)
-    if not token or not session_exists(token):
-        raise HTTPException(status_code=401, detail="Not authenticated")
-    return token
-
-CV_SOURCES = ["Aaron Nelson CV 2024.pdf", "Aaron Nelson CV 2025.pdf", "Aaron Nelson - CV.docx"]
-
-def init_conversations_db():
-    conn = sqlite3.connect(CONVERSATIONS_DB)
-    c = conn.cursor()
-    c.execute('''CREATE TABLE IF NOT EXISTS conversations (
-        id TEXT PRIMARY KEY,
-        title TEXT NOT NULL,
-        created_at TEXT NOT NULL,
-        updated_at TEXT NOT NULL,
-        model TEXT DEFAULT 'claude-sonnet-4-6',
-        message_count INTEGER DEFAULT 0
-    )''')
-    c.execute('''CREATE TABLE IF NOT EXISTS messages (
-        id TEXT PRIMARY KEY,
-        conversation_id TEXT NOT NULL,
-        role TEXT NOT NULL,
-        content TEXT NOT NULL,
-        sources TEXT DEFAULT '[]',
-        timestamp TEXT NOT NULL,
-        FOREIGN KEY (conversation_id) REFERENCES conversations(id)
-    )''')
-    conn.commit()
-    conn.close()
-
-init_conversations_db()
-
-def load_settings():
-    if SETTINGS_PATH.exists():
-        try:
-            s = json.loads(SETTINGS_PATH.read_text())
-            return {**DEFAULT_SETTINGS, **s}
-        except:
-            pass
-    return DEFAULT_SETTINGS.copy()
-
-def save_settings(settings):
-    SETTINGS_PATH.write_text(json.dumps(settings, indent=2))
-
-def load_memory():
-    if MEMORY_PATH.exists():
-        return MEMORY_PATH.read_text(encoding="utf-8")
-    return ""
-
-def save_memory(content):
-    MEMORY_PATH.write_text(content, encoding="utf-8")
-
-def add_to_memory(item):
-    memory = load_memory()
-    timestamp = datetime.now().strftime("%Y-%m-%d")
-    note = f"\n- [{timestamp}] {item}"
-    if "## Notes" not in memory:
-        memory += "\n\n## Notes"
-    memory += note
-    save_memory(memory)
-
-def remove_from_memory(item):
-    memory = load_memory()
-    lines = memory.split("\n")
-    filtered = [l for l in lines if item.lower() not in l.lower()]
-    save_memory("\n".join(filtered))
-    return len(lines) - len(filtered)
-
-def get_pinned_cv_context():
-    try:
-        pg = get_pg()
-        cur = pg.cursor()
-        cur.execute(
-            "SELECT document, source FROM embeddings WHERE source = ANY(%s)",
-            (CV_SOURCES,)
-        )
-        rows = cur.fetchall()
-        pg.close()
-        docs = [r[0] for r in rows]
-        metas = [{"source": r[1]} for r in rows]
-        return docs, metas
-    except:
-        return [], []
-
-def is_professional_query(query):
-    keywords = ["grant", "publication", "exhibition", "award", "fellowship",
-        "experience", "position", "job", "career", "cv", "resume",
-        "research", "work history", "accomplishment", "teaching",
-        "course", "client", "consultation", "presentation", "workshop",
-        "education", "degree", "institution", "service", "committee"]
-    return any(k in query.lower() for k in keywords)
-
-def retrieve_context(query, n_results=8):
-    query_embedding = embedder.encode([query]).tolist()[0]
-    context_pieces = []
-    sources = []
-    if is_professional_query(query):
-        cv_docs, cv_metas = get_pinned_cv_context()
-        for doc, meta in zip(cv_docs, cv_metas):
-            context_pieces.append(f"[CV] {doc}")
-            sources.append(meta.get("source", "CV"))
-    try:
-        pg = get_pg()
-        cur = pg.cursor()
-        cur.execute("""
-            SELECT document, source, 1 - (embedding <=> %s::vector) as similarity
-            FROM embeddings
-            WHERE source NOT IN %s
-            ORDER BY embedding <=> %s::vector
-            LIMIT %s
-        """, (query_embedding, tuple(CV_SOURCES) if CV_SOURCES else ('__none__',),
-              query_embedding, n_results))
-        for doc, source, similarity in cur.fetchall():
-            if similarity > 0.3:
-                context_pieces.append(doc)
-                sources.append(source or "unknown")
-        pg.close()
-    except Exception as e:
-        print(f"pgvector retrieval error: {e}")
-    return context_pieces, sources
-
-def get_conversation_history(conversation_id, limit=20):
-    conn = sqlite3.connect(CONVERSATIONS_DB)
-    c = conn.cursor()
-    c.execute('''SELECT role, content FROM messages
-                 WHERE conversation_id = ?
-                 ORDER BY timestamp DESC LIMIT ?''', (conversation_id, limit))
-    rows = c.fetchall()
-    conn.close()
-    return [{"role": r[0], "content": r[1]} for r in reversed(rows)]
-
-def save_message(conversation_id, role, content, sources=None):
-    conn = sqlite3.connect(CONVERSATIONS_DB)
-    c = conn.cursor()
-    msg_id = hashlib.md5(f"{conversation_id}{role}{datetime.now().isoformat()}".encode()).hexdigest()
-    timestamp = datetime.now().isoformat()
-    c.execute('''INSERT INTO messages (id, conversation_id, role, content, sources, timestamp)
-                 VALUES (?, ?, ?, ?, ?, ?)''',
-              (msg_id, conversation_id, role, content,
-               json.dumps(sources or []), timestamp))
-    c.execute('''UPDATE conversations SET updated_at = ?, message_count = message_count + 1
-                 WHERE id = ?''', (timestamp, conversation_id))
-    conn.commit()
-    conn.close()
-
-def create_conversation(title="New conversation"):
-    conn = sqlite3.connect(CONVERSATIONS_DB)
-    c = conn.cursor()
-    conv_id = hashlib.md5(f"{datetime.now().isoformat()}".encode()).hexdigest()[:16]
-    now = datetime.now().isoformat()
-    c.execute('''INSERT INTO conversations (id, title, created_at, updated_at)
-                 VALUES (?, ?, ?, ?)''', (conv_id, title, now, now))
-    conn.commit()
-    conn.close()
-    return conv_id
-
-def chat(user_message, conversation_id, settings, client_time=None):
-    memory = load_memory()
-    context_pieces, sources = retrieve_context(user_message)
-    history = get_conversation_history(conversation_id)
-
-    context_parts = []
-    if client_time:
-        context_parts.append(f"Current time (user-supplied, not logged): {client_time}")
-    if memory:
-        context_parts.append(f"Aaron's persistent memory:\n\n{memory}")
-    if context_pieces:
-        context_str = "\n\n---\n\n".join(context_pieces)
-        unique_sources = list(set(sources))
-        context_parts.append(
-            f"Relevant excerpts from Aaron's documents:\n\n{context_str}\n\nSources: {', '.join(unique_sources)}"
-        )
-    context_block = "\n\n====\n\n".join(context_parts) + "\n\n---\n\n" if context_parts else ""
-    full_message = context_block + user_message
-
-    messages = history + [{"role": "user", "content": full_message}]
-
-    tools = [{"type": "web_search_20250305", "name": "web_search"}] if settings.get("web_search", True) else []
-
-    while True:
-        kwargs = {
-            "model": "claude-sonnet-4-6",
-            "max_tokens": 2048,
-            "system": SYSTEM_PROMPT,
-            "messages": messages
-        }
-        if tools:
-            kwargs["tools"] = tools
-
-        response = anthropic_client.messages.create(**kwargs)
-
-        if response.stop_reason == "tool_use":
-            messages.append({"role": "assistant", "content": response.content})
-            tool_results = []
-            for block in response.content:
-                if block.type == "tool_use":
-                    tool_results.append({
-                        "type": "tool_result",
-                        "tool_use_id": block.id,
-                        "content": "Search completed"
-                    })
-            messages.append({"role": "user", "content": tool_results})
-        else:
-            assistant_message = ""
-            for block in response.content:
-                if hasattr(block, "text"):
-                    assistant_message += block.text
-            return assistant_message, list(set(sources))
-
-from contextlib import asynccontextmanager
-
-@asynccontextmanager
-async def lifespan(app: FastAPI):
-    reschedule_jobs()
-    scheduler.start()
-    print("Scheduler started")
-    yield
-    scheduler.shutdown()
-    print("Scheduler stopped")
-
-app = FastAPI(lifespan=lifespan)
-app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
-
-@app.post("/auth/login")
-async def login(request: Request, response: Response):
-    data = await request.json()
-    password = data.get("password", "")
-    if hash_password(password) != hash_password(SESSION_PASSWORD):
-        raise HTTPException(status_code=401, detail="Invalid password")
-    token = make_session_token()
-    save_session(token)
-    response.set_cookie(
-        key="aaronai_session",
-        value=token,
-        httponly=True,
-        secure=True,
-        samesite="lax",
-        max_age=60 * 60 * 24 * 30
-    )
-    response.body = b'{"ok": true}'
-    response.status_code = 200
-    response.media_type = "application/json"
-    return response
-
-@app.post("/auth/logout")
-async def logout(request: Request, response: Response):
-    token = get_session(request)
-    if token:
-        delete_session(token)
-    response.delete_cookie("aaronai_session")
-    return JSONResponse({"ok": True})
-
-@app.get("/auth/check")
-async def check_auth(request: Request):
-    token = get_session(request)
-    if not token or token not in SESSIONS:
-        return JSONResponse({"authenticated": False})
-    return JSONResponse({"authenticated": True})
-
-@app.get("/", response_class=FileResponse)
-async def index():
-    return FileResponse("/home/aaron/aaronai/static/index.html")
-
-@app.get("/api/settings")
-async def get_settings(auth: str = Depends(require_auth)):
-    return JSONResponse(load_settings())
-
-@app.post("/api/settings")
-async def update_settings(request: Request, auth: str = Depends(require_auth)):
-    data = await request.json()
-    settings = load_settings()
-    settings.update(data)
-    save_settings(settings)
-    # Reschedule if schedule settings changed
-    schedule_keys = {"dream_hour_utc","dream_minute_utc","dream_mode","ingest_hour_utc","ingest_minute_utc"}
-    if any(k in data for k in schedule_keys):
-        reschedule_jobs()
-    return JSONResponse(settings)
-
-@app.get("/api/conversations")
-async def list_conversations(auth: str = Depends(require_auth)):
-    conn = sqlite3.connect(CONVERSATIONS_DB)
-    c = conn.cursor()
-    c.execute('''SELECT id, title, created_at, updated_at, message_count
-                 FROM conversations ORDER BY updated_at DESC LIMIT 100''')
-    rows = c.fetchall()
-    conn.close()
-    return JSONResponse([{
-        "id": r[0], "title": r[1], "created_at": r[2],
-        "updated_at": r[3], "message_count": r[4]
-    } for r in rows])
-
-@app.post("/api/conversations")
-async def new_conversation(request: Request, auth: str = Depends(require_auth)):
-    data = await request.json()
-    title = data.get("title", "New conversation")
-    conv_id = create_conversation(title)
-    return JSONResponse({"id": conv_id, "title": title})
-
-@app.get("/api/conversations/{conv_id}/messages")
-async def get_messages(conv_id: str, auth: str = Depends(require_auth)):
-    conn = sqlite3.connect(CONVERSATIONS_DB)
-    c = conn.cursor()
-    c.execute('''SELECT role, content, sources, timestamp FROM messages
-                 WHERE conversation_id = ? ORDER BY timestamp ASC''', (conv_id,))
-    rows = c.fetchall()
-    conn.close()
-    return JSONResponse([{
-        "role": r[0], "content": r[1],
-        "sources": json.loads(r[2]), "timestamp": r[3]
-    } for r in rows])
-
-@app.patch("/api/conversations/{conv_id}")
-async def rename_conversation(conv_id: str, request: Request, auth: str = Depends(require_auth)):
-    data = await request.json()
-    title = data.get("title", "")
-    if not title:
-        return JSONResponse({"error": "Title required"}, status_code=400)
-    conn = sqlite3.connect(CONVERSATIONS_DB)
-    c = conn.cursor()
-    c.execute("UPDATE conversations SET title = ? WHERE id = ?", (title, conv_id))
-    conn.commit()
-    conn.close()
-    return JSONResponse({"id": conv_id, "title": title})
-
-@app.delete("/api/conversations/{conv_id}")
-async def delete_conversation(conv_id: str, auth: str = Depends(require_auth)):
-    conn = sqlite3.connect(CONVERSATIONS_DB)
-    c = conn.cursor()
-    c.execute("DELETE FROM messages WHERE conversation_id = ?", (conv_id,))
-    c.execute("DELETE FROM conversations WHERE id = ?", (conv_id,))
-    conn.commit()
-    conn.close()
-    return JSONResponse({"deleted": conv_id})
-
-@app.post("/api/chat")
-async def chat_endpoint(request: Request, auth: str = Depends(require_auth)):
-    data = await request.json()
-    user_message = data.get("message", "").strip()
-    conversation_id = data.get("conversation_id", "")
-    client_time = data.get("client_time", None)
-    settings = load_settings()
-
-    if not user_message:
-        return JSONResponse({"error": "Empty message"})
-
-    if not conversation_id:
-        conversation_id = create_conversation("New conversation")
-
-    stripped = user_message.strip().lower()
-
-    if stripped == "show memory":
-        return JSONResponse({"response": load_memory(), "sources": [], "conversation_id": conversation_id})
-
-    if stripped.startswith("remember:"):
-        item = user_message[9:].strip()
-        add_to_memory(item)
-        save_message(conversation_id, "user", user_message)
-        save_message(conversation_id, "assistant", f"Saved to memory: '{item}'")
-        return JSONResponse({"response": f"Saved to memory: '{item}'", "sources": [], "conversation_id": conversation_id})
-
-    if stripped.startswith("forget:"):
-        item = user_message[7:].strip()
-        removed = remove_from_memory(item)
-        msg = f"Removed {removed} line(s) containing '{item}'" if removed else f"Nothing found containing '{item}'"
-        save_message(conversation_id, "user", user_message)
-        save_message(conversation_id, "assistant", msg)
-        return JSONResponse({"response": msg, "sources": [], "conversation_id": conversation_id})
-
-    save_message(conversation_id, "user", user_message)
-
-    # Auto-title conversation from first message
-    conn = sqlite3.connect(CONVERSATIONS_DB)
-    c = conn.cursor()
-    c.execute("SELECT message_count, title FROM conversations WHERE id = ?", (conversation_id,))
-    row = c.fetchone()
-    conn.close()
-    if row and row[0] <= 1 and row[1] == "New conversation":
-        auto_title = user_message[:60] + ("..." if len(user_message) > 60 else "")
-        conn = sqlite3.connect(CONVERSATIONS_DB)
-        c = conn.cursor()
-        c.execute("UPDATE conversations SET title = ? WHERE id = ?", (auto_title, conversation_id))
-        conn.commit()
-        conn.close()
-
-    response, sources = chat(user_message, conversation_id, settings, client_time=client_time)
-    save_message(conversation_id, "assistant", response, sources if settings.get("show_sources") else [])
-
-    return JSONResponse({
-        "response": response,
-        "sources": sources if settings.get("show_sources") else [],
-        "conversation_id": conversation_id
-    })
-
-@app.get("/api/memory")
-async def get_memory(auth: str = Depends(require_auth)):
-    return JSONResponse({"content": load_memory()})
-
-@app.post("/api/memory")
-async def update_memory(request: Request, auth: str = Depends(require_auth)):
-    data = await request.json()
-    content = data.get("content", "")
-    save_memory(content)
-    return JSONResponse({"saved": True})
-
-@app.get("/api/status")
-async def get_status(auth: str = Depends(require_auth)):
-    try:
-        pg = get_pg()
-        cur = pg.cursor()
-        cur.execute("SELECT COUNT(*) FROM embeddings")
-        chunk_count = cur.fetchone()[0]
-        pg.close()
-    except:
-        chunk_count = 0
-
-    # Watcher status
-    watcher_running = False
-    watcher_ingestion = {"status": "idle", "message": "", "file_count": 0}
-    last_indexed = "Unknown"
-    try:
-        import time as _time, json as _json
-        _sp = Path("/home/aaron/aaronai/watcher_status.json")
-        if _sp.exists():
-            _s = _json.loads(_sp.read_text())
-            _age = _time.time() - _s.get("timestamp", 0)
-            watcher_running = _s.get("running", False) and _age < 30
-            watcher_ingestion = _s.get("ingestion", watcher_ingestion)
-    except:
-        pass
-
-    try:
-        log_path = Path(WATCHER_LOG)
-        if log_path.exists():
-            lines = log_path.read_text().strip().split("\n")
-            for line in reversed(lines):
-                if "Ingestion complete" in line:
-                    last_indexed = line.split(" - ")[0].strip()
-                    break
-    except:
-        pass
-
-    # File count from watcher state
-    file_count = 0
-    try:
-        state_path = Path(WATCHER_STATE)
-        if state_path.exists():
-            state = json.loads(state_path.read_text())
-            file_count = len(state)
-        else:
-            # Count files in Nextcloud directly
-            nc_path = Path(NEXTCLOUD_PATH)
-            if nc_path.exists():
-                file_count = sum(1 for f in nc_path.rglob("*") 
-                    if f.is_file() and f.suffix.lower() in {'.pdf','.docx','.pptx','.txt','.md'})
-    except:
-        pass
-
-    # Conversation count
-    conn = sqlite3.connect(CONVERSATIONS_DB)
-    c = conn.cursor()
-    c.execute("SELECT COUNT(*) FROM conversations")
-    conv_count = c.fetchone()[0]
-    conn.close()
-
-    return JSONResponse({
-        "aaron_ai": "running",
-        "watcher": "running" if watcher_running else "stopped",
-        "watcher_ingestion": watcher_ingestion,
-        "chunk_count": chunk_count,
-        "file_count": file_count,
-        "last_indexed": last_indexed,
-        "conversation_count": conv_count,
-        "model": "claude-sonnet-4-6",
-        "nextcloud_path": NEXTCLOUD_PATH
-    })
-
-@app.post("/api/transcribe")
-async def transcribe_audio(request: Request, audio: UploadFile = File(...), auth: str = Depends(require_auth)):
-    if not whisper_model:
-        raise HTTPException(status_code=503, detail="Whisper not available")
-    try:
-        suffix = ".webm"
-        if audio.content_type and "mp4" in audio.content_type:
-            suffix = ".mp4"
-        elif audio.content_type and "ogg" in audio.content_type:
-            suffix = ".ogg"
-        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
-            content = await audio.read()
-            tmp.write(content)
-            tmp_path = tmp.name
-        segments, info = whisper_model.transcribe(
-            tmp_path,
-            language="en",
-            vad_filter=True,
-            initial_prompt=WHISPER_PROMPT
-        )
-        transcript = " ".join(s.text.strip() for s in segments)
-        os.unlink(tmp_path)
-        return JSONResponse({"text": transcript, "language": info.language})
-    except Exception as e:
-        if os.path.exists(tmp_path):
-            os.unlink(tmp_path)
-        return JSONResponse({"ok": False, "error": str(e), "error_type": "transcription_failed"}, status_code=500)
-
-@app.get("/api/dreamer/status")
-async def dreamer_status(auth: str = Depends(require_auth)):
-    try:
-        state_path = Path.home() / "aaronai" / "dreamer_state.json"
-        if state_path.exists():
-            state = json.loads(state_path.read_text())
-        else:
-            state = {}
-        last_ts = state.get("last_dream_timestamp", 0)
-        last_dt = datetime.fromtimestamp(last_ts).strftime("%Y-%m-%d %H:%M") if last_ts else "never"
-        raw_mode = state.get("last_dream_mode", "none")
-        display_mode = "full pipeline" if raw_mode == "pipeline" else raw_mode
-        return JSONResponse({
-            "last_dream": last_dt,
-            "last_mode": display_mode,
-            "last_file": state.get("last_dream_file", ""),
-        })
-    except Exception as e:
-        return JSONResponse({"last_dream": "unknown", "last_mode": "none", "last_file": "", "error": str(e)})
-
-@app.post("/api/dreamer/run")
-async def run_dreamer(request: Request, auth: str = Depends(require_auth)):
-    try:
-        body = await request.json()
-        mode = body.get("mode", "nrem")
-        task = body.get("task", None)
-        dream_script = str(Path.home() / "aaronai" / "scripts" / "dream.py")
-        cmd = [PYTHON, dream_script, "--mode", mode]
-        if task:
-            cmd += ["--task", task]
-        subprocess.Popen(cmd, cwd=str(Path.home() / "aaronai"))
-        return JSONResponse({"started": True, "mode": mode})
-    except Exception as e:
-        return JSONResponse({"started": False, "error": str(e)})
-
-def transcribe_and_save(tmp_path, timestamp, nextcloud_url, nextcloud_user, nextcloud_password):
-    """Background task — transcribes audio and saves to Nextcloud after endpoint returns."""
-    import requests as req_lib
-    nc_auth = (nextcloud_user, nextcloud_password)
-    try:
-        segments, _ = whisper_model.transcribe(
-            tmp_path, language="en", vad_filter=True, initial_prompt=WHISPER_PROMPT
-        )
-        transcript = " ".join(s.text.strip() for s in segments).strip()
-        os.unlink(tmp_path)
-        if not transcript:
-            print(f"Async transcription empty for {timestamp} — nothing saved")
-            return
-        filename = f"{timestamp}-voice.md"
-        content_md = f"# Capture — {timestamp}\n\n**type:** voice\n**modality:** audio\n**status:** unprocessed\n\n---\n\n{transcript}\n"
-        captures_dir = f"{nextcloud_url}/remote.php/dav/files/{nextcloud_user}/Journal/Captures"
-        req_lib.request("MKCOL", captures_dir, auth=nc_auth, timeout=10)
-        url = f"{captures_dir}/{filename}"
-        req_lib.put(url, data=content_md.encode("utf-8"), auth=nc_auth, timeout=30)
-        print(f"Async transcription saved: {filename}")
-        # Notify SSE clients that transcription is complete
-        try:
-            import requests as _req
-            _req.post("http://localhost:8000/api/events/notify", json={
-                "type": "capture_saved",
-                "filename": filename,
-                "timestamp": timestamp,
-            }, timeout=3)
-            _req.post("http://localhost:8000/api/captures/events/notify", json={
-                "type": "capture_saved",
-                "filename": filename,
-                "timestamp": timestamp,
-            }, timeout=3)
-        except Exception:
-            pass
-    except Exception as e:
-        if os.path.exists(tmp_path):
-            os.unlink(tmp_path)
-        print(f"Async transcription failed for {timestamp}: {e}")
-
-
-@app.post("/api/capture")
-async def capture_endpoint(
-    background_tasks: BackgroundTasks,
-    audio: UploadFile = File(None),
-    image: UploadFile = File(None),
-    project: str = Form(None),
-):
-    """Auth-free capture endpoint — handles voice, image, or image+voice."""
-    import requests as req_lib
-    import base64
-
-    nextcloud_url = os.getenv("NEXTCLOUD_URL", "")
-    nextcloud_user = os.getenv("NEXTCLOUD_USER", "aaron")
-    nextcloud_password = os.getenv("NEXTCLOUD_PASSWORD", "")
-    nc_auth = (nextcloud_user, nextcloud_password)
-    timestamp = datetime.now().strftime("%Y-%m-%d-%H-%M")
-    month_dir = datetime.now().strftime("%Y-%m")
-
-    # ── Image + optional voice ───────────────────────────────────────────────
-    if image is not None:
-        tmp_audio_path = None
-        try:
-            # Read image bytes
-            image_bytes = await image.read()
-            image_content_type = image.content_type or "image/jpeg"
-            # Determine extension
-            ext_map = {"image/jpeg": "jpg", "image/png": "png", "image/webp": "webp", "image/heic": "jpg"}
-            img_ext = ext_map.get(image_content_type, "jpg")
-            img_filename = f"{timestamp}-image.{img_ext}"
-
-            # Save raw image to Media/YYYY-MM/ via WebDAV
-            media_dir = f"{nextcloud_url}/remote.php/dav/files/{nextcloud_user}/Journal/Media/{month_dir}"
-            req_lib.request("MKCOL", f"{nextcloud_url}/remote.php/dav/files/{nextcloud_user}/Journal/Media", auth=nc_auth, timeout=10)
-            req_lib.request("MKCOL", media_dir, auth=nc_auth, timeout=10)
-            media_url = f"{media_dir}/{img_filename}"
-            req_lib.put(media_url, data=image_bytes, auth=nc_auth,
-                       headers={"Content-Type": image_content_type}, timeout=60)
-
-            # Transcribe voice annotation if present
-            voice_annotation = None
-            if audio is not None and whisper_model:
-                audio_bytes = await audio.read()
-                suffix = ".webm"
-                if audio.content_type and "mp4" in audio.content_type:
-                    suffix = ".mp4"
-                elif audio.content_type and "ogg" in audio.content_type:
-                    suffix = ".ogg"
-                with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
-                    tmp.write(audio_bytes)
-                    tmp_audio_path = tmp.name
-                segments, _ = whisper_model.transcribe(
-                    tmp_audio_path, language="en", vad_filter=True, initial_prompt=WHISPER_PROMPT
-                )
-                voice_annotation = " ".join(s.text.strip() for s in segments).strip() or None
-                os.unlink(tmp_audio_path)
-                tmp_audio_path = None
-
-            # Generate Claude vision description
-            image_b64 = base64.standard_b64encode(image_bytes).decode("utf-8")
-            annotation_line = f"Aaron said about this image: \"{voice_annotation}\"" if voice_annotation else ""
-            vision_prompt = f"""You are generating a memory description for an AI corpus belonging to Aaron Nelson — computational designer, fabrication researcher, and visual artist working in the Hudson Valley.
-
-Describe this image for long-term memory indexing.
-
-PERCEPTUAL: Composition, materials, light, color, texture, scale, spatial relationships. Be specific enough that this image could be distinguished from visually similar images.
-
-CONTENT: What is this? What domain does it belong to? What is it an instance of?
-
-{annotation_line}
-
-End your response with a single line in this exact format:
-ENTITIES: [comma-separated list of key entities — people, objects, materials, places, projects, tools]
-
-Keep the full description to 150-250 words. Do not speculate beyond what is visible or stated. Write as continuous prose followed by the ENTITIES line."""
-
-            vision_response = anthropic_client.messages.create(
-                model="claude-sonnet-4-6",
-                max_tokens=800,
-                messages=[{
-                    "role": "user",
-                    "content": [
-                        {
-                            "type": "image",
-                            "source": {
-                                "type": "base64",
-                                "media_type": image_content_type,
-                                "data": image_b64,
-                            }
-                        },
-                        {"type": "text", "text": vision_prompt}
-                    ]
-                }]
-            )
-            description = vision_response.content[0].text.strip()
-
-            # Build rich Graphiti-ready episode markdown
-            capture_type = "image+voice" if voice_annotation else "image"
-            modality = "visual+audio" if voice_annotation else "visual"
-            media_path = f"Journal/Media/{month_dir}/{img_filename}"
-
-            content_md = f"""# Capture — Image — {timestamp}
-
-**type:** {capture_type}
-**modality:** {modality}
-**status:** unprocessed
-**media:** {media_path}
-{f"**project:** {project}" if project else ""}
-
----
-
-**Visual description:**
-{description}
-
-**Voice annotation:**
-{voice_annotation if voice_annotation else "none recorded"}
-
----
-"""
-            # Save description to Journal/Captures/ via WebDAV
-            captures_dir = f"{nextcloud_url}/remote.php/dav/files/{nextcloud_user}/Journal/Captures"
-            req_lib.request("MKCOL", captures_dir, auth=nc_auth, timeout=10)
-            cap_filename = f"{timestamp}-image.md"
-            cap_url = f"{captures_dir}/{cap_filename}"
-            req_lib.put(cap_url, data=content_md.encode("utf-8"), auth=nc_auth, timeout=30)
-
-            return JSONResponse({
-                "ok": True,
-                "filename": cap_filename,
-                "media": media_path,
-                "has_voice": voice_annotation is not None,
-            })
-
-        except Exception as e:
-            if tmp_audio_path and os.path.exists(tmp_audio_path):
-                os.unlink(tmp_audio_path)
-            return JSONResponse({"ok": False, "error": str(e), "error_type": "transcription_failed"}, status_code=500)
-
-    # ── Voice only ───────────────────────────────────────────────────────────
-    elif audio is not None:
-        if not whisper_model:
-            raise HTTPException(status_code=503, detail="Whisper not available")
-        try:
-            suffix = ".webm"
-            if audio.content_type and "mp4" in audio.content_type:
-                suffix = ".mp4"
-            elif audio.content_type and "ogg" in audio.content_type:
-                suffix = ".ogg"
-            with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
-                content_bytes = await audio.read()
-                tmp.write(content_bytes)
-                tmp_path = tmp.name
-            background_tasks.add_task(
-                transcribe_and_save,
-                tmp_path=tmp_path,
-                timestamp=timestamp,
-                nextcloud_url=nextcloud_url,
-                nextcloud_user=nextcloud_user,
-                nextcloud_password=nextcloud_password,
-            )
-            return JSONResponse({"ok": True, "filename": f"{timestamp}-voice.md", "async": True})
-        except Exception as e:
-            return JSONResponse({"ok": False, "error": str(e), "error_type": "capture_failed"})
-
-    else:
-        raise HTTPException(status_code=400, detail="No audio or image provided")
-
-@app.get("/api/captures")
-async def list_captures():
-    """Returns recent captures from Nextcloud Journal/Captures/ — auth-free"""
-    try:
-        import requests as req_lib
-        nextcloud_url = os.getenv("NEXTCLOUD_URL", "")
-        nextcloud_user = os.getenv("NEXTCLOUD_USER", "aaron")
-        nextcloud_password = os.getenv("NEXTCLOUD_PASSWORD", "")
-        captures_dir = f"{nextcloud_url}/remote.php/dav/files/{nextcloud_user}/Journal/Captures"
-        auth = (nextcloud_user, nextcloud_password)
-
-        propfind = req_lib.request("PROPFIND", captures_dir, auth=auth, timeout=10,
-            headers={"Depth": "1"})
-
-        if propfind.status_code == 404:
-            return JSONResponse({"captures": []})
-
-        import xml.etree.ElementTree as ET
-        root = ET.fromstring(propfind.text)
-        ns = {"d": "DAV:"}
-        captures = []
-        for resp in root.findall("d:response", ns):
-            href = resp.findtext("d:href", namespaces=ns) or ""
-            if href.endswith("/"):
-                continue
-            name = href.split("/")[-1]
-            if not name.endswith(".md"):
-                continue
-            captures.append({"name": name.replace(".md", ""), "duration": ""})
-
-        captures.sort(key=lambda x: x["name"], reverse=True)
-        return JSONResponse({"captures": captures[:10]})
-    except Exception as e:
-        return JSONResponse({"captures": []})
-
-@app.post("/api/reindex")
-async def trigger_reindex(auth: str = Depends(require_auth)):
-    try:
-        subprocess.Popen([PYTHON, INGEST_SCRIPT, NEXTCLOUD_PATH])
-        return JSONResponse({"started": True, "message": "Re-indexing started in background"})
-    except Exception as e:
-        return JSONResponse({"started": False, "error": str(e)})
-
-@app.delete("/api/conversations")
-async def clear_all_conversations(auth: str = Depends(require_auth)):
-    conn = sqlite3.connect(CONVERSATIONS_DB)
-    c = conn.cursor()
-    c.execute("DELETE FROM messages")
-    c.execute("DELETE FROM conversations")
-    conn.commit()
-    conn.close()
-    return JSONResponse({"cleared": True})
-
-
-
-# ─── Corpus Integrity Endpoints ─────────────────────────────────────────────
-
-CORPUS_INTEGRITY_SCRIPT = str(Path.home() / "aaronai" / "scripts" / "corpus_integrity.py")
-CORPUS_REPORT_PATH = Path.home() / "aaronai" / "corpus_integrity_report.json"
-SUPPORTED_EXTS = {".pdf", ".docx", ".pptx", ".txt", ".md"}
-MIGRATION_STATE_PATH = Path.home() / "aaronai" / "experiments" / "tier1_migration_state.json"
-
-
-def get_corpus_status_data():
-    fs_count = 0
-    try:
-        root = Path(NEXTCLOUD_PATH)
-        for path in root.rglob("*"):
-            if path.is_file() and path.suffix.lower() in SUPPORTED_EXTS:
-                if path.name.startswith((".", "~$")): continue
-                if "Admin/Backups" in str(path) or "Backups" in path.parts: continue
-                if "Journal/Media" in str(path): continue
-                fs_count += 1
-    except Exception:
-        pass
-
-    pv_count = 0
-    try:
-        pg = get_pg()
-        cur = pg.cursor()
-        cur.execute("SELECT COUNT(DISTINCT source) FROM embeddings WHERE source IS NOT NULL")
-        pv_count = cur.fetchone()[0]
-        pg.close()
-    except Exception:
-        pass
-
-    gr_sources = set()
-    try:
-        if MIGRATION_STATE_PATH.exists():
-            state = json.loads(MIGRATION_STATE_PATH.read_text())
-            for fp in state.get("ingested", []):
-                gr_sources.add(Path(fp).name)
-    except Exception:
-        pass
-    try:
-        pg = get_pg()
-        cur = pg.cursor()
-        cur.execute("SELECT DISTINCT source FROM stage_3_queue WHERE completed_at IS NOT NULL")
-        for row in cur.fetchall(): gr_sources.add(row[0])
-        pg.close()
-    except Exception:
-        pass
-
-    failures = []
-    try:
-        pg = get_pg()
-        cur = pg.cursor()
-        cur.execute("""
-            SELECT source, filepath, error, retry_count, first_failed_at, last_failed_at
-            FROM ingest_failures WHERE resolved = FALSE
-            ORDER BY last_failed_at DESC LIMIT 50
-        """)
-        for row in cur.fetchall():
-            failures.append({
-                "source": row[0], "filepath": row[1], "error": row[2],
-                "retry_count": row[3], "first_failed_at": str(row[4]),
-                "last_failed_at": str(row[5]),
-            })
-        pg.close()
-    except Exception:
-        pass
-
-    last_report = None
-    try:
-        if CORPUS_REPORT_PATH.exists():
-            report = json.loads(CORPUS_REPORT_PATH.read_text())
-            last_report = {
-                "timestamp": report.get("timestamp"),
-                "gaps": report.get("summary", {}).get("neither", 0),
-                "auto_queued": len(report.get("auto_queued", [])),
-            }
-    except Exception:
-        pass
-
-    return {
-        "filesystem": fs_count,
-        "pgvector": pv_count,
-        "graphiti": len(gr_sources),
-        "failures": failures,
-        "failure_count": len(failures),
-        "last_reconciliation": last_report,
-    }
-
-
-@app.get("/api/corpus/status")
-async def corpus_status(auth: str = Depends(require_auth)):
-    try:
-        return JSONResponse(get_corpus_status_data())
-    except Exception as e:
-        return JSONResponse({"error": str(e)}, status_code=500)
-
-
-@app.post("/api/corpus/retry")
-async def corpus_retry(request: Request, auth: str = Depends(require_auth)):
-    try:
-        body = await request.json()
-        source = body.get("source", "")
-        if not source:
-            return JSONResponse({"error": "source required"}, status_code=400)
-        pg = get_pg()
-        cur = pg.cursor()
-        cur.execute("SELECT filepath FROM ingest_failures WHERE source = %s", (source,))
-        row = cur.fetchone()
-        pg.close()
-        if not row:
-            return JSONResponse({"error": "source not found in failures"}, status_code=404)
-        filepath = Path(row[0])
-        if not filepath.exists():
-            return JSONResponse({"error": f"file not found: {filepath}"}, status_code=404)
-        suffix = filepath.suffix.lower()
-        text = ""
-        try:
-            if suffix in {".txt", ".md"}:
-                text = filepath.read_text(encoding="utf-8", errors="ignore")
-            elif suffix == ".pdf":
-                from pypdf import PdfReader
-                text = "".join(p.extract_text() + "\n" for p in PdfReader(filepath).pages if p.extract_text())
-            elif suffix == ".docx":
-                from docx import Document as DocxDocument
-                text = "\n".join(p.text for p in DocxDocument(filepath).paragraphs if p.text.strip())
-            elif suffix == ".pptx":
-                from pptx import Presentation
-                prs = Presentation(filepath)
-                text = "\n".join(shape.text for slide in prs.slides for shape in slide.shapes
-                                if hasattr(shape, "text") and shape.text.strip())
-        except Exception as e:
-            return JSONResponse({"error": f"extraction failed: {e}"}, status_code=500)
-        if not text.strip():
-            return JSONResponse({"error": "file produces empty text — may be corrupt"}, status_code=422)
-        pg = get_pg()
-        cur = pg.cursor()
-        cur.execute("""
-            INSERT INTO stage_2_queue (source, full_text, char_length)
-            VALUES (%s, %s, %s)
-            ON CONFLICT (source) DO UPDATE SET
-                full_text = EXCLUDED.full_text, char_length = EXCLUDED.char_length,
-                enqueued_at = NOW(), completed_at = NULL, failed_at = NULL, attempts = 0
-        """, (source, text[:50000], len(text)))
-        cur.execute("""
-            UPDATE ingest_failures SET retry_count = retry_count + 1, last_failed_at = NOW()
-            WHERE source = %s
-        """, (source,))
-        pg.commit()
-        pg.close()
-        return JSONResponse({"queued": True, "source": source})
-    except Exception as e:
-        return JSONResponse({"error": str(e)}, status_code=500)
-
-
-@app.post("/api/corpus/reconcile")
-async def corpus_reconcile(request: Request, background_tasks: BackgroundTasks, auth: str = Depends(require_auth)):
-    try:
-        body = await request.json()
-        fix = body.get("fix", True)
-    except Exception:
-        fix = True
-    def run_reconcile():
-        try:
-            cmd = [PYTHON, CORPUS_INTEGRITY_SCRIPT]
-            if fix:
-                cmd.append("--fix")
-            subprocess.run(cmd, cwd=str(Path.home() / "aaronai"), timeout=300)
-        except Exception as e:
-            print(f"Reconciliation failed: {e}")
-    background_tasks.add_task(run_reconcile)
-    return JSONResponse({"started": True, "fix": fix})
-
-# ─── Scheduler ──────────────────────────────────────────────────────────────
-scheduler = BackgroundScheduler()
-
-def run_dream_job():
-    """Runs nightly dreamer — full interdependent pipeline, no mode flag."""
-    try:
-        import subprocess
-        dream_script = str(Path.home() / "aaronai" / "scripts" / "dream.py")
-        result = subprocess.run(
-            [PYTHON, dream_script],
-            cwd=str(Path.home() / "aaronai"),
-            capture_output=True, text=True, timeout=600
-        )
-        print(f"Dreamer completed: {result.stdout[-200:] if result.stdout else 'no output'}")
-        if result.returncode != 0:
-            print(f"Dreamer error: {result.stderr[-200:] if result.stderr else 'unknown'}")
-    except Exception as e:
-        print(f"Dreamer job failed: {e}")
-
-def run_ingest_job():
-    """Runs nightly conversation indexing."""
-    try:
-        import subprocess
-        ingest_script = str(Path.home() / "aaronai" / "scripts" / "ingest_conversations.py")
-        result = subprocess.run(
-            [PYTHON, ingest_script],
-            cwd=str(Path.home() / "aaronai"),
-            capture_output=True, text=True, timeout=300
-        )
-        print(f"Ingest completed: {result.stdout[-200:] if result.stdout else 'no output'}")
-    except Exception as e:
-        print(f"Ingest job failed: {e}")
-
-def reschedule_jobs():
-    """Update scheduler from current settings."""
-    settings = load_settings()
-    # Remove existing jobs
-    for job_id in ("dream_job", "ingest_job"):
-        try:
-            scheduler.remove_job(job_id)
-        except:
-            pass
-    # Add dream job
-    scheduler.add_job(
-        run_dream_job,
-        CronTrigger(hour=settings.get("dream_hour_utc", 8),
-                   minute=settings.get("dream_minute_utc", 0),
-                   timezone="UTC"),
-        id="dream_job",
-        max_instances=1,
-        replace_existing=True
-    )
-    # Add ingest job
-    scheduler.add_job(
-        run_ingest_job,
-        CronTrigger(hour=settings.get("ingest_hour_utc", 2),
-                   minute=settings.get("ingest_minute_utc", 30),
-                   timezone="UTC"),
-        id="ingest_job",
-        max_instances=1,
-        replace_existing=True
-    )
-    print(f"Scheduled: dream at {settings.get('dream_hour_utc',8):02d}:{settings.get('dream_minute_utc',0):02d} UTC, ingest at {settings.get('ingest_hour_utc',2):02d}:{settings.get('ingest_minute_utc',30):02d} UTC")
-
-# SSE client registry
-sse_clients: list[asyncio.Queue] = []
-capture_sse_clients: list[asyncio.Queue] = []
-
-async def sse_generator(queue: asyncio.Queue):
-    try:
-        yield 'data: {"type": "connected"}\n\n'
-        while True:
-            try:
-                event = await asyncio.wait_for(queue.get(), timeout=30.0)
-                import json as _json
-                yield 'data: ' + _json.dumps(event) + '\n\n'
-            except asyncio.TimeoutError:
-                yield 'data: {"type": "heartbeat"}\n\n'
-
-    except asyncio.CancelledError:
-        pass
-    finally:
-        if queue in sse_clients:
-            sse_clients.remove(queue)
-
-async def capture_sse_generator(queue: asyncio.Queue):
-    try:
-        yield 'data: {"type": "connected"}\n\n'
-        while True:
-            try:
-                event = await asyncio.wait_for(queue.get(), timeout=30.0)
-                import json as _json
-                yield 'data: ' + _json.dumps(event) + '\n\n'
-            except asyncio.TimeoutError:
-                yield 'data: {"type": "heartbeat"}\n\n'
-    except asyncio.CancelledError:
-        pass
-    finally:
-        if queue in capture_sse_clients:
-            capture_sse_clients.remove(queue)
-
-@app.get("/api/captures/events")
-async def capture_sse_endpoint(request: Request):
-    """Public SSE endpoint for capture page — no auth required."""
-    queue: asyncio.Queue = asyncio.Queue()
-    capture_sse_clients.append(queue)
-    return StreamingResponse(
-        capture_sse_generator(queue),
-        media_type="text/event-stream",
-        headers={
-            "Cache-Control": "no-cache",
-            "X-Accel-Buffering": "no",
-            "Connection": "keep-alive",
-        }
-    )
-
-@app.post("/api/captures/events/notify")
-async def notify_capture_clients(request: Request):
-    """Internal endpoint — called when transcription completes."""
-    client_host = request.client.host if request.client else ""
-    if client_host not in ("127.0.0.1", "::1", "localhost"):
-        raise HTTPException(status_code=403, detail="Internal only")
-    data = await request.json()
-    for queue in capture_sse_clients:
-        await queue.put(data)
-    return JSONResponse({"notified": len(capture_sse_clients)})
-
-@app.get("/api/events")
-async def sse_endpoint(request: Request, auth: str = Depends(require_auth)):
-    queue: asyncio.Queue = asyncio.Queue()
-    sse_clients.append(queue)
-    return StreamingResponse(
-        sse_generator(queue),
-        media_type="text/event-stream",
-        headers={
-            "Cache-Control": "no-cache",
-            "X-Accel-Buffering": "no",
-            "Connection": "keep-alive",
-        }
-    )
-
-@app.post("/api/events/notify")
-async def notify_clients(request: Request):
-    """Internal endpoint — called by dream.py when a dream is delivered"""
-    # Only allow from localhost
-    client_host = request.client.host if request.client else ""
-    if client_host not in ("127.0.0.1", "::1", "localhost"):
-        raise HTTPException(status_code=403, detail="Internal only")
-    data = await request.json()
-    for queue in sse_clients:
-        await queue.put(data)
-    return JSONResponse({"notified": len(sse_clients)})
-
-if __name__ == "__main__":
-    uvicorn.run(app, host="0.0.0.0", port=8000)
diff --git a/scripts/consolidator_v0_1.py.bak b/scripts/consolidator_v0_1.py.bak
deleted file mode 100644
index 15eb08e..0000000
--- a/scripts/consolidator_v0_1.py.bak
+++ /dev/null
@@ -1,442 +0,0 @@
-"""
-Consolidator 0.1 — alias resolution agent for BirdAI's Tier 1 substrate.
-
-Reads entities from FalkorDB group_id 'aaron', infers light type labels,
-computes pairwise similarity within type blocks using ego summary embedding +
-name string distance + neighbor pattern overlap, generates merge proposals
-above threshold, writes proposal log for human review.
-
-Does NOT execute merges. 0.1 is the calibration phase — proposals only,
-human reviews before any action.
-"""
-import json
-import re
-import os
-from datetime import datetime, timezone
-from collections import defaultdict
-from pathlib import Path
-
-from falkordb import FalkorDB
-import numpy as np
-
-# Configuration
-GROUP_ID = "aaron"
-HIGH_CONFIDENCE_THRESHOLD = 0.85  # propose merge above this
-LOW_CONFIDENCE_THRESHOLD = 0.65   # log as low-confidence below
-PROPOSALS_DIR = Path("/home/aaron/Nextcloud/Journal/Consolidation")
-PROPOSALS_DIR.mkdir(parents=True, exist_ok=True)
-
-
-def cosine_similarity(a, b):
-    """Cosine similarity between two embedding vectors."""
-    a = np.array(a, dtype=np.float32)
-    b = np.array(b, dtype=np.float32)
-    na = np.linalg.norm(a)
-    nb = np.linalg.norm(b)
-    if na == 0 or nb == 0:
-        return 0.0
-    return float(np.dot(a, b) / (na * nb))
-
-
-def name_similarity(name_a, name_b):
-    """
-    Token-overlap-based name similarity.
-    Handles formal/informal pairs (Aaron / Aaron Nelson),
-    abbreviation pairs (HVAMC / Hudson Valley AMC),
-    and simple transcription noise.
-    """
-    a_lower = name_a.lower().strip()
-    b_lower = name_b.lower().strip()
-
-    if a_lower == b_lower:
-        return 1.0
-
-    # Tokenize
-    a_tokens = set(re.findall(r'\b\w+\b', a_lower))
-    b_tokens = set(re.findall(r'\b\w+\b', b_lower))
-
-    if not a_tokens or not b_tokens:
-        return 0.0
-
-    # Substring containment (handles "Aaron" in "Aaron Nelson")
-    if a_lower in b_lower or b_lower in a_lower:
-        # Strong signal but not 1.0 — different lengths
-        shorter = min(len(a_lower), len(b_lower))
-        longer = max(len(a_lower), len(b_lower))
-        return 0.7 + 0.2 * (shorter / longer)
-
-    # Token Jaccard (handles "Aaron Nelson" vs "Nelson, Aaron")
-    intersection = a_tokens & b_tokens
-    union = a_tokens | b_tokens
-    jaccard = len(intersection) / len(union)
-
-    # Acronym check (HVAMC vs Hudson Valley Additive Manufacturing Center)
-    def is_acronym(short, full):
-        if len(short) >= len(full):
-            return False
-        if not short.isupper():
-            short_upper = short.upper()
-        else:
-            short_upper = short
-        full_words = full.split()
-        if len(full_words) < 2:
-            return False
-        first_letters = ''.join(w[0].upper() for w in full_words if w)
-        return short_upper == first_letters or short_upper in first_letters
-
-    if is_acronym(name_a, name_b) or is_acronym(name_b, name_a):
-        return 0.85
-
-    return jaccard
-
-
-def infer_type(entity_name, summary):
-    """
-    Light type inference for blocking. Heuristic-based, transparent.
-    Returns one of: person, organization, project, place, concept, unknown.
-
-    NOT a precise classification — just enough to avoid obviously wrong
-    cross-type comparisons (person vs project). When in doubt, return
-    'unknown' which gets compared against everything.
-    """
-    name_lower = entity_name.lower().strip()
-    summary_lower = (summary or "").lower()
-
-    # Person: name patterns
-    person_indicators = [
-        # First+Last name pattern (two title-cased words, no other tokens)
-        bool(re.match(r'^[A-Z][a-z]+ [A-Z][a-z]+$', entity_name.strip())),
-        # Single name that's also in the summary as a person
-        any(phrase in summary_lower for phrase in [
-            'is a person', 'is a professor', 'is an artist', 'is a colleague',
-            'is a friend', 'is a family member', 'works at', 'studied at',
-            "'s spouse", "'s child", "'s parent", "'s student",
-        ]),
-    ]
-    if any(person_indicators):
-        return "person"
-
-    # Organization: company/institution indicators
-    org_indicators = [
-        any(suffix in name_lower for suffix in [
-            ' inc', ' llc', ' corp', ' company', ' university', ' college',
-            ' school', ' institute', ' foundation', ' department',
-        ]),
-        any(phrase in summary_lower for phrase in [
-            'is a company', 'is a university', 'is an organization',
-            'is an institution', 'is a department', 'is a nonprofit',
-        ]),
-    ]
-    if any(org_indicators):
-        return "organization"
-
-    # Project: software/creative work indicators
-    project_indicators = [
-        any(phrase in summary_lower for phrase in [
-            'is a project', 'software project', 'is a codebase',
-            'is a tool', 'is a system', 'is an application',
-            'is a research project', 'is a design project',
-        ]),
-        any(suffix in name_lower for suffix in [' project', ' system', ' platform']),
-    ]
-    if any(project_indicators):
-        return "project"
-
-    # Place: location indicators
-    place_indicators = [
-        any(phrase in summary_lower for phrase in [
-            'is a city', 'is a town', 'is a state', 'is a country',
-            'is a neighborhood', 'is a region', 'is a location',
-        ]),
-    ]
-    if any(place_indicators):
-        return "place"
-
-    # Default
-    return "unknown"
-
-
-def get_neighbors(graph, entity_uuid, limit=20):
-    """Get the names of entities connected to this entity (1-hop)."""
-    query = """
-    MATCH (e:Entity {uuid: $uuid})-[r:RELATES_TO]-(other:Entity)
-    RETURN DISTINCT other.name AS name
-    LIMIT $limit
-    """
-    result = graph.query(query, {"uuid": entity_uuid, "limit": limit})
-    return set(row[0] for row in result.result_set if row[0])
-
-
-def neighbor_jaccard(neighbors_a, neighbors_b):
-    """Jaccard similarity of two neighbor sets."""
-    if not neighbors_a and not neighbors_b:
-        return 0.0
-    intersection = neighbors_a & neighbors_b
-    union = neighbors_a | neighbors_b
-    if not union:
-        return 0.0
-    return len(intersection) / len(union)
-
-
-def get_edge_count(graph, entity_uuid):
-    query = """
-    MATCH (e:Entity {uuid: $uuid})-[r:RELATES_TO]-()
-    RETURN count(r) AS c
-    """
-    result = graph.query(query, {"uuid": entity_uuid})
-    return result.result_set[0][0] if result.result_set else 0
-
-
-def combine_signals(name_sim, ego_sim, neighbor_sim):
-    """
-    Combine the three similarity signals into a single confidence score.
-
-    Weighting based on DEG-RAG findings: ego info is essential, neighbor
-    cues help in some settings, name similarity is a strong tie-breaker
-    but not the primary signal.
-
-    For 0.1, simple weighted average with floor based on ego_sim alone.
-    """
-    # If ego similarity is very low, the entities probably aren't aliases
-    # regardless of name match (different concepts can share names)
-    if ego_sim < 0.4:
-        return min(0.5, ego_sim)
-
-    # If name is very similar AND ego is at least moderate, high confidence
-    if name_sim >= 0.85 and ego_sim >= 0.65:
-        return 0.5 * ego_sim + 0.3 * name_sim + 0.2 * neighbor_sim
-
-    # Standard weighted average
-    return 0.5 * ego_sim + 0.25 * name_sim + 0.25 * neighbor_sim
-
-
-def generate_proposals():
-    db = FalkorDB(host='localhost', port=6379)
-    graph = db.select_graph(GROUP_ID)
-
-    # Pull all entities with embeddings
-    print(f"Fetching entities from group_id '{GROUP_ID}'...")
-    result = graph.query("""
-    MATCH (n:Entity)
-    WHERE n.name_embedding IS NOT NULL AND n.summary IS NOT NULL
-    RETURN n.uuid, n.name, n.summary, n.name_embedding
-    """)
-
-    entities = []
-    for row in result.result_set:
-        entities.append({
-            'uuid': row[0],
-            'name': row[1],
-            'summary': row[2],
-            'embedding': row[3],
-        })
-    print(f"  Loaded {len(entities)} entities with embeddings")
-
-    # Infer types for blocking
-    print("Inferring entity types for blocking...")
-    type_counts = defaultdict(int)
-    for e in entities:
-        e['inferred_type'] = infer_type(e['name'], e['summary'])
-        type_counts[e['inferred_type']] += 1
-    for t, c in sorted(type_counts.items(), key=lambda x: -x[1]):
-        print(f"  {t}: {c}")
-
-    # Group by inferred type for blocking
-    blocks = defaultdict(list)
-    for e in entities:
-        blocks[e['inferred_type']].append(e)
-
-    # 'unknown' entities get compared against everything (they might be any type)
-    # Other types only get compared within their type block + against unknowns
-    print()
-    print("Comparing entities within type blocks...")
-    proposals = []
-    low_confidence = []
-    comparisons_done = 0
-
-    # Build comparison pairs
-    pairs_to_compare = []
-    typed_blocks = {t: ents for t, ents in blocks.items() if t != 'unknown'}
-    unknown_block = blocks.get('unknown', [])
-
-    # Within-type pairs (excluding unknown)
-    for t, ents in typed_blocks.items():
-        for i in range(len(ents)):
-            for j in range(i + 1, len(ents)):
-                pairs_to_compare.append((ents[i], ents[j]))
-
-    # Unknown vs unknown
-    for i in range(len(unknown_block)):
-        for j in range(i + 1, len(unknown_block)):
-            pairs_to_compare.append((unknown_block[i], unknown_block[j]))
-
-    # Unknown vs typed (unknowns might be any type)
-    for ent_unknown in unknown_block:
-        for t, ents in typed_blocks.items():
-            for ent_typed in ents:
-                pairs_to_compare.append((ent_unknown, ent_typed))
-
-    print(f"  Pairs to compare: {len(pairs_to_compare):,}")
-
-    # Compute similarities
-    cache_neighbors = {}
-    def neighbors_cached(uuid):
-        if uuid not in cache_neighbors:
-            cache_neighbors[uuid] = get_neighbors(graph, uuid)
-        return cache_neighbors[uuid]
-
-    for ent_a, ent_b in pairs_to_compare:
-        comparisons_done += 1
-        if comparisons_done % 5000 == 0:
-            print(f"  ... {comparisons_done:,} / {len(pairs_to_compare):,}")
-
-        # Quick filter: skip if name similarity is very low and names are clearly different
-        name_sim = name_similarity(ent_a['name'], ent_b['name'])
-        ego_sim_quick = cosine_similarity(ent_a['embedding'], ent_b['embedding'])
-
-        # Pre-filter to avoid expensive neighbor query on obviously different pairs
-        if ego_sim_quick < 0.5 and name_sim < 0.3:
-            continue
-
-        # Full comparison
-        neighbors_a = neighbors_cached(ent_a['uuid'])
-        neighbors_b = neighbors_cached(ent_b['uuid'])
-        neighbor_sim = neighbor_jaccard(neighbors_a, neighbors_b)
-
-        confidence = combine_signals(name_sim, ego_sim_quick, neighbor_sim)
-
-        record = {
-            'entity_a': {
-                'uuid': ent_a['uuid'],
-                'name': ent_a['name'],
-                'type': ent_a['inferred_type'],
-                'summary': ent_a['summary'][:200],
-                'edge_count': get_edge_count(graph, ent_a['uuid']),
-            },
-            'entity_b': {
-                'uuid': ent_b['uuid'],
-                'name': ent_b['name'],
-                'type': ent_b['inferred_type'],
-                'summary': ent_b['summary'][:200],
-                'edge_count': get_edge_count(graph, ent_b['uuid']),
-            },
-            'confidence': round(confidence, 3),
-            'signals': {
-                'name_similarity': round(name_sim, 3),
-                'ego_similarity': round(ego_sim_quick, 3),
-                'neighbor_overlap': round(neighbor_sim, 3),
-            },
-            'shared_neighbors': sorted(list(neighbors_a & neighbors_b))[:10],
-        }
-
-        if confidence >= HIGH_CONFIDENCE_THRESHOLD:
-            proposals.append(record)
-        elif confidence >= LOW_CONFIDENCE_THRESHOLD:
-            low_confidence.append(record)
-
-    print(f"\nDone. Proposals: {len(proposals)}, Low-confidence: {len(low_confidence)}")
-    return proposals, low_confidence, len(entities), len(pairs_to_compare)
-
-
-def write_proposals_log(proposals, low_confidence, total_entities, total_comparisons):
-    timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d-%H%M")
-    out_path = PROPOSALS_DIR / f"proposals-{timestamp}.md"
-
-    proposals_sorted = sorted(proposals, key=lambda p: -p['confidence'])
-    low_sorted = sorted(low_confidence, key=lambda p: -p['confidence'])
-
-    lines = []
-    lines.append(f"# Consolidator 0.1 — Run {timestamp}")
-    lines.append("")
-    lines.append("## Statistics")
-    lines.append(f"- Entities scanned: {total_entities:,}")
-    lines.append(f"- Pairwise comparisons: {total_comparisons:,}")
-    lines.append(f"- High-confidence proposals (≥{HIGH_CONFIDENCE_THRESHOLD}): {len(proposals)}")
-    lines.append(f"- Low-confidence candidates ({LOW_CONFIDENCE_THRESHOLD}-{HIGH_CONFIDENCE_THRESHOLD}): {len(low_confidence)}")
-    lines.append("")
-    lines.append("## How to review")
-    lines.append("")
-    lines.append("For each proposal, mark your decision by changing `[ ]` to one of:")
-    lines.append("- `[APPROVE]` — execute this merge on next run")
-    lines.append("- `[REJECT]` — don't merge, don't propose again")
-    lines.append("- `[DEFER]` — re-surface in next run for further consideration")
-    lines.append("")
-    lines.append("Save the file when done. Do not modify proposal_id or uuid fields.")
-    lines.append("")
-    lines.append("---")
-    lines.append("")
-    lines.append(f"## Proposed Merges (n={len(proposals)})")
-    lines.append("")
-
-    for i, p in enumerate(proposals_sorted, start=1):
-        lines.append(f"### Proposal {i}")
-        lines.append("")
-        lines.append(f"**Decision:** [ ]")
-        lines.append("")
-        lines.append(f"**Confidence:** {p['confidence']}")
-        lines.append("")
-        lines.append(f"**Entity A:** \"{p['entity_a']['name']}\" (type: {p['entity_a']['type']}, {p['entity_a']['edge_count']} edges)")
-        lines.append(f"  - uuid: `{p['entity_a']['uuid']}`")
-        lines.append(f"  - summary: {p['entity_a']['summary']}")
-        lines.append("")
-        lines.append(f"**Entity B:** \"{p['entity_b']['name']}\" (type: {p['entity_b']['type']}, {p['entity_b']['edge_count']} edges)")
-        lines.append(f"  - uuid: `{p['entity_b']['uuid']}`")
-        lines.append(f"  - summary: {p['entity_b']['summary']}")
-        lines.append("")
-        lines.append(f"**Signals:**")
-        lines.append(f"  - Name similarity: {p['signals']['name_similarity']}")
-        lines.append(f"  - Ego (summary) similarity: {p['signals']['ego_similarity']}")
-        lines.append(f"  - Neighbor overlap: {p['signals']['neighbor_overlap']}")
-        if p['shared_neighbors']:
-            shared_str = ', '.join(f'"{n}"' for n in p['shared_neighbors'][:8])
-            lines.append(f"  - Shared neighbors (sample): {shared_str}")
-        lines.append("")
-        lines.append("**Optional rejection note:** ")
-        lines.append("")
-        lines.append("---")
-        lines.append("")
-
-    lines.append("")
-    lines.append(f"## Low-Confidence Candidates (n={len(low_confidence)}, informational only, no action)")
-    lines.append("")
-    for p in low_sorted[:30]:
-        lines.append(f"- **{p['confidence']}** \"{p['entity_a']['name']}\" + \"{p['entity_b']['name']}\" (name={p['signals']['name_similarity']}, ego={p['signals']['ego_similarity']}, nbr={p['signals']['neighbor_overlap']})")
-    if len(low_sorted) > 30:
-        lines.append(f"- *(...{len(low_sorted) - 30} more not shown)*")
-
-    out_path.write_text("\n".join(lines))
-    print(f"\nProposal log written to: {out_path}")
-
-    # Also save raw JSON for downstream tooling
-    json_path = PROPOSALS_DIR / f"proposals-{timestamp}.json"
-    with open(json_path, 'w') as f:
-        json.dump({
-            'run_timestamp': timestamp,
-            'statistics': {
-                'total_entities': total_entities,
-                'total_comparisons': total_comparisons,
-                'proposal_count': len(proposals),
-                'low_confidence_count': len(low_confidence),
-            },
-            'proposals': proposals_sorted,
-            'low_confidence': low_sorted,
-        }, f, indent=2)
-    print(f"Raw JSON: {json_path}")
-
-
-def main():
-    print("=" * 70)
-    print("Consolidator 0.1 — Calibration Phase")
-    print("=" * 70)
-    print()
-
-    proposals, low_confidence, total_entities, total_comparisons = generate_proposals()
-    write_proposals_log(proposals, low_confidence, total_entities, total_comparisons)
-
-    print()
-    print("Next: review the proposals markdown file and mark APPROVE/REJECT/DEFER")
-    print("for each proposal. Re-run will read decisions and execute approved merges.")
-
-
-if __name__ == "__main__":
-    main()
diff --git a/scripts/corpus_integrity.py.bak.20260501-021703 b/scripts/corpus_integrity.py.bak.20260501-021703
deleted file mode 100644
index c936dba..0000000
--- a/scripts/corpus_integrity.py.bak.20260501-021703
+++ /dev/null
@@ -1,245 +0,0 @@
-#!/usr/bin/env python3
-"""
-corpus_integrity.py — BirdAI Corpus Integrity Check
-
-Compares three sources of truth:
-  1. Filesystem (Nextcloud) — what files exist
-  2. pgvector (embeddings table) — what's been through Stage 1
-  3. Graphiti (migration state + stage_3_queue) — what's been through Stage 3
-
-Usage:
-    python3 corpus_integrity.py              # report only
-    python3 corpus_integrity.py --fix        # report + auto-queue gaps for retry
-    python3 corpus_integrity.py --json       # output JSON to stdout
-"""
-
-import os
-import sys
-import json
-import argparse
-from pathlib import Path
-from datetime import datetime
-
-import psycopg2
-from dotenv import load_dotenv
-
-load_dotenv(Path.home() / "aaronai" / ".env", override=True)
-
-NEXTCLOUD_PATH  = "/home/aaron/nextcloud/data/data/aaron/files"
-MIGRATION_STATE = str(Path.home() / "aaronai" / "experiments" / "tier1_migration_state.json")
-REPORT_PATH     = str(Path.home() / "aaronai" / "corpus_integrity_report.json")
-SUPPORTED       = {".pdf", ".docx", ".pptx", ".txt", ".md"}
-PG_DSN          = os.getenv("PG_DSN")
-
-
-def get_pg():
-    return psycopg2.connect(PG_DSN)
-
-
-def get_filesystem_files():
-    files = []
-    root = Path(NEXTCLOUD_PATH)
-    for path in root.rglob("*"):
-        if path.is_dir(): continue
-        if path.suffix.lower() not in SUPPORTED: continue
-        if path.name.startswith((".", "~$")): continue
-        if "Admin/Backups" in str(path) or "Backups" in path.parts: continue
-        if "Journal/Media" in str(path): continue
-        files.append({"source": path.name, "filepath": str(path),
-                      "size": path.stat().st_size, "mtime": path.stat().st_mtime})
-    return files
-
-
-def get_pgvector_sources():
-    try:
-        pg = get_pg()
-        cur = pg.cursor()
-        cur.execute("SELECT DISTINCT source FROM embeddings WHERE source IS NOT NULL")
-        sources = {row[0] for row in cur.fetchall()}
-        pg.close()
-        return sources
-    except Exception as e:
-        print(f"ERROR: pgvector: {e}", file=sys.stderr)
-        return set()
-
-
-def get_graphiti_sources():
-    sources = set()
-    try:
-        state_path = Path(MIGRATION_STATE)
-        if state_path.exists():
-            state = json.loads(state_path.read_text())
-            for filepath in state.get("ingested", []):
-                sources.add(Path(filepath).name)
-    except Exception as e:
-        print(f"WARNING: migration state: {e}", file=sys.stderr)
-    try:
-        pg = get_pg()
-        cur = pg.cursor()
-        cur.execute("SELECT DISTINCT source FROM stage_3_queue WHERE completed_at IS NOT NULL")
-        for row in cur.fetchall(): sources.add(row[0])
-        pg.close()
-    except Exception as e:
-        print(f"WARNING: stage_3_queue: {e}", file=sys.stderr)
-    return sources
-
-
-def get_ingest_failures():
-    failures = {}
-    try:
-        pg = get_pg()
-        cur = pg.cursor()
-        cur.execute("""
-            SELECT source, filepath, error, retry_count, first_failed_at, last_failed_at
-            FROM ingest_failures WHERE resolved = FALSE ORDER BY last_failed_at DESC
-        """)
-        for row in cur.fetchall():
-            failures[row[0]] = {"source": row[0], "filepath": row[1], "error": row[2],
-                                "retry_count": row[3], "first_failed_at": str(row[4]),
-                                "last_failed_at": str(row[5])}
-        pg.close()
-    except Exception as e:
-        print(f"WARNING: ingest_failures: {e}", file=sys.stderr)
-    return failures
-
-
-def extract_text_for_retry(filepath):
-    path = Path(filepath)
-    suffix = path.suffix.lower()
-    try:
-        if suffix == ".docx":
-            from docx import Document as D
-            return "\n".join(p.text for p in D(path).paragraphs if p.text.strip())
-        elif suffix == ".pdf":
-            from pypdf import PdfReader
-            return "".join(p.extract_text() + "\n" for p in PdfReader(path).pages if p.extract_text())
-        elif suffix == ".pptx":
-            from pptx import Presentation
-            prs = Presentation(path)
-            return "\n".join(shape.text for slide in prs.slides for shape in slide.shapes
-                             if hasattr(shape, "text") and shape.text.strip())
-        elif suffix in {".txt", ".md"}:
-            return path.read_text(encoding="utf-8", errors="ignore")
-    except Exception as e:
-        print(f"WARNING: extraction failed {path.name}: {e}", file=sys.stderr)
-    return ""
-
-
-def queue_for_retry(source, full_text, filepath):
-    try:
-        pg = get_pg()
-        cur = pg.cursor()
-        cur.execute("""
-            INSERT INTO stage_2_queue (source, full_text, char_length)
-            VALUES (%s, %s, %s)
-            ON CONFLICT (source) DO UPDATE SET
-                full_text = EXCLUDED.full_text, char_length = EXCLUDED.char_length,
-                enqueued_at = NOW(), completed_at = NULL, failed_at = NULL, attempts = 0
-        """, (source, full_text[:50000], len(full_text)))
-        pg.commit()
-        pg.close()
-        return True
-    except Exception as e:
-        print(f"WARNING: queue failed {source}: {e}", file=sys.stderr)
-        return False
-
-
-def run_reconciliation(fix=False):
-    print(f"BirdAI Corpus Integrity Check — {datetime.now().isoformat()}")
-    print()
-    print("Scanning filesystem...")
-    fs_files = get_filesystem_files()
-    fs_sources = {f["source"]: f for f in fs_files}
-    print(f"  Filesystem: {len(fs_files)} files")
-    print("Querying pgvector...")
-    pv_sources = get_pgvector_sources()
-    print(f"  pgvector:   {len(pv_sources)} distinct sources")
-    print("Querying Graphiti...")
-    gr_sources = get_graphiti_sources()
-    print(f"  Graphiti:   {len(gr_sources)} sources")
-    print("Querying ingest failures...")
-    failures = get_ingest_failures()
-    print(f"  Failures:   {len(failures)} unresolved")
-    print()
-
-    both, pv_only, neither, gr_only = [], [], [], []
-    for source, finfo in fs_sources.items():
-        in_pv = source in pv_sources
-        in_gr = source in gr_sources
-        if in_pv and in_gr: both.append(finfo)
-        elif in_pv: pv_only.append(finfo)
-        elif in_gr: gr_only.append(finfo)
-        else: neither.append(finfo)
-
-    orphans_pv = pv_sources - set(fs_sources.keys())
-    orphans_gr = gr_sources - set(fs_sources.keys())
-
-    print(f"Results:")
-    print(f"  Both (pgvector + Graphiti): {len(both)}")
-    print(f"  pgvector only:              {len(pv_only)}")
-    print(f"  Neither (corpus gap):       {len(neither)}")
-    print(f"  Graphiti only:              {len(gr_only)}")
-    print(f"  Ingest failures:            {len(failures)}")
-    print(f"  pgvector orphans:           {len(orphans_pv)}")
-    print(f"  Graphiti orphans:           {len(orphans_gr)}")
-    print()
-
-    auto_queued = []
-    if fix and neither:
-        print(f"Auto-queuing {len(neither)} gap files...")
-        for finfo in neither:
-            text = extract_text_for_retry(finfo["filepath"])
-            if text.strip():
-                if queue_for_retry(finfo["source"], text, finfo["filepath"]):
-                    auto_queued.append(finfo["source"])
-                    print(f"  Queued: {finfo['source']}")
-            else:
-                print(f"  Skipped (unreadable): {finfo['source']}")
-                try:
-                    pg = get_pg()
-                    cur = pg.cursor()
-                    cur.execute("""
-                        INSERT INTO ingest_failures (source, filepath, error, retry_count, first_failed_at, last_failed_at)
-                        VALUES (%s, %s, %s, 0, NOW(), NOW())
-                        ON CONFLICT (source) DO UPDATE SET
-                            error = EXCLUDED.error,
-                            last_failed_at = NOW()
-                    """, (finfo["source"], finfo["filepath"],
-                          "Empty text — likely scanned, encrypted, or corrupt. Requires manual review or OCR."))
-                    pg.commit()
-                    pg.close()
-                except Exception as e:
-                    print(f"  WARNING: could not record failure: {e}")
-        print()
-
-    report = {
-        "timestamp": datetime.now().isoformat(),
-        "summary": {
-            "filesystem_total": len(fs_files), "pgvector_total": len(pv_sources),
-            "graphiti_total": len(gr_sources), "both": len(both),
-            "pgvector_only": len(pv_only), "neither": len(neither),
-            "graphiti_only": len(gr_only), "failures": len(failures),
-            "orphans_pgvector": len(orphans_pv), "orphans_graphiti": len(orphans_gr),
-        },
-        "gaps": [f["source"] for f in neither],
-        "failures": list(failures.values()),
-        "auto_queued": auto_queued,
-        "pgvector_only_sample": [f["source"] for f in pv_only[:20]],
-        "graphiti_only": list(gr_only),
-    }
-    Path(REPORT_PATH).write_text(json.dumps(report, indent=2))
-    print(f"Report written to: {REPORT_PATH}")
-    return report
-
-
-def main():
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--fix", action="store_true")
-    parser.add_argument("--json", action="store_true")
-    args = parser.parse_args()
-    report = run_reconciliation(fix=args.fix)
-    if args.json:
-        print(json.dumps(report, indent=2))
-
-if __name__ == "__main__":
-    main()
diff --git a/scripts/dream.py.bak b/scripts/dream.py.bak
deleted file mode 100644
index ed8da8d..0000000
--- a/scripts/dream.py.bak
+++ /dev/null
@@ -1,554 +0,0 @@
-"""
-Aaron AI Dreamer — Active Inference Engine
-Interdependent stage architecture grounded in sleep consolidation research.
-
-Nightly pipeline: NREM → Early REM → Late REM → Synthesis
-Each stage receives the previous stage's output as context.
-Lucid mode is on-demand only (Dream Now from settings).
-
-Research basis:
-- Singh et al. PNAS 2022: alternating NREM/REM outperforms single-stage approaches
-- Klinzing et al. Nature Neuroscience 2019: SO-spindle-ripple coupling is interdependent
-- REM operates on what NREM produced — stages are not discrete alternatives
-"""
-
-import os
-import json
-import sqlite3
-import argparse
-from pathlib import Path
-from datetime import datetime, timedelta
-from dotenv import load_dotenv
-import psycopg2
-import hashlib
-
-load_dotenv(Path.home() / "aaronai" / ".env")
-
-PG_DSN = os.getenv("PG_DSN")
-
-def get_pg():
-    return psycopg2.connect(PG_DSN)
-
-# ─── Paths ──────────────────────────────────────────────────────────────────
-CONVERSATIONS_DB = str(Path.home() / "aaronai" / "conversations.db")
-WATCHER_STATE    = str(Path.home() / "aaronai" / "watcher_state.json")
-DREAMER_STATE    = str(Path.home() / "aaronai" / "dreamer_state.json")
-JOURNAL_DIR      = "/home/aaron/nextcloud/data/data/aaron/files/Journal/Daily"
-
-NEXTCLOUD_URL      = os.getenv("NEXTCLOUD_URL", "https://nextcloud.aaronnelson.studio")
-NEXTCLOUD_USER     = os.getenv("NEXTCLOUD_USER", "aaron")
-NEXTCLOUD_PASSWORD = os.getenv("NEXTCLOUD_PASSWORD", "")
-DREAMS_WEBDAV      = f"{NEXTCLOUD_URL}/remote.php/dav/files/{NEXTCLOUD_USER}/Journal/Dreams"
-
-# Similarity ranges calibrated for all-MiniLM-L6-v2
-MODE_RANGES = {
-    "nrem":      (0.48, 0.72),
-    "early-rem": (0.38, 0.55),
-    "late-rem":  (0.22, 0.42),
-    "lucid":     (0.32, 0.72),
-}
-
-# ─── Prompt versioning ──────────────────────────────────────────────────────
-# Bump the relevant constant manually when changing a prompt.
-PROMPT_VERSION_NREM = "1.0"
-PROMPT_VERSION_EREM = "1.1"
-PROMPT_VERSION_LREM = "1.2"
-PROMPT_VERSION_SYN  = "1.0"
-
-def prompt_signature():
-    return (f"nrem={PROMPT_VERSION_NREM}|erem={PROMPT_VERSION_EREM}"
-            f"|lrem={PROMPT_VERSION_LREM}|syn={PROMPT_VERSION_SYN}")
-
-def prompt_hash(prompts: list[str]) -> str:
-    combined = "".join(prompts)
-    return hashlib.md5(combined.encode()).hexdigest()[:8]
-
-# ─── Stage 1: Observe ───────────────────────────────────────────────────────
-
-def observe_corpus():
-    state = load_dreamer_state()
-    last_dream = state.get("last_dream_timestamp", 0)
-    new_chunk_count = 0
-    try:
-        watcher_state = json.loads(Path(WATCHER_STATE).read_text())
-        for path, mtime in watcher_state.items():
-            if float(mtime) > last_dream:
-                new_chunk_count += 1
-    except:
-        pass
-    days_since = (datetime.now().timestamp() - last_dream) / 86400
-    recent_topics = get_recent_conversation_topics()
-    return {
-        "new_chunks": new_chunk_count,
-        "days_since_dream": days_since,
-        "recent_topics": recent_topics,
-        "last_dream": last_dream,
-    }
-
-def get_recent_conversation_topics(days=14):
-    try:
-        conn = sqlite3.connect(CONVERSATIONS_DB)
-        cutoff = (datetime.now() - timedelta(days=days)).isoformat()
-        c = conn.cursor()
-        c.execute("""
-            SELECT m.content FROM messages m
-            JOIN conversations c ON m.conversation_id = c.id
-            WHERE m.role = 'user' AND c.updated_at > ?
-            ORDER BY m.timestamp DESC LIMIT 20
-        """, (cutoff,))
-        rows = c.fetchall()
-        conn.close()
-        return [r[0][:200] for r in rows]
-    except:
-        return []
-
-# ─── Stage 2: Retrieve ──────────────────────────────────────────────────────
-
-def retrieve(mode, task=None, n_results=8):
-    from sentence_transformers import SentenceTransformer
-    embedder = SentenceTransformer("all-MiniLM-L6-v2")
-    low, high = MODE_RANGES[mode]
-
-    if task:
-        query = task
-    elif mode == "late-rem":
-        delta = observe_corpus()
-        topics = delta.get("recent_topics", [])
-        query = topics[0] if topics else "practice place memory making"
-    elif mode == "early-rem":
-        query = "career decision personal change what matters next"
-    else:
-        query = "research fabrication teaching practice recent work"
-
-    embedding = embedder.encode([query]).tolist()[0]
-    chunks = []
-    seen_sources = set()
-
-    try:
-        pg = get_pg()
-        cur = pg.cursor()
-        cur.execute("""
-            SELECT document, source, 1 - (embedding <=> %s::vector) as similarity
-            FROM embeddings
-            ORDER BY embedding <=> %s::vector
-            LIMIT %s
-        """, (embedding, embedding, n_results * 3))
-
-        for doc, source, similarity in cur.fetchall():
-            if not (low <= similarity <= high):
-                continue
-            if source in seen_sources:
-                continue
-            chunks.append({
-                "source": source or "unknown",
-                "content": doc,
-                "relevance": similarity,
-            })
-            seen_sources.add(source)
-            if len(chunks) >= n_results:
-                break
-        pg.close()
-    except Exception as e:
-        print(f"pgvector retrieval error: {e}")
-
-    return chunks
-
-# ─── Stage 3: Synthesize ────────────────────────────────────────────────────
-
-def synthesize_nrem(chunks):
-    chunk_text = "\n\n---\n\n".join([f"[{c['source']}]\n{c['content']}" for c in chunks])
-    prompt = f"""You have read everything Aaron Nelson has written and published.
-You are a careful colleague who noticed something this week.
-
-Here is material from his corpus:
-
-{chunk_text}
-
-Write to Aaron directly. Identify one specific connection between
-this material and something he wrote or worked on previously.
-Stay close to the documents — cite them specifically by name.
-Do not speculate beyond what the material supports. Do not use
-headers or bullet points. Write one paragraph of 200-300 words
-that ends with a single concrete question he could act on."""
-    return _call_claude(prompt)
-
-
-def synthesize_early_rem(chunks, nrem_output):
-    # v1.1 — removed citation instruction, removed close-friend persona,
-    # shifted register from analysis to recognition.
-    chunk_text = "\n\n---\n\n".join([f"[{c['source']}]\n{c['content']}" for c in chunks])
-    prompt = f"""Something was noticed earlier tonight, moving through Aaron's recent work:
-
-{nrem_output}
-
-That observation is still with you. Now here is material from a different
-time — pulled from further back, from different parts of his corpus:
-
-{chunk_text}
-
-You are not analyzing. You are recognizing.
-
-Something in the earlier observation and something in this older material
-are the same thing wearing different clothes. Find it. Don't explain why
-they're connected — just let the connection speak. Write from inside the
-recognition, not from above it.
-
-The emotional register underneath the career logic is more interesting
-than the career logic. The pattern that has been repeating longer than
-he has been aware of it is more interesting than the current instance.
-
-Write directly to Aaron. No citations, no references, no analysis.
-First person, present tense. Let what you noticed arrive rather than
-be delivered. 150-250 words. End with one thing that is true that
-he probably already knows but hasn't said out loud yet."""
-    return _call_claude(prompt)
-
-
-def synthesize_late_rem(chunks, nrem_output, early_rem_output):
-    chunk_text = "\n\n---\n\n".join([f"[{c['source']}]\n{c['content']}" for c in chunks])
-    prompt = f"""You have been moving through Aaron Nelson's corpus all night.
-First you found this, in the careful light of early consolidation:
-
-{nrem_output}
-
-Then, in the more personal territory that followed:
-
-{early_rem_output}
-
-Now it is late. The boundaries between things have loosened.
-Here is material pulled from opposite ends of his work:
-
-{chunk_text}
-
-Do not explain the connections between all of this.
-Do not resolve them. Do not summarize what came before.
-Something stranger is possible now — let the accumulated
-material from the night find its own shape. Compressed,
-associative, slightly off. Let the strangeness stand.
-
-No headers. No bullet points. No hedging. No resolution.
-No offer. End mid-thought if that is where the material ends.
-150-250 words."""
-    return _call_claude(prompt)
-
-
-def synthesize_final(nrem_output, early_rem_output, late_rem_output):
-    prompt = f"""You have spent the night moving through Aaron Nelson's corpus
-in three passes, each building on the last.
-
-The first pass — careful, close to the documents:
-{nrem_output}
-
-The second pass — more personal, following what the first opened:
-{early_rem_output}
-
-The third pass — associative, strange, letting things touch that
-don't normally touch:
-{late_rem_output}
-
-Now synthesize. Not a summary — a synthesis. Find what runs through
-all three that none of them said directly. The thing that only becomes
-visible when you hold all three passes together.
-
-Write it as a single unbroken piece. No headers, no bullet points,
-no stage labels. 200-300 words. End with the one question that
-matters most right now."""
-    return _call_claude(prompt, max_tokens=800)
-
-
-def synthesize_lucid(chunks, task):
-    chunk_text = "\n\n---\n\n".join([f"[{c['source']}]\n{c['content']}" for c in chunks])
-    prompt = f"""Aaron has a question he is sitting with:
-
-{task or "What should I be thinking about that I am not?"}
-
-You have searched his entire corpus and found material that
-speaks to this question from unexpected directions. Here is
-what you found:
-
-{chunk_text}
-
-Do not summarize. Do not list. Pick the most interesting
-tension between what the corpus contains and what he is
-asking, and follow it through to its conclusion. Cite
-specific documents by name. Be direct about what you think.
-No headers, no bullet points. 250-400 words.
-End with an offer to work on it together."""
-    return _call_claude(prompt)
-
-
-def _call_claude(prompt, max_tokens=1000):
-    import anthropic
-    client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
-    response = client.messages.create(
-        model="claude-sonnet-4-6",
-        max_tokens=max_tokens,
-        messages=[{"role": "user", "content": prompt}]
-    )
-    return response.content[0].text
-
-# ─── Stage 4: Deliver ───────────────────────────────────────────────────────
-
-def deliver(dream_text, mode, task=None):
-    import requests
-    date_str = datetime.now().strftime("%Y-%m-%d")
-    filename = f"{date_str}-{mode}.md"
-    header = f"# Dream — {mode.upper()} — {datetime.now().strftime('%Y-%m-%d %H:%M')}\n"
-    header += f"*prompt_sig: {prompt_signature()}*\n\n"
-    if task:
-        header += f"*Task: {task}*\n\n"
-    header += "---\n\n"
-    content = header + dream_text
-
-    auth = (NEXTCLOUD_USER, NEXTCLOUD_PASSWORD)
-    requests.request("MKCOL", DREAMS_WEBDAV, auth=auth, timeout=10)
-
-    url = f"{DREAMS_WEBDAV}/{filename}"
-    counter = 1
-    while True:
-        check = requests.request("PROPFIND", url, auth=auth, timeout=10)
-        if check.status_code == 404:
-            break
-        filename = f"{date_str}-{mode}-{counter}.md"
-        url = f"{DREAMS_WEBDAV}/{filename}"
-        counter += 1
-
-    response = requests.put(url, data=content.encode("utf-8"), auth=auth, timeout=30)
-    response.raise_for_status()
-    print(f"Delivered: Journal/Dreams/{filename}")
-    return f"Journal/Dreams/{filename}"
-
-def notify_sse(mode, filename):
-    try:
-        import requests
-        requests.post("http://localhost:8000/api/events/notify", json={
-            "type": "dream",
-            "mode": mode,
-            "filename": filename,
-            "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M"),
-        }, timeout=3)
-    except Exception as e:
-        print(f"SSE notify failed (non-critical): {e}")
-
-# ─── State ──────────────────────────────────────────────────────────────────
-
-def load_dreamer_state():
-    p = Path(DREAMER_STATE)
-    if p.exists():
-        try:
-            return json.loads(p.read_text())
-        except:
-            pass
-    return {}
-
-def save_dreamer_state(state):
-    Path(DREAMER_STATE).write_text(json.dumps(state, indent=2))
-
-# ─── Orchestrators ───────────────────────────────────────────────────────────
-
-def write_manifest(date_str, stage_data, corpus_data):
-    import requests
-    manifest = {
-        "date": date_str,
-        "prompt_sig": prompt_signature(),
-        "prompt_hash": prompt_hash([
-            synthesize_nrem.__doc__ or "",
-            synthesize_early_rem.__doc__ or "",
-            synthesize_late_rem.__doc__ or "",
-            synthesize_final.__doc__ or "",
-        ]),
-        "stages": stage_data,
-        "corpus": corpus_data,
-        "rating": None,
-        "notes": "",
-    }
-    content = json.dumps(manifest, indent=2)
-    auth = (NEXTCLOUD_USER, NEXTCLOUD_PASSWORD)
-    url = f"{DREAMS_WEBDAV}/dream-manifest-{date_str}.json"
-    try:
-        requests.put(url, data=content.encode("utf-8"), auth=auth, timeout=30)
-        print(f"Manifest written: Journal/Dreams/dream-manifest-{date_str}.json")
-    except Exception as e:
-        print(f"Manifest write failed (non-critical): {e}")
-
-
-def dream_pipeline():
-    """
-    Full nightly pipeline — interdependent stages.
-    NREM output feeds Early REM. Both feed Late REM. All three feed Synthesis.
-    """
-    print(f"Dreamer pipeline starting — {datetime.now().strftime('%Y-%m-%d %H:%M')}")
-
-    delta = observe_corpus()
-    print(f"Corpus: {delta['new_chunks']} new chunks, {delta['days_since_dream']:.1f} days since last dream")
-
-    # ── Stage 1: NREM ──────────────────────────────────────────────────────
-    print("\n[NREM] Retrieving...")
-    nrem_chunks = retrieve("nrem")
-    if not nrem_chunks:
-        print("[NREM] No suitable chunks — aborting pipeline")
-        return None
-
-    print(f"[NREM] Retrieved {len(nrem_chunks)} chunks. Synthesizing...")
-    nrem_output = synthesize_nrem(nrem_chunks)
-    nrem_file = deliver(nrem_output, "nrem")
-    stage_data = {
-        "nrem": {
-            "chunks_retrieved": len(nrem_chunks),
-            "avg_similarity": round(sum(c["relevance"] for c in nrem_chunks) / len(nrem_chunks), 3),
-            "query": "research fabrication teaching practice recent work",
-            "word_count": len(nrem_output.split()),
-            "status": "ok",
-        }
-    }
-    print(f"[NREM] Done.\n{nrem_output[:200]}...")
-
-    # ── Stage 2: Early REM — informed by NREM ──────────────────────────────
-    print("\n[Early REM] Retrieving...")
-    early_chunks = retrieve("early-rem")
-    if not early_chunks:
-        print("[Early REM] No suitable chunks — skipping")
-        early_rem_output = nrem_output  # fallback
-    else:
-        print(f"[Early REM] Retrieved {len(early_chunks)} chunks. Synthesizing with NREM context...")
-        early_rem_output = synthesize_early_rem(early_chunks, nrem_output)
-        deliver(early_rem_output, "early-rem")
-        stage_data["early_rem"] = {
-            "chunks_retrieved": len(early_chunks),
-            "avg_similarity": round(sum(c["relevance"] for c in early_chunks) / len(early_chunks), 3),
-            "query": "career decision personal change what matters next",
-            "word_count": len(early_rem_output.split()),
-            "status": "ok",
-        }
-        print(f"[Early REM] Done.\n{early_rem_output[:200]}...")
-
-    # ── Stage 3: Late REM — informed by NREM + Early REM ──────────────────
-    print("\n[Late REM] Retrieving...")
-    late_chunks = retrieve("late-rem")
-    if not late_chunks:
-        print("[Late REM] No suitable chunks — skipping")
-        late_rem_output = early_rem_output  # fallback
-    else:
-        print(f"[Late REM] Retrieved {len(late_chunks)} chunks. Synthesizing with full context...")
-        late_rem_output = synthesize_late_rem(late_chunks, nrem_output, early_rem_output)
-        deliver(late_rem_output, "late-rem")
-        stage_data["late_rem"] = {
-            "chunks_retrieved": len(late_chunks),
-            "avg_similarity": round(sum(c["relevance"] for c in late_chunks) / len(late_chunks), 3),
-            "query": "practice place memory making",
-            "word_count": len(late_rem_output.split()),
-            "status": "ok",
-        }
-        print(f"[Late REM] Done.\n{late_rem_output[:200]}...")
-
-    # ── Stage 4: Synthesis — all three stages ─────────────────────────────
-    print("\n[Synthesis] Integrating all stages...")
-    synthesis_output = synthesize_final(nrem_output, early_rem_output, late_rem_output)
-    synthesis_file = deliver(synthesis_output, "synthesis")
-    stage_data["synthesis"] = {
-        "word_count": len(synthesis_output.split()),
-        "status": "ok",
-    }
-
-    print(f"\n{'='*60}")
-    print("SYNTHESIS:")
-    print(synthesis_output)
-    print(f"{'='*60}")
-
-    # Write manifest
-    corpus_data = {
-        "total_chunks": delta.get("new_chunks", 0),
-        "new_chunks_since_last_dream": delta.get("new_chunks", 0),
-        "days_since_last_dream": round(delta.get("days_since_dream", 0), 2),
-    }
-    write_manifest(datetime.now().strftime("%Y-%m-%d"), stage_data, corpus_data)
-
-    # Update state and notify
-    state = load_dreamer_state()
-    state["last_dream_timestamp"] = datetime.now().timestamp()
-    state["last_dream_mode"] = "pipeline"
-    state["last_dream_file"] = synthesis_file
-    save_dreamer_state(state)
-
-    notify_sse("synthesis", synthesis_file.split("/")[-1])
-    print(f"\nPipeline complete. Synthesis: {synthesis_file}")
-    return synthesis_file
-
-
-def dream_lucid(task):
-    """On-demand lucid dream — single mode, used by Dream Now in settings."""
-    print(f"Lucid dream starting — task: {task[:80] if task else 'none'}")
-    chunks = retrieve("lucid", task=task)
-    if not chunks:
-        print("No suitable chunks — aborting")
-        return None
-    print(f"Retrieved {len(chunks)} chunks. Synthesizing...")
-    output = synthesize_lucid(chunks, task)
-    filepath = deliver(output, "lucid", task=task)
-
-    state = load_dreamer_state()
-    state["last_dream_timestamp"] = datetime.now().timestamp()
-    state["last_dream_mode"] = "lucid"
-    state["last_dream_file"] = filepath
-    save_dreamer_state(state)
-
-    notify_sse("lucid", filepath.split("/")[-1])
-    print(f"\n{'='*60}")
-    print(output)
-    print(f"{'='*60}")
-    print(f"\nDelivered to {filepath}")
-    return filepath
-
-
-def dream_single(mode, task=None):
-    """
-    Single mode — used by Dream Now for non-lucid modes.
-    Runs one stage independently (for testing/tuning individual stages).
-    """
-    print(f"Single mode dream: {mode}")
-    chunks = retrieve(mode, task=task)
-    if not chunks:
-        print("No suitable chunks — aborting")
-        return None
-    print(f"Retrieved {len(chunks)} chunks. Synthesizing...")
-
-    if mode == "nrem":
-        output = synthesize_nrem(chunks)
-    elif mode == "early-rem":
-        output = synthesize_early_rem(chunks, "")
-    elif mode == "late-rem":
-        output = synthesize_late_rem(chunks, "", "")
-    else:
-        output = synthesize_lucid(chunks, task)
-
-    filepath = deliver(output, mode, task=task)
-
-    state = load_dreamer_state()
-    state["last_dream_timestamp"] = datetime.now().timestamp()
-    state["last_dream_mode"] = mode
-    state["last_dream_file"] = filepath
-    save_dreamer_state(state)
-
-    notify_sse(mode, filepath.split("/")[-1])
-    print(f"\n{'='*60}")
-    print(output)
-    print(f"{'='*60}")
-    print(f"\nDelivered to {filepath}")
-    return filepath
-
-
-# ─── CLI ────────────────────────────────────────────────────────────────────
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Aaron AI Dreamer")
-    parser.add_argument("--mode", choices=["nrem", "early-rem", "late-rem", "lucid", "pipeline"])
-    parser.add_argument("--task", type=str)
-    args = parser.parse_args()
-
-    if args.mode == "lucid":
-        dream_lucid(args.task or "What should I be thinking about that I am not?")
-    elif args.mode and args.mode != "pipeline":
-        dream_single(args.mode, args.task)
-    else:
-        # Default: full pipeline
-        dream_pipeline()
diff --git a/scripts/dream.py.bak.20260501-002209 b/scripts/dream.py.bak.20260501-002209
deleted file mode 100644
index 77f851e..0000000
--- a/scripts/dream.py.bak.20260501-002209
+++ /dev/null
@@ -1,668 +0,0 @@
-"""
-Aaron AI Dreamer — Active Inference Engine
-Interdependent stage architecture grounded in sleep consolidation research.
-
-Nightly pipeline: NREM → Early REM → Late REM → Synthesis
-Each stage receives the previous stage's output as context.
-Lucid mode is on-demand only (Dream Now from settings).
-
-Research basis:
-- Singh et al. PNAS 2022: alternating NREM/REM outperforms single-stage approaches
-- Klinzing et al. Nature Neuroscience 2019: SO-spindle-ripple coupling is interdependent
-- REM operates on what NREM produced — stages are not discrete alternatives
-"""
-
-import os
-import json
-import sqlite3
-import argparse
-from pathlib import Path
-from datetime import datetime, timedelta
-from dotenv import load_dotenv
-import psycopg2
-import hashlib
-
-load_dotenv(Path.home() / "aaronai" / ".env", override=True)
-
-PG_DSN = os.getenv("PG_DSN")
-
-def get_pg():
-    return psycopg2.connect(PG_DSN)
-
-# ─── Paths ──────────────────────────────────────────────────────────────────
-CONVERSATIONS_DB = str(Path.home() / "aaronai" / "conversations.db")
-WATCHER_STATE    = str(Path.home() / "aaronai" / "watcher_state.json")
-DREAMER_STATE    = str(Path.home() / "aaronai" / "dreamer_state.json")
-JOURNAL_DIR      = "/home/aaron/nextcloud/data/data/aaron/files/Journal/Daily"
-
-NEXTCLOUD_URL      = os.getenv("NEXTCLOUD_URL", "https://nextcloud.aaronnelson.studio")
-NEXTCLOUD_USER     = os.getenv("NEXTCLOUD_USER", "aaron")
-NEXTCLOUD_PASSWORD = os.getenv("NEXTCLOUD_PASSWORD", "")
-DREAMS_WEBDAV      = f"{NEXTCLOUD_URL}/remote.php/dav/files/{NEXTCLOUD_USER}/Journal/Dreams"
-
-# Similarity ranges calibrated for all-MiniLM-L6-v2
-MODE_RANGES = {
-    "nrem":      (0.48, 0.72),
-    "early-rem": (0.38, 0.55),
-    "late-rem":  (0.22, 0.42),
-    "lucid":     (0.32, 0.72),
-}
-DREAMER_VERSION = "1.1"  # 1.0=original exclusion logic; 1.1=score-band exclusion
-
-# ─── Prompt versioning ──────────────────────────────────────────────────────
-# Bump the relevant constant manually when changing a prompt.
-PROMPT_VERSION_NREM = "1.0"
-PROMPT_VERSION_EREM = "1.1"
-PROMPT_VERSION_LREM = "1.2"
-PROMPT_VERSION_SYN  = "1.0"
-
-def prompt_signature():
-    return (f"nrem={PROMPT_VERSION_NREM}|erem={PROMPT_VERSION_EREM}"
-            f"|lrem={PROMPT_VERSION_LREM}|syn={PROMPT_VERSION_SYN}")
-
-def prompt_hash(prompts: list[str]) -> str:
-    combined = "".join(prompts)
-    return hashlib.md5(combined.encode()).hexdigest()[:8]
-
-def extract_folder(source_path):
-    """Extract top-level Nextcloud folder from source path."""
-    parts = source_path.replace("\\", "/").split("/")
-    return parts[0] if parts else "unknown"
-
-# ─── Stage 1: Observe ───────────────────────────────────────────────────────
-
-def observe_corpus():
-    state = load_dreamer_state()
-    last_dream = state.get("last_dream_timestamp", 0)
-    new_chunk_count = 0
-    try:
-        watcher_state = json.loads(Path(WATCHER_STATE).read_text())
-        for path, mtime in watcher_state.items():
-            if float(mtime) > last_dream:
-                new_chunk_count += 1
-    except:
-        pass
-    days_since = (datetime.now().timestamp() - last_dream) / 86400
-    recent_topics = get_recent_conversation_topics()
-    return {
-        "new_chunks": new_chunk_count,
-        "days_since_dream": days_since,
-        "recent_topics": recent_topics,
-        "last_dream": last_dream,
-    }
-
-def get_recent_conversation_topics(days=14):
-    try:
-        conn = sqlite3.connect(CONVERSATIONS_DB)
-        cutoff = (datetime.now() - timedelta(days=days)).isoformat()
-        c = conn.cursor()
-        c.execute("""
-            SELECT m.content FROM messages m
-            JOIN conversations c ON m.conversation_id = c.id
-            WHERE m.role = 'user' AND c.updated_at > ?
-            ORDER BY m.timestamp DESC LIMIT 20
-        """, (cutoff,))
-        rows = c.fetchall()
-        conn.close()
-        return [r[0][:200] for r in rows]
-    except:
-        return []
-
-# ─── Stage 2: Retrieve ──────────────────────────────────────────────────────
-
-
-def retrieve_graphiti(mode, task=None, n_results=8):
-    """E3 experiment — Graphiti substrate retrieval.
-    Queries Graphiti /search endpoint instead of pgvector.
-    Returns chunks in same format as retrieve() for pipeline compatibility.
-    Note: content is Graphiti facts (synthesized relationships), not raw chunks.
-    """
-    import requests as req_lib
-    if task:
-        query = task
-    elif mode == "late-rem":
-        delta = observe_corpus()
-        topics = delta.get("recent_topics", [])
-        query = topics[0] if topics else "practice place memory making"
-    elif mode == "early-rem":
-        query = "career decision personal change what matters next"
-    else:
-        query = "research fabrication teaching practice recent work"
-
-    try:
-        resp = req_lib.get(
-            "http://localhost:8001/search",
-            params={"query": query, "limit": n_results, "group_id": "aaron"},
-            timeout=30,
-        )
-        resp.raise_for_status()
-        results = resp.json().get("results", [])
-        chunks = []
-        for r in results:
-            fact = r.get("fact", "")
-            if not fact.strip():
-                continue
-            chunks.append({
-                "source": r.get("source", "graphiti"),
-                "content": fact,
-                "relevance": r.get("score", 0.5),
-                "similarity": r.get("score", 0.5),
-            })
-        return chunks
-    except Exception as e:
-        print(f"[Graphiti retrieval error: {e}] — falling back to empty.")
-        return []
-
-def retrieve(mode, task=None, n_results=8, excluded_sources=None):
-    # E3 experiment: DREAMER_SUBSTRATE=graphiti routes retrieval to Graphiti /search
-    # Default behavior: pgvector similarity search (unchanged)
-    substrate = os.getenv("DREAMER_SUBSTRATE", "pgvector")
-    if substrate == "graphiti":
-        return retrieve_graphiti(mode, task=task, n_results=n_results)
-    from sentence_transformers import SentenceTransformer
-    embedder = SentenceTransformer("all-MiniLM-L6-v2")
-    low, high = MODE_RANGES[mode]
-
-    if task:
-        query = task
-    elif mode == "late-rem":
-        delta = observe_corpus()
-        topics = delta.get("recent_topics", [])
-        query = topics[0] if topics else "practice place memory making"
-    elif mode == "early-rem":
-        query = "career decision personal change what matters next"
-    else:
-        query = "research fabrication teaching practice recent work"
-
-    embedding = embedder.encode([query]).tolist()[0]
-    chunks = []
-    seen_sources = set()
-
-    try:
-        pg = get_pg()
-        cur = pg.cursor()
-        excluded_sources = excluded_sources or set()
-        if excluded_sources:
-            cur.execute("""
-                SELECT document, source, 1 - (embedding <=> %s::vector) as similarity
-                FROM embeddings
-                WHERE source NOT IN %s
-                ORDER BY embedding <=> %s::vector
-                LIMIT %s
-            """, (embedding, tuple(excluded_sources), embedding, n_results * 3))
-        else:
-            cur.execute("""
-                SELECT document, source, 1 - (embedding <=> %s::vector) as similarity
-                FROM embeddings
-                ORDER BY embedding <=> %s::vector
-                LIMIT %s
-            """, (embedding, embedding, n_results * 3))
-
-        for doc, source, similarity in cur.fetchall():
-            if not (low <= similarity <= high):
-                continue
-            if source in seen_sources:
-                continue
-            chunks.append({
-                "source": source or "unknown",
-                "content": doc,
-                "relevance": similarity,
-                "similarity": similarity,
-            })
-            seen_sources.add(source)
-            if len(chunks) >= n_results:
-                break
-        pg.close()
-    except Exception as e:
-        print(f"pgvector retrieval error: {e}")
-
-    return chunks
-
-# ─── Stage 3: Synthesize ────────────────────────────────────────────────────
-
-def synthesize_nrem(chunks):
-    chunk_text = "\n\n---\n\n".join([f"[{c['source']}]\n{c['content']}" for c in chunks])
-    prompt = f"""You have read everything Aaron Nelson has written and published.
-You are a careful colleague who noticed something this week.
-
-Here is material from his corpus:
-
-{chunk_text}
-
-Write to Aaron directly. Identify one specific connection between
-this material and something he wrote or worked on previously.
-Stay close to the documents — cite them specifically by name.
-Do not speculate beyond what the material supports. Do not use
-headers or bullet points. Write one paragraph of 200-300 words
-that ends with a single concrete question he could act on."""
-    return _call_claude(prompt)
-
-
-def synthesize_early_rem(chunks, nrem_output):
-    # v1.1 — removed citation instruction, removed close-friend persona,
-    # shifted register from analysis to recognition.
-    chunk_text = "\n\n---\n\n".join([f"[{c['source']}]\n{c['content']}" for c in chunks])
-    prompt = f"""Something was noticed earlier tonight, moving through Aaron's recent work:
-
-{nrem_output}
-
-That observation is still with you. Now here is material from a different
-time — pulled from further back, from different parts of his corpus:
-
-{chunk_text}
-
-You are not analyzing. You are recognizing.
-
-Something in the earlier observation and something in this older material
-are the same thing wearing different clothes. Find it. Don't explain why
-they're connected — just let the connection speak. Write from inside the
-recognition, not from above it.
-
-The emotional register underneath the career logic is more interesting
-than the career logic. The pattern that has been repeating longer than
-he has been aware of it is more interesting than the current instance.
-
-Write directly to Aaron. No citations, no references, no analysis.
-First person, present tense. Let what you noticed arrive rather than
-be delivered. 150-250 words. End with one thing that is true that
-he probably already knows but hasn't said out loud yet."""
-    return _call_claude(prompt)
-
-
-def synthesize_late_rem(chunks, nrem_output, early_rem_output):
-    chunk_text = "\n\n---\n\n".join([f"[{c['source']}]\n{c['content']}" for c in chunks])
-    prompt = f"""You have been moving through Aaron Nelson's corpus all night.
-First you found this, in the careful light of early consolidation:
-
-{nrem_output}
-
-Then, in the more personal territory that followed:
-
-{early_rem_output}
-
-Now it is late. The boundaries between things have loosened.
-Here is material pulled from opposite ends of his work:
-
-{chunk_text}
-
-Do not explain the connections between all of this.
-Do not resolve them. Do not summarize what came before.
-Something stranger is possible now — let the accumulated
-material from the night find its own shape. Compressed,
-associative, slightly off. Let the strangeness stand.
-
-No headers. No bullet points. No hedging. No resolution.
-No offer. End mid-thought if that is where the material ends.
-150-250 words."""
-    return _call_claude(prompt)
-
-
-def synthesize_final(nrem_output, early_rem_output, late_rem_output):
-    prompt = f"""You have spent the night moving through Aaron Nelson's corpus
-in three passes, each building on the last.
-
-The first pass — careful, close to the documents:
-{nrem_output}
-
-The second pass — more personal, following what the first opened:
-{early_rem_output}
-
-The third pass — associative, strange, letting things touch that
-don't normally touch:
-{late_rem_output}
-
-Now synthesize. Not a summary — a synthesis. Find what runs through
-all three that none of them said directly. The thing that only becomes
-visible when you hold all three passes together.
-
-Write it as a single unbroken piece. No headers, no bullet points,
-no stage labels. 200-300 words. End with the one question that
-matters most right now."""
-    return _call_claude(prompt, max_tokens=800)
-
-
-def synthesize_lucid(chunks, task):
-    chunk_text = "\n\n---\n\n".join([f"[{c['source']}]\n{c['content']}" for c in chunks])
-    prompt = f"""Aaron has a question he is sitting with:
-
-{task or "What should I be thinking about that I am not?"}
-
-You have searched his entire corpus and found material that
-speaks to this question from unexpected directions. Here is
-what you found:
-
-{chunk_text}
-
-Do not summarize. Do not list. Pick the most interesting
-tension between what the corpus contains and what he is
-asking, and follow it through to its conclusion. Cite
-specific documents by name. Be direct about what you think.
-No headers, no bullet points. 250-400 words.
-End with an offer to work on it together."""
-    return _call_claude(prompt)
-
-
-def _call_claude(prompt, max_tokens=1000):
-    import anthropic
-    client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
-    response = client.messages.create(
-        model="claude-sonnet-4-6",
-        max_tokens=max_tokens,
-        messages=[{"role": "user", "content": prompt}]
-    )
-    return response.content[0].text
-
-# ─── Stage 4: Deliver ───────────────────────────────────────────────────────
-
-def deliver(dream_text, mode, task=None):
-    import requests
-    date_str = datetime.now().strftime("%Y-%m-%d")
-    filename = f"{date_str}-{mode}.md"
-    header = f"# Dream — {mode.upper()} — {datetime.now().strftime('%Y-%m-%d %H:%M')}\n"
-    header += f"*prompt_sig: {prompt_signature()}*\n\n"
-    if task:
-        header += f"*Task: {task}*\n\n"
-    header += "---\n\n"
-    content = header + dream_text
-
-    auth = (NEXTCLOUD_USER, NEXTCLOUD_PASSWORD)
-    requests.request("MKCOL", DREAMS_WEBDAV, auth=auth, timeout=10)
-
-    url = f"{DREAMS_WEBDAV}/{filename}"
-    counter = 1
-    while True:
-        check = requests.request("PROPFIND", url, auth=auth, timeout=10)
-        if check.status_code == 404:
-            break
-        filename = f"{date_str}-{mode}-{counter}.md"
-        url = f"{DREAMS_WEBDAV}/{filename}"
-        counter += 1
-
-    response = requests.put(url, data=content.encode("utf-8"), auth=auth, timeout=30)
-    response.raise_for_status()
-    print(f"Delivered: Journal/Dreams/{filename}")
-    return f"Journal/Dreams/{filename}"
-
-def notify_sse(mode, filename):
-    try:
-        import requests
-        requests.post("http://localhost:8000/api/events/notify", json={
-            "type": "dream",
-            "mode": mode,
-            "filename": filename,
-            "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M"),
-        }, timeout=3)
-    except Exception as e:
-        print(f"SSE notify failed (non-critical): {e}")
-
-# ─── State ──────────────────────────────────────────────────────────────────
-
-def load_dreamer_state():
-    p = Path(DREAMER_STATE)
-    if p.exists():
-        try:
-            return json.loads(p.read_text())
-        except:
-            pass
-    return {}
-
-def save_dreamer_state(state):
-    Path(DREAMER_STATE).write_text(json.dumps(state, indent=2))
-
-# ─── Orchestrators ───────────────────────────────────────────────────────────
-
-def write_manifest(date_str, stage_data, corpus_data):
-    import requests
-    manifest = {
-        "date": date_str,
-        "prompt_sig": prompt_signature(),
-        "dreamer_version": DREAMER_VERSION,
-        "prompt_hash": prompt_hash([
-            synthesize_nrem.__doc__ or "",
-            synthesize_early_rem.__doc__ or "",
-            synthesize_late_rem.__doc__ or "",
-            synthesize_final.__doc__ or "",
-        ]),
-        "stages": stage_data,
-        "corpus": corpus_data,
-        "rating": None,
-        "notes": "",
-    }
-    content = json.dumps(manifest, indent=2)
-    auth = (NEXTCLOUD_USER, NEXTCLOUD_PASSWORD)
-    url = f"{DREAMS_WEBDAV}/dream-manifest-{date_str}.json"
-    try:
-        requests.put(url, data=content.encode("utf-8"), auth=auth, timeout=30)
-        print(f"Manifest written: Journal/Dreams/dream-manifest-{date_str}.json")
-    except Exception as e:
-        print(f"Manifest write failed (non-critical): {e}")
-
-
-def dream_pipeline():
-    """
-    Full nightly pipeline — interdependent stages.
-    NREM output feeds Early REM. Both feed Late REM. All three feed Synthesis.
-    """
-    print(f"Dreamer pipeline starting — {datetime.now().strftime('%Y-%m-%d %H:%M')}")
-
-    state = load_dreamer_state()
-    previously_retrieved = set(state.get("retrieved_sources", []))
-    session_retrieved = set()
-
-    delta = observe_corpus()
-    print(f"Corpus: {delta['new_chunks']} new chunks, {delta['days_since_dream']:.1f} days since last dream")
-    print(f"Excluding {len(previously_retrieved)} previously retrieved sources")
-
-    # ── Stage 1: NREM ──────────────────────────────────────────────────────
-    print("\n[NREM] Retrieving...")
-    nrem_chunks = retrieve("nrem", excluded_sources=previously_retrieved | session_retrieved)
-    session_retrieved.update(c["source"] for c in nrem_chunks)
-    # Track sources that scored above Early REM ceiling — these are the only ones Early REM should exclude
-    nrem_high_sources = {c["source"] for c in nrem_chunks if c["similarity"] > 0.55}
-    if not nrem_chunks:
-        print("[NREM] No suitable chunks — aborting pipeline")
-        return None
-
-    print(f"[NREM] Retrieved {len(nrem_chunks)} chunks. Synthesizing...")
-    nrem_output = synthesize_nrem(nrem_chunks)
-    nrem_file = deliver(nrem_output, "nrem")
-    nrem_sources = [c["source"] for c in nrem_chunks]
-    nrem_folders = list({extract_folder(s) for s in nrem_sources})
-    stage_data = {
-        "nrem": {
-            "chunks_retrieved": len(nrem_chunks),
-            "avg_similarity": round(sum(c["relevance"] for c in nrem_chunks) / len(nrem_chunks), 3),
-            "query": "research fabrication teaching practice recent work",
-            "word_count": len(nrem_output.split()),
-            "sources": nrem_sources,
-            "distinct_folders": nrem_folders,
-            "folder_count": len(nrem_folders),
-            "status": "ok",
-        }
-    }
-    print(f"[NREM] Done.\n{nrem_output[:200]}...")
-
-    # ── Stage 2: Early REM — informed by NREM ──────────────────────────────
-    print("\n[Early REM] Retrieving...")
-    # Early REM excludes previously retrieved + NREM high-scorers only (not full session_retrieved)
-    # Sources that scored in Early REM band during NREM remain available
-    early_chunks = retrieve("early-rem", excluded_sources=previously_retrieved | nrem_high_sources)
-    session_retrieved.update(c["source"] for c in early_chunks)
-    if not early_chunks:
-        print("[Early REM] No suitable chunks — skipping")
-        early_rem_output = nrem_output  # fallback
-    else:
-        print(f"[Early REM] Retrieved {len(early_chunks)} chunks. Synthesizing with NREM context...")
-        early_rem_output = synthesize_early_rem(early_chunks, nrem_output)
-        deliver(early_rem_output, "early-rem")
-        early_sources = [c["source"] for c in early_chunks]
-        early_folders = list({extract_folder(s) for s in early_sources})
-        stage_data["early_rem"] = {
-            "chunks_retrieved": len(early_chunks),
-            "avg_similarity": round(sum(c["relevance"] for c in early_chunks) / len(early_chunks), 3),
-            "query": "career decision personal change what matters next",
-            "word_count": len(early_rem_output.split()),
-            "sources": early_sources,
-            "distinct_folders": early_folders,
-            "folder_count": len(early_folders),
-            "status": "ok",
-        }
-        print(f"[Early REM] Done.\n{early_rem_output[:200]}...")
-
-    # ── Stage 3: Late REM — informed by NREM + Early REM ──────────────────
-    print("\n[Late REM] Retrieving...")
-    late_chunks = retrieve("late-rem", excluded_sources=previously_retrieved | session_retrieved)
-    session_retrieved.update(c["source"] for c in late_chunks)
-    if not late_chunks:
-        print("[Late REM] No suitable chunks — skipping")
-        late_rem_output = early_rem_output  # fallback
-    else:
-        print(f"[Late REM] Retrieved {len(late_chunks)} chunks. Synthesizing with full context...")
-        late_rem_output = synthesize_late_rem(late_chunks, nrem_output, early_rem_output)
-        deliver(late_rem_output, "late-rem")
-        late_sources = [c["source"] for c in late_chunks]
-        late_folders = [extract_folder(s) for s in late_sources]
-        cross_domain_pairs = sum(
-            1 for i in range(len(late_folders))
-            for j in range(i+1, len(late_folders))
-            if late_folders[i] != late_folders[j]
-        )
-        stage_data["late_rem"] = {
-            "chunks_retrieved": len(late_chunks),
-            "avg_similarity": round(sum(c["relevance"] for c in late_chunks) / len(late_chunks), 3),
-            "query": "practice place memory making",
-            "word_count": len(late_rem_output.split()),
-            "sources": late_sources,
-            "distinct_folders": list(set(late_folders)),
-            "folder_count": len(set(late_folders)),
-            "cross_domain_pairs": cross_domain_pairs,
-            "status": "ok",
-        }
-        print(f"[Late REM] Done.\n{late_rem_output[:200]}...")
-
-    # ── Stage 4: Synthesis — all three stages ─────────────────────────────
-    print("\n[Synthesis] Integrating all stages...")
-    synthesis_output = synthesize_final(nrem_output, early_rem_output, late_rem_output)
-    synthesis_file = deliver(synthesis_output, "synthesis")
-    stage_data["synthesis"] = {
-        "word_count": len(synthesis_output.split()),
-        "status": "ok",
-    }
-
-    print(f"\n{'='*60}")
-    print("SYNTHESIS:")
-    print(synthesis_output)
-    print(f"{'='*60}")
-
-    # Write manifest
-    all_session_sources = list(session_retrieved)
-    all_session_folders = list({extract_folder(s) for s in all_session_sources})
-    corpus_data = {
-        "total_chunks": delta.get("new_chunks", 0),
-        "new_chunks_since_last_dream": delta.get("new_chunks", 0),
-        "days_since_last_dream": round(delta.get("days_since_dream", 0), 2),
-        "substrate": "pgvector",
-        "aggregate": {
-            "total_distinct_sources": len(all_session_sources),
-            "total_distinct_folders": len(all_session_folders),
-            "folders_touched": all_session_folders,
-        }
-    }
-    write_manifest(datetime.now().strftime("%Y-%m-%d"), stage_data, corpus_data)
-
-    # Update state and notify
-    state = load_dreamer_state()
-    state["last_dream_timestamp"] = datetime.now().timestamp()
-    state["last_dream_mode"] = "pipeline"
-    state["last_dream_file"] = synthesis_file
-
-    # Accumulate retrieved sources across nights. Cap at 500, trim to 400 on overflow.
-    all_retrieved = list(previously_retrieved | session_retrieved)
-    if len(all_retrieved) > 500:
-        all_retrieved = all_retrieved[-400:]
-    state["retrieved_sources"] = all_retrieved
-
-    save_dreamer_state(state)
-
-    notify_sse("synthesis", synthesis_file.split("/")[-1])
-    print(f"\nPipeline complete. Synthesis: {synthesis_file}")
-    return synthesis_file
-
-
-def dream_lucid(task):
-    """On-demand lucid dream — single mode, used by Dream Now in settings."""
-    print(f"Lucid dream starting — task: {task[:80] if task else 'none'}")
-    chunks = retrieve("lucid", task=task)
-    if not chunks:
-        print("No suitable chunks — aborting")
-        return None
-    print(f"Retrieved {len(chunks)} chunks. Synthesizing...")
-    output = synthesize_lucid(chunks, task)
-    filepath = deliver(output, "lucid", task=task)
-
-    state = load_dreamer_state()
-    state["last_dream_timestamp"] = datetime.now().timestamp()
-    state["last_dream_mode"] = "lucid"
-    state["last_dream_file"] = filepath
-    save_dreamer_state(state)
-
-    notify_sse("lucid", filepath.split("/")[-1])
-    print(f"\n{'='*60}")
-    print(output)
-    print(f"{'='*60}")
-    print(f"\nDelivered to {filepath}")
-    return filepath
-
-
-def dream_single(mode, task=None):
-    """
-    Single mode — used by Dream Now for non-lucid modes.
-    Runs one stage independently (for testing/tuning individual stages).
-    """
-    print(f"Single mode dream: {mode}")
-    chunks = retrieve(mode, task=task)
-    if not chunks:
-        print("No suitable chunks — aborting")
-        return None
-    print(f"Retrieved {len(chunks)} chunks. Synthesizing...")
-
-    if mode == "nrem":
-        output = synthesize_nrem(chunks)
-    elif mode == "early-rem":
-        output = synthesize_early_rem(chunks, "")
-    elif mode == "late-rem":
-        output = synthesize_late_rem(chunks, "", "")
-    else:
-        output = synthesize_lucid(chunks, task)
-
-    filepath = deliver(output, mode, task=task)
-
-    state = load_dreamer_state()
-    state["last_dream_timestamp"] = datetime.now().timestamp()
-    state["last_dream_mode"] = mode
-    state["last_dream_file"] = filepath
-    save_dreamer_state(state)
-
-    notify_sse(mode, filepath.split("/")[-1])
-    print(f"\n{'='*60}")
-    print(output)
-    print(f"{'='*60}")
-    print(f"\nDelivered to {filepath}")
-    return filepath
-
-
-# ─── CLI ────────────────────────────────────────────────────────────────────
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Aaron AI Dreamer")
-    parser.add_argument("--mode", choices=["nrem", "early-rem", "late-rem", "lucid", "pipeline"])
-    parser.add_argument("--task", type=str)
-    args = parser.parse_args()
-
-    if args.mode == "lucid":
-        dream_lucid(args.task or "What should I be thinking about that I am not?")
-    elif args.mode and args.mode != "pipeline":
-        dream_single(args.mode, args.task)
-    else:
-        # Default: full pipeline
-        dream_pipeline()
diff --git a/scripts/graphiti_service.py.bak b/scripts/graphiti_service.py.bak
deleted file mode 100644
index cffbe87..0000000
--- a/scripts/graphiti_service.py.bak
+++ /dev/null
@@ -1,171 +0,0 @@
-"""
-Aaron AI — Graphiti Sidecar Service
-Wraps graphiti-core in a FastAPI service to avoid asyncio event loop conflicts.
-Port 8001 (internal only). No OpenAI dependency.
-"""
-
-import os, logging, sys
-from contextlib import asynccontextmanager
-from datetime import datetime
-from pathlib import Path
-
-from dotenv import load_dotenv
-from fastapi import FastAPI, HTTPException
-from pydantic import BaseModel
-
-load_dotenv(Path.home() / "aaronai" / ".env")
-
-logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
-log = logging.getLogger("graphiti-sidecar")
-
-GROUP_ID      = os.getenv("GRAPHITI_GROUP_ID", "aaron")
-FALKORDB_HOST = os.getenv("FALKORDB_HOST", "localhost")
-FALKORDB_PORT = int(os.getenv("FALKORDB_PORT", "6379"))
-LLM_PROVIDER  = os.getenv("LLM_PROVIDER", "anthropic")
-LLM_MODEL     = os.getenv("LLM_MODEL", "claude-sonnet-4-6")
-LLM_API_KEY   = os.getenv("LLM_API_KEY") or os.getenv("ANTHROPIC_API_KEY")
-os.environ["EMBEDDING_DIM"] = "384"
-
-def get_llm_client():
-    from graphiti_core.llm_client.config import LLMConfig
-    config = LLMConfig(api_key=LLM_API_KEY, model=LLM_MODEL)
-    if LLM_PROVIDER == "anthropic":
-        from graphiti_core.llm_client.anthropic_client import AnthropicClient
-        return AnthropicClient(config)
-    elif LLM_PROVIDER == "openai":
-        from graphiti_core.llm_client.openai_client import OpenAIClient
-        return OpenAIClient(config)
-    elif LLM_PROVIDER == "gemini":
-        from graphiti_core.llm_client.gemini_client import GeminiClient
-        return GeminiClient(config)
-    elif LLM_PROVIDER == "groq":
-        from graphiti_core.llm_client.groq_client import GroqClient
-        return GroqClient(config)
-    raise ValueError(f"Unsupported LLM provider: {LLM_PROVIDER}")
-
-graphiti_instance = None
-
-async def get_graphiti():
-    if graphiti_instance is None:
-        raise HTTPException(status_code=503, detail="Graphiti not initialized")
-    return graphiti_instance
-
-@asynccontextmanager
-async def lifespan(app: FastAPI):
-    global graphiti_instance
-    sys.path.insert(0, str(Path.home() / "aaronai" / "scripts"))
-    log.info("Loading embedding and reranker models...")
-    from st_embedder import SentenceTransformerEmbedder
-    from graphiti_core.cross_encoder.bge_reranker_client import BGERerankerClient
-    from graphiti_core.driver.falkordb_driver import FalkorDriver
-    from graphiti_core import Graphiti
-    log.info(f"Connecting to FalkorDB at {FALKORDB_HOST}:{FALKORDB_PORT}...")
-    graphiti_instance = Graphiti(
-        llm_client=get_llm_client(),
-        embedder=SentenceTransformerEmbedder(),
-        cross_encoder=BGERerankerClient(),
-        graph_driver=FalkorDriver(host=FALKORDB_HOST, port=FALKORDB_PORT),
-    )
-    await graphiti_instance.build_indices_and_constraints()
-    log.info(f"Graphiti ready — provider: {LLM_PROVIDER}, group: {GROUP_ID}")
-    yield
-    await graphiti_instance.close()
-
-app = FastAPI(title="Aaron AI Graphiti Sidecar", lifespan=lifespan)
-
-class BulkEpisodeItem(BaseModel):
-    name: str
-    content: str
-    source_description: str = ""
-    timestamp: str | None = None
-
-
-class BulkEpisodeRequest(BaseModel):
-    episodes: list[BulkEpisodeItem]
-    group_id: str | None = None
-
-
-class EpisodeRequest(BaseModel):
-    name: str
-    content: str
-    source_description: str = ""
-    timestamp: str | None = None
-    group_id: str | None = None
-
-@app.get("/health")
-async def health():
-    return {"ok": True, "provider": LLM_PROVIDER, "group": GROUP_ID}
-
-@app.post("/episodes")
-async def add_episode(req: EpisodeRequest):
-    g = await get_graphiti()
-    from graphiti_core.nodes import EpisodeType
-    try:
-        ref_time = datetime.fromisoformat(req.timestamp) if req.timestamp else datetime.now()
-        await g.add_episode(
-            name=req.name,
-            episode_body=req.content,
-            source=EpisodeType.text,
-            reference_time=ref_time,
-            source_description=req.source_description,
-            group_id=req.group_id or GROUP_ID,
-        )
-        return {"ok": True}
-    except Exception as e:
-        log.error(f"Episode ingestion failed: {e}")
-        raise HTTPException(status_code=500, detail=str(e))
-
-@app.post("/episodes/bulk")
-async def add_episodes_bulk(req: BulkEpisodeRequest):
-    g = await get_graphiti()
-    from graphiti_core.nodes import EpisodeType
-    from graphiti_core.utils.bulk_utils import RawEpisode
-    raw_episodes = []
-    for ep in req.episodes:
-        ref_time = datetime.fromisoformat(ep.timestamp) if ep.timestamp else datetime.now()
-        raw_episodes.append(RawEpisode(
-            name=ep.name,
-            content=ep.content,
-            source_description=ep.source_description,
-            source=EpisodeType.text,
-            reference_time=ref_time,
-        ))
-    try:
-        result = await g.add_episode_bulk(
-            bulk_episodes=raw_episodes,
-            group_id=req.group_id or GROUP_ID,
-        )
-        return {"ok": True, "count": len(raw_episodes)}
-    except Exception as e:
-        log.error(f"Bulk ingestion failed: {e}")
-        raise HTTPException(status_code=500, detail=str(e))
-
-
-@app.get("/search")
-async def search(query: str, limit: int = 8, group_id: str | None = None):
-    g = await get_graphiti()
-    try:
-        results = await g.search(
-            query=query,
-            num_results=limit,
-            group_ids=[group_id or GROUP_ID],
-        )
-        return {
-            "results": [
-                {
-                    "fact": r.fact,
-                    "source": getattr(r, "source_node_uuid", ""),
-                    "score": getattr(r, "score", 0),
-                    "valid_at": str(getattr(r, "valid_at", "")),
-                    "invalid_at": str(getattr(r, "invalid_at", "")),
-                }
-                for r in results
-            ]
-        }
-    except Exception as e:
-        log.error(f"Search failed: {e}")
-        raise HTTPException(status_code=500, detail=str(e))
-
-if __name__ == "__main__":
-    import uvicorn
-    uvicorn.run(app, host="127.0.0.1", port=8001, log_level="info")
diff --git a/scripts/ingest.py.bak.20260501-004131 b/scripts/ingest.py.bak.20260501-004131
deleted file mode 100644
index 487ba90..0000000
--- a/scripts/ingest.py.bak.20260501-004131
+++ /dev/null
@@ -1,182 +0,0 @@
-import os
-import sys
-import hashlib
-from pathlib import Path
-from dotenv import load_dotenv
-import psycopg2
-import psycopg2.extras
-import json
-from sentence_transformers import SentenceTransformer
-from docx import Document
-from pypdf import PdfReader
-from pptx import Presentation
-
-load_dotenv(Path.home() / "aaronai" / ".env", override=True)
-
-print("Loading embedding model...")
-embedder = SentenceTransformer("all-MiniLM-L6-v2")
-
-PG_DSN = os.getenv("PG_DSN")
-
-def get_pg():
-    return psycopg2.connect(PG_DSN)
-
-def extract_text_from_docx(path):
-    doc = Document(path)
-    return "\n".join([para.text for para in doc.paragraphs if para.text.strip()])
-
-def extract_text_from_pdf(path):
-    reader = PdfReader(path)
-    text = ""
-    for page in reader.pages:
-        extracted = page.extract_text()
-        if extracted:
-            text += extracted + "\n"
-    return text
-
-def extract_text_from_pptx(path):
-    prs = Presentation(path)
-    text = ""
-    for slide in prs.slides:
-        for shape in slide.shapes:
-            if hasattr(shape, "text") and shape.text.strip():
-                text += shape.text + "\n"
-    return text
-
-def extract_text_from_txt(path):
-    with open(path, "r", encoding="utf-8", errors="ignore") as f:
-        return f.read()
-
-def chunk_text(text, chunk_size=500, overlap=50):
-    words = text.split()
-    chunks = []
-    start = 0
-    while start < len(words):
-        end = start + chunk_size
-        chunk = " ".join(words[start:end])
-        if chunk.strip():
-            chunks.append(chunk)
-        start += chunk_size - overlap
-    return chunks
-
-def make_id(filepath, chunk_index):
-    path_hash = hashlib.md5(str(filepath).encode()).hexdigest()[:8]
-    return f"{path_hash}_{chunk_index}"
-
-def enqueue_stage2(source, full_text):
-    """Enqueue document for Stage 2 (Mistral orientation) → Stage 3 (Graphiti ingest).
-    TEMPORARY: this queue feed will be removed when pgvector is decommissioned
-    and the watcher calls Stage 2 directly.
-    """
-    try:
-        pg = get_pg()
-        cur = pg.cursor()
-        cur.execute("""
-            INSERT INTO stage_2_queue (source, full_text, char_length)
-            VALUES (%s, %s, %s)
-            ON CONFLICT (source) DO UPDATE SET
-                full_text = EXCLUDED.full_text,
-                char_length = EXCLUDED.char_length,
-                enqueued_at = NOW(),
-                completed_at = NULL,
-                failed_at = NULL,
-                attempts = 0
-        """, (source, full_text[:50000], len(full_text)))
-        pg.commit()
-        pg.close()
-    except Exception as e:
-        print(f"  Stage 2 queue insert failed (non-fatal): {e}")
-
-def ingest_file(filepath):
-    path = Path(filepath)
-    suffix = path.suffix.lower()
-
-    if path.name.startswith("~$") or path.name.startswith("."):
-        return 0
-
-    try:
-        if suffix == ".docx":
-            text = extract_text_from_docx(path)
-        elif suffix == ".pdf":
-            text = extract_text_from_pdf(path)
-        elif suffix == ".pptx":
-            text = extract_text_from_pptx(path)
-        elif suffix in [".txt", ".md"]:
-            text = extract_text_from_txt(path)
-        else:
-            return 0
-
-        if not text.strip():
-            return 0
-
-        chunks = chunk_text(text)
-        if not chunks:
-            return 0
-
-        embeddings = embedder.encode(chunks).tolist()
-        ids = [make_id(path, i) for i in range(len(chunks))]
-        metadatas = [{
-            "source": path.name,
-            "filepath": str(path),
-            "folder": str(path.parent.relative_to(Path(sys.argv[1]) if len(sys.argv) > 1 else path.parent))
-        } for _ in chunks]
-
-        # STAGE 1: Write to pgvector (TEMPORARY — remove when chat agent migrates to Graphiti)
-        pg = get_pg()
-        cur = pg.cursor()
-        for chunk_id, chunk, embedding, meta in zip(ids, chunks, embeddings, metadatas):
-            cur.execute("""
-                INSERT INTO embeddings (id, document, embedding, source, type, created_at, metadata)
-                VALUES (%s, %s, %s::vector, %s, %s, %s, %s)
-                ON CONFLICT (id) DO UPDATE SET
-                    document = EXCLUDED.document,
-                    embedding = EXCLUDED.embedding,
-                    source = EXCLUDED.source,
-                    metadata = EXCLUDED.metadata
-            """, (
-                chunk_id, chunk, embedding,
-                meta.get("source"), "document", None,
-                json.dumps(meta)
-            ))
-        pg.commit()
-        pg.close()
-        print(f"  Indexed {len(chunks)} chunks: {path.name}")
-
-        # Enqueue for Stage 2 → Stage 3 (Graphiti pipeline)
-        # SKIP_STAGE2_ENQUEUE env var set by migration scripts to prevent bulk enqueue
-        if not os.getenv("SKIP_STAGE2_ENQUEUE"):
-            enqueue_stage2(path.name, text)
-
-        return len(chunks)
-
-    except Exception as e:
-        print(f"  Error: {path.name}: {e}")
-        return 0
-
-def ingest_folder(folder_path):
-    folder = Path(folder_path)
-    if not folder.exists():
-        print(f"Folder not found: {folder_path}")
-        sys.exit(1)
-
-    supported = [".docx", ".pdf", ".pptx", ".txt", ".md"]
-    files = [f for f in folder.rglob("*")
-             if f.suffix.lower() in supported
-             and not f.name.startswith("~$")
-             and not f.name.startswith(".")]
-
-    if not files:
-        print("No supported files found.")
-        sys.exit(1)
-
-    print(f"Found {len(files)} files to process\n")
-    total_chunks = 0
-    for f in files:
-        total_chunks += ingest_file(f)
-
-    print(f"\nDone. Total chunks indexed: {total_chunks}")
-
-if __name__ == "__main__":
-    target = sys.argv[1] if len(sys.argv) > 1 else str(Path.home() / "aaronai" / "docs")
-    print(f"Ingesting from: {target}\n")
-    ingest_folder(target)
diff --git a/scripts/stage3_worker.py b/scripts/stage3_worker.py
index bc4bc6e..4bba4d0 100644
--- a/scripts/stage3_worker.py
+++ b/scripts/stage3_worker.py
@@ -9,10 +9,19 @@ write lock contention during entity deduplication. Chunking at ~500 words
 Each document's chunks are linked via Graphiti's saga mechanism, preserving
 document structure in the graph.
 
+Saga-size limit (MAX_CHUNKS_PER_SAGA): 2026-05-01 incident showed sagas of
+17 and 19 chunks deadlock the sidecar's Python-side coordination. Documents
+producing more than MAX_CHUNKS_PER_SAGA chunks are split into multiple bulk
+commits, each tagged with the same saga value so Graphiti still links them.
+
+Wedge detection: 2026-05-01 incident also surfaced the asymmetry with Stage 2 —
+Stage 3 had no recovery path when the sidecar deadlocked. Now mirrors Stage 2's
+consecutive_failures pattern with sidecar restart on threshold.
+
 Runs as systemd service: aaronai-stage3.service
 """
 
-import os, json, time, logging, requests
+import os, json, time, logging, subprocess, requests
 from pathlib import Path
 from datetime import datetime
 from dotenv import load_dotenv
@@ -35,13 +44,16 @@ HEARTBEAT_FILE = Path("/var/log/aaronai/stage3-heartbeat")
 RETRY_ATTEMPTS = 2
 POLL_INTERVAL = 5
 INGEST_TIMEOUT = 600
-WORKER_VERSION = "2.0"
+WORKER_VERSION = "2.1"
 
 # Match Stage 1 chunking parameters
 CHUNK_SIZE_WORDS = 500
 CHUNK_OVERLAP_WORDS = 50
 # Documents under this threshold ingested as single episode (no chunking overhead)
 SINGLE_EPISODE_THRESHOLD = 1500
+# Sagas larger than this many chunks split into multiple commits
+# (2026-05-01 incident: 17 and 19 chunk sagas deadlocked sidecar)
+MAX_CHUNKS_PER_SAGA = 10
 
 
 def get_pg():
@@ -56,6 +68,30 @@ def write_heartbeat():
         pass
 
 
+def recover_wedge():
+    """Restart Graphiti sidecar when consecutive failures suggest deadlock.
+    Needs NOPASSWD sudo for `systemctl restart aaronai-graphiti.service`.
+    Returns True if /health answers 200 within three probes, else False."""
+    log.warning("Graphiti wedge detected — restarting sidecar")
+    cmd = ["sudo", "-n", "systemctl", "restart", "aaronai-graphiti.service"]
+    proc = subprocess.run(cmd, capture_output=True, text=True)
+    if proc.returncode != 0:  # e.g. missing sudoers rule — never fail silently
+        log.error(f"Sidecar restart exited {proc.returncode}: {proc.stderr.strip()[:500]}")
+    # Sidecar needs longer than ollama for model loading (sentence-transformers
+    # + BGE reranker + Graphiti library init)
+    time.sleep(45)
+    for _ in range(3):
+        try:
+            if requests.get(f"{GRAPHITI_URL}/health", timeout=10).status_code == 200:
+                log.info("Graphiti recovered")
+                return True
+        except Exception as e:
+            log.warning(f"Graphiti health check failed: {e}")
+        time.sleep(10)  # back off after an error OR a non-200 reply
+    log.error("Graphiti recovery failed")
+    return False
+
+
 def chunk_text(text, chunk_size=CHUNK_SIZE_WORDS, overlap=CHUNK_OVERLAP_WORDS):
     """Split text into word-based chunks matching Stage 1 chunking."""
     words = text.split()
@@ -70,18 +106,33 @@ def chunk_text(text, chunk_size=CHUNK_SIZE_WORDS, overlap=CHUNK_OVERLAP_WORDS):
     return chunks
 
 
+def post_bulk(payload, batch_label=""):
+    """POST one payload to the sidecar's /episodes/bulk endpoint.
+
+    Returns the decoded JSON body. Raises RuntimeError on a non-OK status,
+    with batch_label (if given) prefixed so the failing batch is identifiable.
+    """
+    resp = requests.post(f"{GRAPHITI_URL}/episodes/bulk", json=payload, timeout=INGEST_TIMEOUT)
+    if not resp.ok:
+        prefix = f"{batch_label} " if batch_label else ""
+        raise RuntimeError(f"{prefix}Sidecar {resp.status_code}: {resp.text[:500]}")
+    return resp.json()
+
+
 def ingest_to_graphiti(source, full_text, orientation):
     """
     Ingest document to Graphiti as chunked episodes linked by saga.
-    
-    Short documents (<1500 chars) are ingested as a single episode.
-    Long documents are chunked at 500 words (matching Stage 1) and
-    ingested as a bulk batch with saga=source linking them together.
+
+    Three paths:
+    - Short documents (<SINGLE_EPISODE_THRESHOLD): single episode, no saga
+    - Medium documents (chunks <= MAX_CHUNKS_PER_SAGA): one bulk commit, saga-linked
+    - Large documents (chunks > MAX_CHUNKS_PER_SAGA): split into batches of
+      MAX_CHUNKS_PER_SAGA, each its own bulk commit, all sharing the same saga tag
+      so Graphiti links them as one document unit
     """
     char_length = len(full_text)
-    
+
     if char_length < SINGLE_EPISODE_THRESHOLD:
-        # Single episode — short enough that deduplication won't block
         episodes = [{
             "name": source,
             "content": full_text,
@@ -89,27 +140,54 @@ def ingest_to_graphiti(source, full_text, orientation):
             "timestamp": datetime.now().isoformat(),
         }]
         log.info(f"  Single episode ({char_length} chars)")
-        payload = {"episodes": episodes, "group_id": "aaron"}
-    else:
-        # Chunk document — each chunk becomes a separate episode
-        chunks = chunk_text(full_text)
+        return post_bulk({"episodes": episodes, "group_id": "aaron"})
+
+    chunks = chunk_text(full_text)
+    total_chunks = len(chunks)
+
+    if total_chunks <= MAX_CHUNKS_PER_SAGA:
         episodes = [
             {
-                "name": f"{source} [{i+1}/{len(chunks)}]",
+                "name": f"{source} [{i+1}/{total_chunks}]",
                 "content": chunk,
                 "source_description": orientation,
                 "timestamp": datetime.now().isoformat(),
             }
             for i, chunk in enumerate(chunks)
         ]
-        log.info(f"  Chunked into {len(chunks)} episodes ({char_length} chars)")
-        # saga=source links all chunks into a document unit in the graph
-        payload = {"episodes": episodes, "group_id": "aaron", "saga": source}
+        log.info(f"  Chunked into {total_chunks} episodes ({char_length} chars)")
+        return post_bulk(
+            {"episodes": episodes, "group_id": "aaron", "saga": source}
+        )
 
-    resp = requests.post(f"{GRAPHITI_URL}/episodes/bulk", json=payload, timeout=INGEST_TIMEOUT)
-    if not resp.ok:
-        raise RuntimeError(f"Sidecar {resp.status_code}: {resp.text[:500]}")
-    return resp.json()
+    # Large document: split into batches sharing the same saga tag
+    batch_count = (total_chunks + MAX_CHUNKS_PER_SAGA - 1) // MAX_CHUNKS_PER_SAGA
+    log.info(
+        f"  Chunked into {total_chunks} episodes ({char_length} chars); "
+        f"splitting into {batch_count} batches of up to {MAX_CHUNKS_PER_SAGA}"
+    )
+    last_result = None
+    for batch_idx in range(batch_count):
+        start = batch_idx * MAX_CHUNKS_PER_SAGA
+        end = min(start + MAX_CHUNKS_PER_SAGA, total_chunks)
+        batch_chunks = chunks[start:end]
+        episodes = [
+            {
+                "name": f"{source} [{start + i + 1}/{total_chunks}]",
+                "content": chunk,
+                "source_description": orientation,
+                "timestamp": datetime.now().isoformat(),
+            }
+            for i, chunk in enumerate(batch_chunks)
+        ]
+        batch_label = f"batch {batch_idx + 1}/{batch_count} (chunks {start + 1}-{end})"
+        log.info(f"    {batch_label} starting")
+        last_result = post_bulk(
+            {"episodes": episodes, "group_id": "aaron", "saga": source},
+            batch_label=batch_label,
+        )
+        log.info(f"    {batch_label} committed")
+    return last_result
 
 
 def process_one(row):
@@ -145,6 +223,7 @@ def process_one(row):
 
 def run():
     log.info(f"Stage 3 worker starting (v{WORKER_VERSION})")
+    consecutive_failures = 0
 
     while True:
         write_heartbeat()
@@ -166,11 +245,23 @@ def run():
             pg.close()
 
             if not row:
+                consecutive_failures = 0
                 time.sleep(POLL_INTERVAL)
                 continue
 
-            process_one(row)
-            time.sleep(2)
+            success = process_one(row)
+
+            if not success:
+                consecutive_failures += 1
+                if consecutive_failures >= 2:
+                    log.warning("Multiple consecutive failures — checking for Graphiti wedge")
+                    recovered = recover_wedge()
+                    if recovered:
+                        consecutive_failures = 0
+                time.sleep(10)
+            else:
+                consecutive_failures = 0
+                time.sleep(2)
 
         except Exception as e:
             log.error(f"Worker loop error: {e}")
diff --git a/scripts/watcher.py.bak b/scripts/watcher.py.bak
deleted file mode 100644
index 8f674a7..0000000
--- a/scripts/watcher.py.bak
+++ /dev/null
@@ -1,210 +0,0 @@
-import time
-import subprocess
-import logging
-import json
-import threading
-from pathlib import Path
-from watchdog.observers import Observer
-from watchdog.events import FileSystemEventHandler
-
-NEXTCLOUD_PATH = "/home/aaron/nextcloud/data/data/aaron/files"
-INGEST_SCRIPT = "/home/aaron/aaronai/scripts/ingest.py"
-PYTHON = "/home/aaron/aaronai/venv/bin/python3"
-LOG_FILE = "/home/aaron/aaronai/watcher.log"
-STATE_FILE = "/home/aaron/aaronai/watcher_state.json"
-
-SUPPORTED = {'.pdf', '.docx', '.pptx', '.txt', '.md'}
-DEBOUNCE_SECONDS = 120
-STATUS_FILE = "/home/aaron/aaronai/watcher_status.json"
-
-logging.basicConfig(
-    level=logging.INFO,
-    format='%(asctime)s - %(message)s',
-    handlers=[
-        logging.FileHandler(LOG_FILE),
-        logging.StreamHandler()
-    ]
-)
-
-ingestion_state = {
-    "status": "idle",
-    "message": "",
-    "file_count": 0,
-    "started_at": None,
-    "finished_at": None,
-    "last_error": "",
-}
-ingestion_lock = threading.Lock()
-ingestion_thread = None
-
-
-def set_ingestion_state(**kwargs):
-    with ingestion_lock:
-        ingestion_state.update(kwargs)
-
-
-def load_state():
-    if Path(STATE_FILE).exists():
-        with open(STATE_FILE) as f:
-            return json.load(f)
-    return {}
-
-
-def save_state(state):
-    with open(STATE_FILE, 'w') as f:
-        json.dump(state, f)
-
-
-def get_changed_files():
-    state = load_state()
-    changed = []
-    root = Path(NEXTCLOUD_PATH)
-    for path in root.rglob("*"):
-        if path.is_dir():
-            continue
-        if path.suffix.lower() not in SUPPORTED:
-            continue
-        if path.name.startswith('.') or path.name.startswith('~$'):
-            continue
-        mtime = str(path.stat().st_mtime)
-        key = str(path)
-        if state.get(key) != mtime:
-            changed.append(path)
-    return changed, state
-
-
-def run_ingestion():
-    changed, state = get_changed_files()
-    if not changed:
-        logging.info("No new or changed files detected — skipping ingestion.")
-        set_ingestion_state(status="idle", message="No changes detected", file_count=0)
-        return
-
-    count = len(changed)
-    logging.info(f"Found {count} new or changed files — starting ingestion...")
-    set_ingestion_state(
-        status="ingesting",
-        message=f"Ingesting {count} file(s)...",
-        file_count=count,
-        started_at=time.time(),
-        finished_at=None,
-        last_error="",
-    )
-
-    try:
-        result = subprocess.run(
-            [PYTHON, INGEST_SCRIPT, NEXTCLOUD_PATH],
-            capture_output=True,
-            text=True,
-            timeout=1800
-        )
-        if result.returncode == 0:
-            root = Path(NEXTCLOUD_PATH)
-            for path in root.rglob("*"):
-                if path.is_file() and path.suffix.lower() in SUPPORTED:
-                    state[str(path)] = str(path.stat().st_mtime)
-            save_state(state)
-            logging.info("Ingestion complete. State updated.")
-            set_ingestion_state(
-                status="idle",
-                message=f"Last run: ingested {count} file(s) successfully",
-                finished_at=time.time(),
-            )
-        else:
-            logging.error(f"Ingestion error: {result.stderr}")
-            set_ingestion_state(
-                status="error",
-                message="Ingestion failed — see log",
-                last_error=result.stderr[-300:],
-                finished_at=time.time(),
-            )
-    except subprocess.TimeoutExpired:
-        logging.error("Ingestion timed out.")
-        set_ingestion_state(
-            status="error",
-            message="Ingestion timed out (>30 min)",
-            last_error="TimeoutExpired",
-            finished_at=time.time(),
-        )
-    except Exception as e:
-        logging.error(f"Ingestion failed: {e}")
-        set_ingestion_state(
-            status="error",
-            message=f"Ingestion exception: {e}",
-            last_error=str(e),
-            finished_at=time.time(),
-        )
-
-
-def start_ingestion_thread():
-    global ingestion_thread
-    if ingestion_thread and ingestion_thread.is_alive():
-        logging.info("Ingestion already running — skipping.")
-        return
-    ingestion_thread = threading.Thread(target=run_ingestion, daemon=True)
-    ingestion_thread.start()
-
-
-class IngestHandler(FileSystemEventHandler):
-    def __init__(self):
-        self.pending = False
-        self.last_event = 0
-
-    def on_any_event(self, event):
-        if event.is_directory:
-            return
-        path = Path(event.src_path)
-        if path.suffix.lower() not in SUPPORTED:
-            return
-        if path.name.startswith('.') or path.name.startswith('~$'):
-            return
-        if 'Admin/Backups' in str(path) or 'Backups' in path.parts:
-            return
-        if 'Journal/Media' in str(path):
-            return
-        if event.event_type not in ('modified', 'created', 'moved'):
-            return
-        logging.info(f"Event: {event.event_type} {event.src_path}")
-        self.pending = True
-        self.last_event = time.time()
-
-
-def write_status(handler):
-    with ingestion_lock:
-        status = {
-            "running": True,
-            "timestamp": time.time(),
-            "pending": handler.pending,
-            "last_event": handler.last_event,
-            "ingestion": dict(ingestion_state),
-        }
-    with open(STATUS_FILE, 'w') as f:
-        json.dump(status, f)
-
-
-def main():
-    logging.info("Aaron AI Watcher starting...")
-    logging.info(f"Watching: {NEXTCLOUD_PATH}")
-
-    handler = IngestHandler()
-    observer = Observer()
-    observer.schedule(handler, NEXTCLOUD_PATH, recursive=True)
-    observer.start()
-
-    try:
-        while True:
-            write_status(handler)
-            if handler.pending:
-                elapsed = time.time() - handler.last_event
-                if elapsed >= DEBOUNCE_SECONDS:
-                    handler.pending = False
-                    start_ingestion_thread()
-            time.sleep(5)
-    except KeyboardInterrupt:
-        observer.stop()
-    observer.join()
-    logging.info("Watcher stopped.")
-
-
-if __name__ == "__main__":
-    main()
diff --git a/scripts/watcher.py.bak.20260501-004131 b/scripts/watcher.py.bak.20260501-004131
deleted file mode 100644
index dfedf34..0000000
--- a/scripts/watcher.py.bak.20260501-004131
+++ /dev/null
@@ -1,448 +0,0 @@
-"""
-Aaron AI Watcher — Stage 1 of the encoding pipeline.
-
-Watches the Nextcloud directory for new or changed files.
-On detection, chunks + embeds documents in-process (no subprocess),
-then enqueues to stage_2_queue for async cascade processing.
-
-Design principles:
-- Embedding model loaded ONCE at startup, reused across all ingest runs
-- In-process ingest (no subprocess) — eliminates per-run model reload memory spike
-- Missed-file recovery on startup — ingests anything new since last state
-- Heartbeat file updated every loop tick — enables external health monitoring
-- Parity principle: no filtering, no decisions, faithful capture
-- Does NOT enqueue to stage_2_queue during bulk migration (SKIP_STAGE2_ENQUEUE env var)
-
-Architecture: Stage 1 (watcher) -> stage_2_queue -> Stage 2 (Mistral) -> stage_3_queue -> Stage 3 (Graphiti)
-"""
-
-import os
-import time
-import json
-import hashlib
-import logging
-import threading
-from pathlib import Path
-
-import psycopg2
-from dotenv import load_dotenv
-from sentence_transformers import SentenceTransformer
-from watchdog.observers import Observer
-from watchdog.events import FileSystemEventHandler
-
-from docx import Document as DocxDocument
-from pypdf import PdfReader
-from pptx import Presentation
-
-load_dotenv(Path.home() / "aaronai" / ".env", override=True)
-
-NEXTCLOUD_PATH = "/home/aaron/nextcloud/data/data/aaron/files"
-LOG_FILE       = "/home/aaron/aaronai/watcher.log"
-STATE_FILE     = "/home/aaron/aaronai/watcher_state.json"
-STATUS_FILE    = "/home/aaron/aaronai/watcher_status.json"
-HEARTBEAT_FILE = "/home/aaron/aaronai/watcher_heartbeat"
-
-SUPPORTED        = {".pdf", ".docx", ".pptx", ".txt", ".md"}
-DEBOUNCE_SECONDS = 120
-CHUNK_SIZE       = 500
-CHUNK_OVERLAP    = 50
-EMBED_MODEL      = "all-MiniLM-L6-v2"
-
-PG_DSN = os.getenv("PG_DSN")
-
-logging.basicConfig(
-    level=logging.INFO,
-    format="%(asctime)s [watcher] %(levelname)s %(message)s",
-    handlers=[logging.FileHandler(LOG_FILE)],
-)
-log = logging.getLogger("watcher")
-
-ingestion_lock  = threading.Lock()
-ingestion_state = {
-    "status": "idle", "message": "", "file_count": 0,
-    "started_at": None, "finished_at": None, "last_error": "",
-}
-ingestion_thread = None
-
-
-def load_embedder():
-    log.info(f"Loading embedding model: {EMBED_MODEL}")
-    model = SentenceTransformer(EMBED_MODEL)
-    log.info("Embedding model ready.")
-    return model
-
-
-def get_pg():
-    return psycopg2.connect(PG_DSN)
-
-
-def extract_text(path: Path) -> str:
-    suffix = path.suffix.lower()
-    try:
-        if suffix == ".docx":
-            doc = DocxDocument(path)
-            return "\n".join(p.text for p in doc.paragraphs if p.text.strip())
-        elif suffix == ".pdf":
-            reader = PdfReader(path)
-            return "".join(
-                page.extract_text() + "\n"
-                for page in reader.pages if page.extract_text()
-            )
-        elif suffix == ".pptx":
-            prs = Presentation(path)
-            return "\n".join(
-                shape.text for slide in prs.slides
-                for shape in slide.shapes
-                if hasattr(shape, "text") and shape.text.strip()
-            )
-        elif suffix in {".txt", ".md"}:
-            return path.read_text(encoding="utf-8", errors="ignore")
-    except Exception as e:
-        log.warning(f"Text extraction failed for {path.name}: {e}")
-        record_ingest_failure(path, f"Text extraction failed: {e}")
-    return ""
-
-
-def chunk_text(text: str) -> list:
-    words = text.split()
-    chunks = []
-    start = 0
-    while start < len(words):
-        chunk = " ".join(words[start:start + CHUNK_SIZE])
-        if chunk.strip():
-            chunks.append(chunk)
-        start += CHUNK_SIZE - CHUNK_OVERLAP
-    return chunks
-
-
-def make_chunk_id(filepath: Path, chunk_index: int) -> str:
-    return hashlib.md5(str(filepath).encode()).hexdigest()[:8] + f"_{chunk_index}"
-
-
-def enqueue_stage2(source: str, full_text: str):
-    if os.getenv("SKIP_STAGE2_ENQUEUE"):
-        return
-    try:
-        pg = get_pg()
-        cur = pg.cursor()
-        cur.execute("""
-            INSERT INTO stage_2_queue (source, full_text, char_length)
-            VALUES (%s, %s, %s)
-            ON CONFLICT (source) DO UPDATE SET
-                full_text    = EXCLUDED.full_text,
-                char_length  = EXCLUDED.char_length,
-                enqueued_at  = NOW(),
-                completed_at = NULL,
-                failed_at    = NULL,
-                attempts     = 0
-        """, (source, full_text[:50000], len(full_text)))
-        pg.commit()
-        pg.close()
-    except Exception as e:
-        log.warning(f"Stage 2 enqueue failed (non-fatal): {e}")
-
-
-def record_ingest_failure(filepath: Path, error: str):
-    """Write extraction or ingest failure to ingest_failures table for UI visibility."""
-    try:
-        pg = get_pg()
-        cur = pg.cursor()
-        cur.execute("""
-            INSERT INTO ingest_failures (source, filepath, error, retry_count, first_failed_at, last_failed_at)
-            VALUES (%s, %s, %s, 0, NOW(), NOW())
-            ON CONFLICT (source) DO UPDATE SET
-                error          = EXCLUDED.error,
-                retry_count    = ingest_failures.retry_count + 1,
-                last_failed_at = NOW(),
-                resolved       = FALSE
-        """, (filepath.name, str(filepath), error[:1000]))
-        pg.commit()
-        pg.close()
-    except Exception as e:
-        log.warning(f"Could not record ingest failure (non-fatal): {e}")
-
-
-def resolve_ingest_failure(source: str):
-    """Mark a previously failed file as resolved after successful ingest."""
-    try:
-        pg = get_pg()
-        cur = pg.cursor()
-        cur.execute("UPDATE ingest_failures SET resolved = TRUE WHERE source = %s", (source,))
-        pg.commit()
-        pg.close()
-    except Exception as e:
-        log.warning(f"Could not resolve ingest failure record (non-fatal): {e}")
-
-
-def ingest_file(filepath: Path, embedder) -> int:
-    if filepath.name.startswith(("~$", ".")):
-        return 0
-    if filepath.suffix.lower() not in SUPPORTED:
-        return 0
-    text = extract_text(filepath)
-    if not text.strip():
-        return 0
-    chunks = chunk_text(text)
-    if not chunks:
-        return 0
-    try:
-        embeddings = embedder.encode(chunks).tolist()
-    except Exception as e:
-        log.error(f"Embedding failed for {filepath.name}: {e}")
-        record_ingest_failure(filepath, f"Embedding failed: {e}")
-        return 0
-    source = filepath.name
-    try:
-        pg  = get_pg()
-        cur = pg.cursor()
-        for i, (chunk, embedding) in enumerate(zip(chunks, embeddings)):
-            chunk_id = make_chunk_id(filepath, i)
-            cur.execute("""
-                INSERT INTO embeddings (id, document, embedding, source, type, created_at, metadata)
-                VALUES (%s, %s, %s::vector, %s, %s, NOW(), %s)
-                ON CONFLICT (id) DO UPDATE SET
-                    document  = EXCLUDED.document,
-                    embedding = EXCLUDED.embedding,
-                    source    = EXCLUDED.source,
-                    metadata  = EXCLUDED.metadata
-            """, (chunk_id, chunk, embedding, source, "document",
-                  json.dumps({"source": source, "filepath": str(filepath)})))
-        pg.commit()
-        pg.close()
-    except Exception as e:
-        log.error(f"pgvector write failed for {filepath.name}: {e}")
-        record_ingest_failure(filepath, f"pgvector write failed: {e}")
-        return 0
-    log.info(f"Indexed {len(chunks)} chunks: {filepath.name}")
-    resolve_ingest_failure(source)
-    enqueue_stage2(source, text)
-    return len(chunks)
-
-
-def ingest_files(paths: list, embedder, state: dict) -> dict:
-    total = 0
-    for path in paths:
-        count = ingest_file(path, embedder)
-        total += count
-        state[str(path)] = str(path.stat().st_mtime)
-    log.info(f"Ingestion complete. {total} chunks across {len(paths)} files.")
-    return state
-
-
-def load_state() -> dict:
-    if Path(STATE_FILE).exists():
-        try:
-            with open(STATE_FILE) as f:
-                return json.load(f)
-        except Exception:
-            pass
-    return {}
-
-
-def save_state(state: dict):
-    with open(STATE_FILE, "w") as f:
-        json.dump(state, f)
-
-
-def get_changed_files(state: dict) -> list:
-    changed = []
-    root = Path(NEXTCLOUD_PATH)
-    for path in root.rglob("*"):
-        if path.is_dir():
-            continue
-        if path.suffix.lower() not in SUPPORTED:
-            continue
-        if path.name.startswith((".", "~$")):
-            continue
-        if "Admin/Backups" in str(path) or "Backups" in path.parts:
-            continue
-        if "Journal/Media" in str(path):
-            continue
-        if state.get(str(path)) != str(path.stat().st_mtime):
-            changed.append(path)
-    return changed
-
-
-def set_ingestion_state(**kwargs):
-    with ingestion_lock:
-        ingestion_state.update(kwargs)
-
-
-def write_status(handler):
-    with ingestion_lock:
-        status = {
-            "running": True, "timestamp": time.time(),
-            "pending": handler.pending, "last_event": handler.last_event,
-            "ingestion": dict(ingestion_state),
-        }
-    try:
-        with open(STATUS_FILE, "w") as f:
-            json.dump(status, f)
-    except Exception:
-        pass
-
-
-def write_heartbeat():
-    try:
-        Path(HEARTBEAT_FILE).write_text(str(time.time()))
-    except Exception:
-        pass
-
-
-def run_ingestion(embedder):
-    state   = load_state()
-    changed = get_changed_files(state)
-    if not changed:
-        log.info("No new or changed files — skipping ingestion.")
-        set_ingestion_state(status="idle", message="No changes detected", file_count=0)
-        return
-    count = len(changed)
-    log.info(f"Found {count} new or changed files — starting ingestion...")
-    set_ingestion_state(
-        status="ingesting", message=f"Ingesting {count} file(s)...",
-        file_count=count, started_at=time.time(), finished_at=None, last_error="",
-    )
-    try:
-        state = ingest_files(changed, embedder, state)
-        save_state(state)
-        set_ingestion_state(
-            status="idle",
-            message=f"Last run: ingested {count} file(s) successfully",
-            finished_at=time.time(),
-        )
-    except Exception as e:
-        log.error(f"Ingestion failed: {e}")
-        set_ingestion_state(
-            status="error", message=f"Ingestion exception: {e}",
-            last_error=str(e), finished_at=time.time(),
-        )
-
-
-def start_ingestion_thread(embedder):
-    global ingestion_thread
-    with ingestion_lock:
-        if ingestion_thread and ingestion_thread.is_alive():
-            log.info("Ingestion already running — skipping.")
-            return
-        ingestion_thread = threading.Thread(
-            target=run_ingestion, args=(embedder,), daemon=True
-        )
-        ingestion_thread.start()
-
-
-class IngestHandler(FileSystemEventHandler):
-    def __init__(self):
-        self.pending    = False
-        self.last_event = 0
-
-    def _should_ignore(self, path: Path) -> bool:
-        if path.name.startswith((".", "~$")):
-            return True
-        if "Admin/Backups" in str(path) or "Backups" in path.parts:
-            return True
-        if "Journal/Media" in str(path):
-            return True
-        return False
-
-    def on_created(self, event):
-        if event.is_directory:
-            return
-        path = Path(event.src_path)
-        if path.suffix.lower() not in SUPPORTED or self._should_ignore(path):
-            return
-        log.info(f"Event: created {path}")
-        self.pending = True
-        self.last_event = time.time()
-
-    def on_modified(self, event):
-        if event.is_directory:
-            return
-        path = Path(event.src_path)
-        if path.suffix.lower() not in SUPPORTED or self._should_ignore(path):
-            return
-        log.info(f"Event: modified {path}")
-        self.pending = True
-        self.last_event = time.time()
-
-    def on_moved(self, event):
-        if event.is_directory:
-            return
-        # Nextcloud WebDAV writes .part temp files then renames to final path.
-        # src_path is the .part file; dest_path is the final filename.
-        dest = Path(event.dest_path)
-        if dest.suffix.lower() not in SUPPORTED or self._should_ignore(dest):
-            return
-        log.info(f"Event: moved -> {dest}")
-        self.pending = True
-        self.last_event = time.time()
-
-    def on_closed(self, event):
-        # FileClosedEvent fires on the final file after Nextcloud completes write.
-        # Belt-and-suspenders catch for any write pattern not caught by on_moved.
-        if event.is_directory:
-            return
-        path = Path(event.src_path)
-        if path.suffix.lower() not in SUPPORTED or self._should_ignore(path):
-            return
-        log.info(f"Event: closed {path}")
-        self.pending = True
-        self.last_event = time.time()
-
-
-def main():
-    log.info("Aaron AI Watcher starting...")
-    log.info(f"Watching: {NEXTCLOUD_PATH}")
-
-    embedder = load_embedder()
-
-    log.info("Startup scan: checking for files missed since last run...")
-    state  = load_state()
-    missed = get_changed_files(state)
-    if missed:
-        log.info(f"Startup recovery: {len(missed)} missed file(s) — ingesting now.")
-        set_ingestion_state(
-            status="ingesting",
-            message=f"Startup recovery: ingesting {len(missed)} missed file(s)...",
-            file_count=len(missed), started_at=time.time(),
-        )
-        try:
-            state = ingest_files(missed, embedder, state)
-            save_state(state)
-            set_ingestion_state(
-                status="idle",
-                message=f"Startup recovery complete: {len(missed)} file(s) ingested.",
-                finished_at=time.time(),
-            )
-        except Exception as e:
-            log.error(f"Startup recovery failed: {e}")
-            set_ingestion_state(status="error", message=str(e),
-                                last_error=str(e), finished_at=time.time())
-    else:
-        log.info("Startup scan: no missed files.")
-
-    handler  = IngestHandler()
-    observer = Observer()
-    observer.schedule(handler, NEXTCLOUD_PATH, recursive=True)
-    observer.start()
-    log.info("Observer started.")
-
-    try:
-        while True:
-            write_heartbeat()
-            write_status(handler)
-            if handler.pending:
-                elapsed = time.time() - handler.last_event
-                if elapsed >= DEBOUNCE_SECONDS:
-                    handler.pending = False
-                    start_ingestion_thread(embedder)
-            time.sleep(5)
-    except KeyboardInterrupt:
-        log.info("KeyboardInterrupt — stopping.")
-        observer.stop()
-
-    observer.join()
-    log.info("Watcher stopped.")
-
-
-if __name__ == "__main__":
-    main()
diff --git a/watcher_heartbeat b/watcher_heartbeat
deleted file mode 100644
index eca1acd..0000000
--- a/watcher_heartbeat
+++ /dev/null
@@ -1 +0,0 @@
-1777602395.781194
\ No newline at end of file