From c5fc517fef23358f37454a96803c09a551cd9000 Mon Sep 17 00:00:00 2001
From: Aaron Nelson <aaron@aaronnelson.studio>
Date: Mon, 4 May 2026 03:13:45 +0000
Subject: [PATCH] ingest_conversations.py: lazy-load embedder to match
 ingest.py pattern

The embedder was instantiated at module import (~30-60s, ~200MB)
regardless of whether any new conversations existed. On nights with no
new content (most nights, per the logs), the script paid the full load
cost and then exited without embedding anything. ingest.py:134 already
loads its embedder lazily; this change brings the two ingest scripts
into a consistent shape.
---
 scripts/ingest_conversations.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/scripts/ingest_conversations.py b/scripts/ingest_conversations.py
index 3ad7100..694bd57 100644
--- a/scripts/ingest_conversations.py
+++ b/scripts/ingest_conversations.py
@@ -18,8 +18,14 @@ CONVERSATIONS_DB = str(Path.home() / "aaronai" / "conversations.db")
 PG_DSN = os.getenv("PG_DSN")
 MIN_EXCHANGES = 3
 
-print("Loading embedding model...")
-embedder = SentenceTransformer("all-MiniLM-L6-v2")
+_embedder = None  # cached model instance; stays None until get_embedder() is first called
+
+def get_embedder():  # returns the shared SentenceTransformer, creating it on first use
+    global _embedder  # rebinds the module-level cache declared above
+    if _embedder is None:  # first call only; later calls reuse the cached instance
+        print("Loading embedding model...")
+        _embedder = SentenceTransformer("all-MiniLM-L6-v2")
+    return _embedder
 
 def get_conversations():
     conn = sqlite3.connect(CONVERSATIONS_DB)
@@ -123,7 +129,7 @@ def run():
         
         # Embed and insert
         texts = [c[1] for c in new_chunks]
-        embeddings = embedder.encode(texts, show_progress_bar=False).tolist()
+        embeddings = get_embedder().encode(texts, show_progress_bar=False).tolist()
         
         for (chunk_id, chunk_text, meta), embedding in zip(new_chunks, embeddings):
             if not meta.get("type"):