ingest_conversations.py: lazy-load embedder to match ingest.py pattern

Embedder was instantiated at module import (~30-60s, ~200MB) regardless
of whether new conversations existed. On nights with no new content
(most nights per the logs), the script paid the load cost and exited
immediately. ingest.py:134 already uses lazy loading; this brings the
two ingest scripts into a consistent shape.
This commit is contained in:
2026-05-04 03:13:45 +00:00
parent b35d44ef58
commit c5fc517fef
+8 -2
View File
@@ -18,8 +18,14 @@ CONVERSATIONS_DB = str(Path.home() / "aaronai" / "conversations.db")
PG_DSN = os.getenv("PG_DSN")
MIN_EXCHANGES = 3
# Process-wide cache for the embedding model; stays None until the first
# get_embedder() call so that runs with nothing to embed never load it.
_embedder = None


def get_embedder():
    """Return the shared embedding model, loading it on first use.

    The model is constructed exactly once per process and cached in the
    module-level ``_embedder``. Loading is deferred because it is
    expensive (roughly 30-60s and ~200MB per the commit notes), and runs
    that find no new conversations should not pay that cost.

    Returns:
        The cached ``SentenceTransformer("all-MiniLM-L6-v2")`` instance.
    """
    global _embedder
    if _embedder is None:
        print("Loading embedding model...")
        # Construct the model once only; a second construction here would
        # double the load time and memory for no benefit.
        _embedder = SentenceTransformer("all-MiniLM-L6-v2")
    return _embedder
def get_conversations():
conn = sqlite3.connect(CONVERSATIONS_DB)
@@ -123,7 +129,7 @@ def run():
# Embed and insert
texts = [c[1] for c in new_chunks]
embeddings = embedder.encode(texts, show_progress_bar=False).tolist()
embeddings = get_embedder().encode(texts, show_progress_bar=False).tolist()
for (chunk_id, chunk_text, meta), embedding in zip(new_chunks, embeddings):
if not meta.get("type"):