dream.py: cache the SentenceTransformer embedder across retrieve() calls
Pipeline mode calls retrieve() three times (NREM, Early REM, Late REM).
Previously each call re-imported and re-instantiated SentenceTransformer
("all-MiniLM-L6-v2"), allocating ~200MB and spending 30-60s on disk->CPU
init three times sequentially. lru_cache(maxsize=1) makes the load happen
once per process.
Expected: pipeline runtime drops ~60-120s (two of the three 30-60s
loads are avoided), removes 2x redundant 200MB
allocations, and reduces transient memory pressure during the same window
when other nightly jobs may run.
This commit is contained in:
+7
-2
@@ -16,6 +16,7 @@ import os
|
|||||||
import json
|
import json
|
||||||
import sqlite3
|
import sqlite3
|
||||||
import argparse
|
import argparse
|
||||||
|
from functools import lru_cache
|
||||||
from collections import Counter
|
from collections import Counter
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
@@ -283,6 +284,11 @@ def retrieve_graphiti(mode, task=None, n_results=8, excluded_sources=None):
|
|||||||
print(f"[Graphiti retrieval error: {e}] — falling back to empty.")
|
print(f"[Graphiti retrieval error: {e}] — falling back to empty.")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
@lru_cache(maxsize=1)
def _get_embedder():
    """Return the shared sentence-embedding model, building it on first use.

    The function takes no arguments, so ``lru_cache(maxsize=1)`` stores the
    single constructed model and hands the same object back on every later
    call in this process.

    Returns:
        A ``SentenceTransformer`` constructed for ``"all-MiniLM-L6-v2"``.
    """
    # Imported inside the function so the sentence_transformers package is
    # only loaded when an embedder is actually requested.
    from sentence_transformers import SentenceTransformer as embedder_cls

    model_name = "all-MiniLM-L6-v2"
    return embedder_cls(model_name)
|
||||||
|
|
||||||
def retrieve(mode, task=None, n_results=8, excluded_sources=None, type_filter=None):
|
def retrieve(mode, task=None, n_results=8, excluded_sources=None, type_filter=None):
|
||||||
# E3 experiment: DREAMER_SUBSTRATE=graphiti routes retrieval to Graphiti /search
|
# E3 experiment: DREAMER_SUBSTRATE=graphiti routes retrieval to Graphiti /search
|
||||||
# Default behavior: pgvector similarity search (unchanged)
|
# Default behavior: pgvector similarity search (unchanged)
|
||||||
@@ -291,8 +297,7 @@ def retrieve(mode, task=None, n_results=8, excluded_sources=None, type_filter=No
|
|||||||
substrate = os.getenv("DREAMER_SUBSTRATE", "pgvector")
|
substrate = os.getenv("DREAMER_SUBSTRATE", "pgvector")
|
||||||
if substrate == "graphiti":
|
if substrate == "graphiti":
|
||||||
return retrieve_graphiti(mode, task=task, n_results=n_results, excluded_sources=excluded_sources)
|
return retrieve_graphiti(mode, task=task, n_results=n_results, excluded_sources=excluded_sources)
|
||||||
from sentence_transformers import SentenceTransformer
|
embedder = _get_embedder()
|
||||||
embedder = SentenceTransformer("all-MiniLM-L6-v2")
|
|
||||||
low, high = MODE_RANGES[mode]
|
low, high = MODE_RANGES[mode]
|
||||||
|
|
||||||
if task:
|
if task:
|
||||||
|
|||||||
Reference in New Issue
Block a user