option b: cross-night source exclusion in dream pipeline
This commit is contained in:
+35
-10
@@ -104,7 +104,7 @@ def get_recent_conversation_topics(days=14):
|
|||||||
|
|
||||||
# ─── Stage 2: Retrieve ──────────────────────────────────────────────────────
|
# ─── Stage 2: Retrieve ──────────────────────────────────────────────────────
|
||||||
|
|
||||||
def retrieve(mode, task=None, n_results=8):
|
def retrieve(mode, task=None, n_results=8, excluded_sources=None):
|
||||||
from sentence_transformers import SentenceTransformer
|
from sentence_transformers import SentenceTransformer
|
||||||
embedder = SentenceTransformer("all-MiniLM-L6-v2")
|
embedder = SentenceTransformer("all-MiniLM-L6-v2")
|
||||||
low, high = MODE_RANGES[mode]
|
low, high = MODE_RANGES[mode]
|
||||||
@@ -127,12 +127,22 @@ def retrieve(mode, task=None, n_results=8):
|
|||||||
try:
|
try:
|
||||||
pg = get_pg()
|
pg = get_pg()
|
||||||
cur = pg.cursor()
|
cur = pg.cursor()
|
||||||
cur.execute("""
|
excluded_sources = excluded_sources or set()
|
||||||
SELECT document, source, 1 - (embedding <=> %s::vector) as similarity
|
if excluded_sources:
|
||||||
FROM embeddings
|
cur.execute("""
|
||||||
ORDER BY embedding <=> %s::vector
|
SELECT document, source, 1 - (embedding <=> %s::vector) as similarity
|
||||||
LIMIT %s
|
FROM embeddings
|
||||||
""", (embedding, embedding, n_results * 3))
|
WHERE source NOT IN %s
|
||||||
|
ORDER BY embedding <=> %s::vector
|
||||||
|
LIMIT %s
|
||||||
|
""", (embedding, tuple(excluded_sources), embedding, n_results * 3))
|
||||||
|
else:
|
||||||
|
cur.execute("""
|
||||||
|
SELECT document, source, 1 - (embedding <=> %s::vector) as similarity
|
||||||
|
FROM embeddings
|
||||||
|
ORDER BY embedding <=> %s::vector
|
||||||
|
LIMIT %s
|
||||||
|
""", (embedding, embedding, n_results * 3))
|
||||||
|
|
||||||
for doc, source, similarity in cur.fetchall():
|
for doc, source, similarity in cur.fetchall():
|
||||||
if not (low <= similarity <= high):
|
if not (low <= similarity <= high):
|
||||||
@@ -379,12 +389,18 @@ def dream_pipeline():
|
|||||||
"""
|
"""
|
||||||
print(f"Dreamer pipeline starting — {datetime.now().strftime('%Y-%m-%d %H:%M')}")
|
print(f"Dreamer pipeline starting — {datetime.now().strftime('%Y-%m-%d %H:%M')}")
|
||||||
|
|
||||||
|
state = load_dreamer_state()
|
||||||
|
previously_retrieved = set(state.get("retrieved_sources", []))
|
||||||
|
session_retrieved = set()
|
||||||
|
|
||||||
delta = observe_corpus()
|
delta = observe_corpus()
|
||||||
print(f"Corpus: {delta['new_chunks']} new chunks, {delta['days_since_dream']:.1f} days since last dream")
|
print(f"Corpus: {delta['new_chunks']} new chunks, {delta['days_since_dream']:.1f} days since last dream")
|
||||||
|
print(f"Excluding {len(previously_retrieved)} previously retrieved sources")
|
||||||
|
|
||||||
# ── Stage 1: NREM ──────────────────────────────────────────────────────
|
# ── Stage 1: NREM ──────────────────────────────────────────────────────
|
||||||
print("\n[NREM] Retrieving...")
|
print("\n[NREM] Retrieving...")
|
||||||
nrem_chunks = retrieve("nrem")
|
nrem_chunks = retrieve("nrem", excluded_sources=previously_retrieved | session_retrieved)
|
||||||
|
session_retrieved.update(c["source"] for c in nrem_chunks)
|
||||||
if not nrem_chunks:
|
if not nrem_chunks:
|
||||||
print("[NREM] No suitable chunks — aborting pipeline")
|
print("[NREM] No suitable chunks — aborting pipeline")
|
||||||
return None
|
return None
|
||||||
@@ -405,7 +421,8 @@ def dream_pipeline():
|
|||||||
|
|
||||||
# ── Stage 2: Early REM — informed by NREM ──────────────────────────────
|
# ── Stage 2: Early REM — informed by NREM ──────────────────────────────
|
||||||
print("\n[Early REM] Retrieving...")
|
print("\n[Early REM] Retrieving...")
|
||||||
early_chunks = retrieve("early-rem")
|
early_chunks = retrieve("early-rem", excluded_sources=previously_retrieved | session_retrieved)
|
||||||
|
session_retrieved.update(c["source"] for c in early_chunks)
|
||||||
if not early_chunks:
|
if not early_chunks:
|
||||||
print("[Early REM] No suitable chunks — skipping")
|
print("[Early REM] No suitable chunks — skipping")
|
||||||
early_rem_output = nrem_output # fallback
|
early_rem_output = nrem_output # fallback
|
||||||
@@ -424,7 +441,8 @@ def dream_pipeline():
|
|||||||
|
|
||||||
# ── Stage 3: Late REM — informed by NREM + Early REM ──────────────────
|
# ── Stage 3: Late REM — informed by NREM + Early REM ──────────────────
|
||||||
print("\n[Late REM] Retrieving...")
|
print("\n[Late REM] Retrieving...")
|
||||||
late_chunks = retrieve("late-rem")
|
late_chunks = retrieve("late-rem", excluded_sources=previously_retrieved | session_retrieved)
|
||||||
|
session_retrieved.update(c["source"] for c in late_chunks)
|
||||||
if not late_chunks:
|
if not late_chunks:
|
||||||
print("[Late REM] No suitable chunks — skipping")
|
print("[Late REM] No suitable chunks — skipping")
|
||||||
late_rem_output = early_rem_output # fallback
|
late_rem_output = early_rem_output # fallback
|
||||||
@@ -468,6 +486,13 @@ def dream_pipeline():
|
|||||||
state["last_dream_timestamp"] = datetime.now().timestamp()
|
state["last_dream_timestamp"] = datetime.now().timestamp()
|
||||||
state["last_dream_mode"] = "pipeline"
|
state["last_dream_mode"] = "pipeline"
|
||||||
state["last_dream_file"] = synthesis_file
|
state["last_dream_file"] = synthesis_file
|
||||||
|
|
||||||
|
# Accumulate retrieved sources across nights. Cap at 500, trim to 400 on overflow.
|
||||||
|
all_retrieved = list(previously_retrieved | session_retrieved)
|
||||||
|
if len(all_retrieved) > 500:
|
||||||
|
all_retrieved = all_retrieved[-400:]
|
||||||
|
state["retrieved_sources"] = all_retrieved
|
||||||
|
|
||||||
save_dreamer_state(state)
|
save_dreamer_state(state)
|
||||||
|
|
||||||
notify_sse("synthesis", synthesis_file.split("/")[-1])
|
notify_sse("synthesis", synthesis_file.split("/")[-1])
|
||||||
|
|||||||
Reference in New Issue
Block a user