dream.py v1.1: score-band exclusion for Early REM, DREAMER_VERSION constant, manifest versioning

This commit is contained in:
2026-04-30 15:51:11 +00:00
parent b53717af5b
commit cae7fb8775
+8 -1
View File
@@ -47,6 +47,7 @@ MODE_RANGES = {
"late-rem": (0.22, 0.42), "late-rem": (0.22, 0.42),
"lucid": (0.32, 0.72), "lucid": (0.32, 0.72),
} }
DREAMER_VERSION = "1.1" # 1.0=original exclusion logic; 1.1=score-band exclusion
# ─── Prompt versioning ────────────────────────────────────────────────────── # ─── Prompt versioning ──────────────────────────────────────────────────────
# Bump the relevant constant manually when changing a prompt. # Bump the relevant constant manually when changing a prompt.
@@ -158,6 +159,7 @@ def retrieve(mode, task=None, n_results=8, excluded_sources=None):
"source": source or "unknown", "source": source or "unknown",
"content": doc, "content": doc,
"relevance": similarity, "relevance": similarity,
"similarity": similarity,
}) })
seen_sources.add(source) seen_sources.add(source)
if len(chunks) >= n_results: if len(chunks) >= n_results:
@@ -366,6 +368,7 @@ def write_manifest(date_str, stage_data, corpus_data):
manifest = { manifest = {
"date": date_str, "date": date_str,
"prompt_sig": prompt_signature(), "prompt_sig": prompt_signature(),
"dreamer_version": DREAMER_VERSION,
"prompt_hash": prompt_hash([ "prompt_hash": prompt_hash([
synthesize_nrem.__doc__ or "", synthesize_nrem.__doc__ or "",
synthesize_early_rem.__doc__ or "", synthesize_early_rem.__doc__ or "",
@@ -406,6 +409,8 @@ def dream_pipeline():
print("\n[NREM] Retrieving...") print("\n[NREM] Retrieving...")
nrem_chunks = retrieve("nrem", excluded_sources=previously_retrieved | session_retrieved) nrem_chunks = retrieve("nrem", excluded_sources=previously_retrieved | session_retrieved)
session_retrieved.update(c["source"] for c in nrem_chunks) session_retrieved.update(c["source"] for c in nrem_chunks)
# Track NREM sources whose similarity exceeded the Early REM ceiling (> 0.55) — these are the only NREM-retrieved sources Early REM should exclude
nrem_high_sources = {c["source"] for c in nrem_chunks if c["similarity"] > 0.55}
if not nrem_chunks: if not nrem_chunks:
print("[NREM] No suitable chunks — aborting pipeline") print("[NREM] No suitable chunks — aborting pipeline")
return None return None
@@ -431,7 +436,9 @@ def dream_pipeline():
# ── Stage 2: Early REM — informed by NREM ────────────────────────────── # ── Stage 2: Early REM — informed by NREM ──────────────────────────────
print("\n[Early REM] Retrieving...") print("\n[Early REM] Retrieving...")
early_chunks = retrieve("early-rem", excluded_sources=previously_retrieved | session_retrieved) # Early REM excludes previously retrieved + NREM high-scorers only (not full session_retrieved)
# NREM sources that scored at or below the Early REM ceiling remain available to Early REM
early_chunks = retrieve("early-rem", excluded_sources=previously_retrieved | nrem_high_sources)
session_retrieved.update(c["source"] for c in early_chunks) session_retrieved.update(c["source"] for c in early_chunks)
if not early_chunks: if not early_chunks:
print("[Early REM] No suitable chunks — skipping") print("[Early REM] No suitable chunks — skipping")