diff --git a/scripts/dream.py b/scripts/dream.py
index 7c34b4e..41a78a5 100644
--- a/scripts/dream.py
+++ b/scripts/dream.py
@@ -63,6 +63,11 @@ def prompt_hash(prompts: list[str]) -> str:
     combined = "".join(prompts)
     return hashlib.md5(combined.encode()).hexdigest()[:8]
 
+def extract_folder(source_path):
+    """Return the top-level Nextcloud folder of source_path ("unknown" if empty or None)."""
+    parts = [p for p in (source_path or "").replace("\\", "/").split("/") if p]
+    return parts[0] if parts else "unknown"
+
 # ─── Stage 1: Observe ───────────────────────────────────────────────────────
 
 def observe_corpus():
@@ -408,12 +413,17 @@ def dream_pipeline():
     print(f"[NREM] Retrieved {len(nrem_chunks)} chunks. Synthesizing...")
     nrem_output = synthesize_nrem(nrem_chunks)
     nrem_file = deliver(nrem_output, "nrem")
+    nrem_sources = [c["source"] for c in nrem_chunks]
+    nrem_folders = list({extract_folder(s) for s in nrem_sources})
     stage_data = {
         "nrem": {
             "chunks_retrieved": len(nrem_chunks),
             "avg_similarity": round(sum(c["relevance"] for c in nrem_chunks) / len(nrem_chunks), 3),
             "query": "research fabrication teaching practice recent work",
             "word_count": len(nrem_output.split()),
+            "sources": nrem_sources,
+            "distinct_folders": nrem_folders,
+            "folder_count": len(nrem_folders),
             "status": "ok",
         }
     }
@@ -430,11 +440,16 @@ def dream_pipeline():
         print(f"[Early REM] Retrieved {len(early_chunks)} chunks. Synthesizing with NREM context...")
         early_rem_output = synthesize_early_rem(early_chunks, nrem_output)
         deliver(early_rem_output, "early-rem")
+        early_sources = [c["source"] for c in early_chunks]
+        early_folders = list({extract_folder(s) for s in early_sources})
         stage_data["early_rem"] = {
             "chunks_retrieved": len(early_chunks),
             "avg_similarity": round(sum(c["relevance"] for c in early_chunks) / len(early_chunks), 3),
             "query": "career decision personal change what matters next",
             "word_count": len(early_rem_output.split()),
+            "sources": early_sources,
+            "distinct_folders": early_folders,
+            "folder_count": len(early_folders),
             "status": "ok",
         }
         print(f"[Early REM] Done.\n{early_rem_output[:200]}...")
@@ -450,11 +465,22 @@ def dream_pipeline():
         print(f"[Late REM] Retrieved {len(late_chunks)} chunks. Synthesizing with full context...")
         late_rem_output = synthesize_late_rem(late_chunks, nrem_output, early_rem_output)
         deliver(late_rem_output, "late-rem")
+        late_sources = [c["source"] for c in late_chunks]
+        late_folders = [extract_folder(s) for s in late_sources]
+        cross_domain_pairs = sum(
+            1 for i in range(len(late_folders))
+            for j in range(i+1, len(late_folders))
+            if late_folders[i] != late_folders[j]
+        )
         stage_data["late_rem"] = {
             "chunks_retrieved": len(late_chunks),
             "avg_similarity": round(sum(c["relevance"] for c in late_chunks) / len(late_chunks), 3),
             "query": "practice place memory making",
             "word_count": len(late_rem_output.split()),
+            "sources": late_sources,
+            "distinct_folders": list(set(late_folders)),
+            "folder_count": len(set(late_folders)),
+            "cross_domain_pairs": cross_domain_pairs,
             "status": "ok",
         }
         print(f"[Late REM] Done.\n{late_rem_output[:200]}...")
@@ -474,10 +500,18 @@ def dream_pipeline():
     print(f"{'='*60}")
 
     # Write manifest
+    all_session_sources = list(session_retrieved)
+    all_session_folders = list({extract_folder(s) for s in all_session_sources})
     corpus_data = {
         "total_chunks": delta.get("new_chunks", 0),
         "new_chunks_since_last_dream": delta.get("new_chunks", 0),
         "days_since_last_dream": round(delta.get("days_since_dream", 0), 2),
+        "substrate": "pgvector",
+        "aggregate": {
+            "total_distinct_sources": len(all_session_sources),
+            "total_distinct_folders": len(all_session_folders),
+            "folders_touched": all_session_folders,
+        }
     }
     write_manifest(datetime.now().strftime("%Y-%m-%d"), stage_data, corpus_data)