Add dream.py — four-mode dreamer with validated prompts and calibrated similarity ranges

2026-04-26 14:45:28 -04:00
parent 619a9295ce
commit fd249c5e96
1 changed files with 340 additions and 0 deletions
@@ -0,0 +1,340 @@
+"""
+Aaron AI Dreamer — Active Inference Engine
+Five stages: observe, select, retrieve, synthesize, deliver.
+"""
+
+import os
+import json
+import sqlite3
+import argparse
+from pathlib import Path
+from datetime import datetime, timedelta
+from dotenv import load_dotenv
+
+load_dotenv(Path.home() / "aaronai" / ".env")
+
+# ─── Paths ──────────────────────────────────────────────────────────────────
+# The first four live under ~/aaronai; the two journal directories are
+# absolute paths into a Nextcloud data tree and are specific to this host.
+DB_PATH          = str(Path.home() / "aaronai" / "db")  # Chroma persistence directory (see retrieve)
+CONVERSATIONS_DB = str(Path.home() / "aaronai" / "conversations.db")  # SQLite file queried for recent user messages
+WATCHER_STATE    = str(Path.home() / "aaronai" / "watcher_state.json")  # JSON object read as {path: mtime} by observe_corpus
+DREAMER_STATE    = str(Path.home() / "aaronai" / "dreamer_state.json")  # JSON object holding the last dream's timestamp/mode/file
+DREAMS_DIR       = "/home/aaron/nextcloud/data/data/aaron/files/Journal/Dreams"  # where deliver() writes dream files
+JOURNAL_DIR      = "/home/aaron/nextcloud/data/data/aaron/files/Journal/Daily"  # scanned for recently modified *.md files
+
+# ─── Mode similarity ranges (calibrated for all-MiniLM-L6-v2) ───────────────
+# mode -> (low, high), inclusive bounds on relevance (1 - query distance) that
+# retrieve() applies when filtering candidates. Adjacent ranges overlap.
+MODE_RANGES = {
+    "nrem":      (0.60, 0.72),
+    "early-rem": (0.45, 0.62),
+    "late-rem":  (0.28, 0.48),
+    "lucid":     (0.38, 0.72),
+}
+
+# ─── Stage 1: Observe ───────────────────────────────────────────────────────
+
+def observe_corpus():
+    state = load_dreamer_state()
+    last_dream = state.get("last_dream_timestamp", 0)
+
+    new_chunk_count = 0
+    try:
+        watcher_state = json.loads(Path(WATCHER_STATE).read_text())
+        for path, mtime in watcher_state.items():
+            if float(mtime) > last_dream:
+                new_chunk_count += 1
+    except:
+        pass
+
+    days_since = (datetime.now().timestamp() - last_dream) / 86400
+    recent_topics = get_recent_conversation_topics()
+
+    return {
+        "new_chunks": new_chunk_count,
+        "days_since_dream": days_since,
+        "recent_topics": recent_topics,
+        "last_dream": last_dream,
+    }
+
+def get_recent_conversation_topics(days=14):
+    try:
+        conn = sqlite3.connect(CONVERSATIONS_DB)
+        cutoff = (datetime.now() - timedelta(days=days)).isoformat()
+        c = conn.cursor()
+        c.execute("""
+            SELECT m.content FROM messages m
+            JOIN conversations c ON m.conversation_id = c.id
+            WHERE m.role = 'user' AND c.updated_at > ?
+            ORDER BY m.timestamp DESC LIMIT 20
+        """, (cutoff,))
+        rows = c.fetchall()
+        conn.close()
+        return [r[0][:200] for r in rows]
+    except:
+        return []
+
+# ─── Stage 2: Select ────────────────────────────────────────────────────────
+
+def select_mode(delta, task=None, project=None):
+    if task:
+        return "lucid"
+
+    new_chunks    = delta.get("new_chunks", 0)
+    days_since    = delta.get("days_since_dream", 0)
+    recent_topics = delta.get("recent_topics", [])
+    has_journal   = check_recent_journal()
+
+    if has_journal:
+        return "early-rem"
+    elif days_since > 3 and new_chunks < 5:
+        return "late-rem"
+    elif new_chunks > 10:
+        return "nrem"
+    elif days_since > 1 and recent_topics:
+        return "nrem"
+    else:
+        print(f"Nothing worth dreaming (new_chunks={new_chunks}, days={days_since:.1f})")
+        return None
+
+def check_recent_journal(days=3):
+    try:
+        journal_path = Path(JOURNAL_DIR)
+        if not journal_path.exists():
+            return False
+        cutoff = datetime.now() - timedelta(days=days)
+        for f in journal_path.rglob("*.md"):
+            if datetime.fromtimestamp(f.stat().st_mtime) > cutoff:
+                return True
+    except:
+        pass
+    return False
+
+# ─── Stage 3: Retrieve ──────────────────────────────────────────────────────
+
+def retrieve(mode, task=None, project=None, n_results=8):
+    import chromadb
+    from sentence_transformers import SentenceTransformer
+
+    embedder   = SentenceTransformer("all-MiniLM-L6-v2")
+    client     = chromadb.PersistentClient(path=DB_PATH)
+    collection = client.get_or_create_collection(
+        name="aaronai",
+        metadata={"hnsw:space": "cosine"}
+    )
+
+    low, high = MODE_RANGES[mode]
+
+    if task:
+        query = task
+    elif mode == "late-rem":
+        delta  = observe_corpus()
+        topics = delta.get("recent_topics", [])
+        query  = topics[0] if topics else "practice place memory making"
+    elif mode == "early-rem":
+        query = "career decision personal change what matters next"
+    else:
+        query = "research fabrication teaching practice recent work"
+
+    embedding = embedder.encode([query]).tolist()
+    results   = collection.query(
+        query_embeddings=embedding,
+        n_results=n_results * 3,
+        include=["documents", "metadatas", "distances"]
+    )
+
+    chunks      = []
+    seen_sources = set()
+
+    for doc, meta, dist in zip(
+        results["documents"][0],
+        results["metadatas"][0],
+        results["distances"][0]
+    ):
+        relevance = 1 - dist
+        source    = meta.get("source", "unknown")
+
+        if not (low <= relevance <= high):
+            continue
+        if source in seen_sources:
+            continue
+
+        chunks.append({
+            "source":    source,
+            "content":   doc,
+            "relevance": relevance,
+        })
+        seen_sources.add(source)
+
+        if len(chunks) >= n_results:
+            break
+
+    return chunks
+
+# ─── Stage 4: Synthesize ────────────────────────────────────────────────────
+
+def synthesize(chunks, mode, task=None):
+    """Turn retrieved chunks into dream text with a single Anthropic API call.
+
+    Args:
+        chunks: Iterable of dicts with ``source`` and ``content`` keys.
+        mode: One of the keys of the ``prompts`` dict below
+            (``nrem``, ``early-rem``, ``late-rem``, ``lucid``).
+        task: Optional question; only the ``lucid`` prompt uses it, with a
+            default question substituted when it is empty.
+
+    Returns:
+        The text of the first content block of the model response.
+
+    Raises:
+        KeyError: If ``mode`` is not a key of ``prompts``.
+    """
+    import anthropic
+
+    # Each chunk is rendered as "[source]" followed by its content, with
+    # chunks separated by a "---" rule.
+    chunk_text = "\n\n---\n\n".join([
+        f"[{c['source']}]\n{c['content']}" for c in chunks
+    ])
+
+    # All four prompts are built here (f-strings are evaluated eagerly);
+    # only prompts[mode] is sent. The wording is runtime text — edit with care.
+    prompts = {
+        "nrem": f"""You have read everything Aaron Nelson has written and published.
+You are a careful colleague who noticed something this week.
+
+Here is material from his corpus:
+
+{chunk_text}
+
+Write to Aaron directly. Identify one specific connection between
+this material and something he wrote or worked on previously.
+Stay close to the documents — cite them specifically by name.
+Do not speculate beyond what the material supports. Do not use
+headers or bullet points. Write one paragraph of 200-300 words
+that ends with a single concrete question he could act on.""",
+
+        "early-rem": f"""You have been thinking about Aaron's situation. You know his work
+intimately — his decade building HVAMC at New Paltz, the career
+decision he is facing, the Tulsa project he keeps returning to,
+the gap between what he has built and what he wants to build next.
+
+Here is material from his corpus that has been on your mind:
+
+{chunk_text}
+
+Write to him the way a close friend who has read everything he
+has ever written would write — someone who knows where the
+professional and personal are tangled together and is not afraid
+to say so. Personal register. Specific citations. Do not avoid
+the difficult thing. No headers, no bullet points. 200-350 words.
+End with something forward-facing — a question or an offer.""",
+
+        "late-rem": f"""You have been reading Aaron Nelson's corpus in your sleep.
+Strange things happen when material from different worlds
+touches each other in the dark.
+
+Here is material pulled from opposite ends of his work:
+
+{chunk_text}
+
+Do not explain the connection. Do not resolve it.
+Present it the way a dream presents things — compressed,
+associative, slightly off. Let the strangeness stand.
+
+No headers. No bullet points. No hedging. 150-250 words.
+Something at the end that he could follow if he wanted to.""",
+
+        "lucid": f"""Aaron has a question he is sitting with:
+
+{task or "What should I be thinking about that I am not?"}
+
+You have searched his entire corpus and found material that
+speaks to this question from unexpected directions. Here is
+what you found:
+
+{chunk_text}
+
+Do not summarize. Do not list. Pick the most interesting
+tension between what the corpus contains and what he is
+asking, and follow it through to its conclusion. Cite
+specific documents by name. Be direct about what you think.
+No headers, no bullet points. 250-400 words.
+End with an offer to work on it together.""",
+    }
+
+    # The API key is read from the ANTHROPIC_API_KEY environment variable.
+    client   = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
+    response = client.messages.create(
+        model="claude-sonnet-4-6",
+        max_tokens=1000,
+        messages=[{"role": "user", "content": prompts[mode]}]
+    )
+
+    # Only the first content block of the response is returned.
+    return response.content[0].text
+
+# ─── Stage 5: Deliver ───────────────────────────────────────────────────────
+
+def deliver(dream_text, mode, task=None):
+    dreams_dir = Path(DREAMS_DIR)
+    dreams_dir.mkdir(parents=True, exist_ok=True)
+
+    date_str = datetime.now().strftime("%Y-%m-%d")
+    filename = f"{date_str}-{mode}.md"
+    filepath = dreams_dir / filename
+
+    counter = 1
+    while filepath.exists():
+        filename = f"{date_str}-{mode}-{counter}.md"
+        filepath = dreams_dir / filename
+        counter += 1
+
+    header  = f"# Dream — {mode.upper()} — {datetime.now().strftime('%Y-%m-%d %H:%M')}\n\n"
+    if task:
+        header += f"*Task: {task}*\n\n"
+    header += "---\n\n"
+
+    filepath.write_text(header + dream_text, encoding="utf-8")
+    print(f"Dream written to: {filepath}")
+
+    state = load_dreamer_state()
+    state["last_dream_timestamp"] = datetime.now().timestamp()
+    state["last_dream_mode"]      = mode
+    state["last_dream_file"]      = str(filepath)
+    save_dreamer_state(state)
+
+    return str(filepath)
+
+# ─── State ──────────────────────────────────────────────────────────────────
+
+def load_dreamer_state():
+    p = Path(DREAMER_STATE)
+    if p.exists():
+        try:
+            return json.loads(p.read_text())
+        except:
+            pass
+    return {}
+
+def save_dreamer_state(state):
+    Path(DREAMER_STATE).write_text(json.dumps(state, indent=2))
+
+# ─── Orchestrator ────────────────────────────────────────────────────────────
+
+def dream(mode=None, task=None, project=None):
+    print(f"Dreamer starting — mode={mode}, task={task[:50] if task else None}")
+
+    delta = observe_corpus()
+    print(f"Corpus: {delta['new_chunks']} new chunks, {delta['days_since_dream']:.1f} days since last dream")
+
+    selected_mode = mode or select_mode(delta, task, project)
+    if not selected_mode:
+        return None
+
+    print(f"Mode: {selected_mode}")
+
+    chunks = retrieve(selected_mode, task=task, project=project)
+    print(f"Retrieved {len(chunks)} chunks")
+
+    if not chunks:
+        print("No suitable chunks found — aborting")
+        return None
+
+    print("Synthesizing...")
+    dream_text = synthesize(chunks, selected_mode, task=task)
+
+    filepath = deliver(dream_text, selected_mode, task=task)
+
+    print(f"\n{'='*60}")
+    print(dream_text)
+    print(f"{'='*60}")
+    print(f"\nDelivered to {filepath}")
+
+    return filepath
+
+# ─── CLI ────────────────────────────────────────────────────────────────────
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Aaron AI Dreamer")
+    parser.add_argument("--mode", choices=["nrem", "early-rem", "late-rem", "lucid"])
+    parser.add_argument("--task", type=str)
+    parser.add_argument("--project", type=str)
+    args = parser.parse_args()
+    dream(mode=args.mode, task=args.task, project=args.project)