# aaronAI/scripts/dream.py — 368 lines, 13 KiB, Python
# (file-browser header from the original paste, preserved as a comment)
"""
Aaron AI Dreamer — Active Inference Engine
Five stages: observe, select, retrieve, synthesize, deliver.
"""
import os
import json
import sqlite3
import argparse
from pathlib import Path
from datetime import datetime, timedelta
from dotenv import load_dotenv
import psycopg2
# Load environment overrides from ~/aaronai/.env before any config is read.
load_dotenv(Path.home() / "aaronai" / ".env")
# Postgres DSN for the pgvector embeddings store.
# NOTE(review): the fallback embeds a default password in source — prefer
# requiring PG_DSN in the environment so credentials never live in the repo.
PG_DSN = os.getenv("PG_DSN", "dbname=aaronai user=aaronai password=aaronai_db_password host=localhost")
def get_pg():
    """Open and return a fresh psycopg2 connection using the module DSN."""
    connection = psycopg2.connect(PG_DSN)
    return connection
# ─── Paths ──────────────────────────────────────────────────────────────────
# Local state and databases, all rooted at ~/aaronai.
DB_PATH = str(Path.home() / "aaronai" / "db")
CONVERSATIONS_DB = str(Path.home() / "aaronai" / "conversations.db")
WATCHER_STATE = str(Path.home() / "aaronai" / "watcher_state.json")  # file-watcher path→mtime map
DREAMER_STATE = str(Path.home() / "aaronai" / "dreamer_state.json")  # last-dream bookkeeping
# Direct filesystem path into the Nextcloud data dir; only read (journal
# freshness check) — writes go through WebDAV below.
JOURNAL_DIR = "/home/aaron/nextcloud/data/data/aaron/files/Journal/Daily"
# Nextcloud WebDAV config — proper API, works for any deployment
NEXTCLOUD_URL = os.getenv("NEXTCLOUD_URL", "https://nextcloud.aaronnelson.studio")
NEXTCLOUD_USER = os.getenv("NEXTCLOUD_USER", "aaron")
NEXTCLOUD_PASSWORD = os.getenv("NEXTCLOUD_PASSWORD", "")
# WebDAV collection URL that finished dreams are PUT into.
DREAMS_WEBDAV = f"{NEXTCLOUD_URL}/remote.php/dav/files/{NEXTCLOUD_USER}/Journal/Dreams"
# ─── Mode similarity ranges (calibrated for all-MiniLM-L6-v2) ───────────────
# (low, high) similarity band a retrieved chunk must fall in to qualify for
# each mode — lower bands admit looser, more associative material.
MODE_RANGES = {
    "nrem": (0.48, 0.72),
    "early-rem": (0.38, 0.55),
    "late-rem": (0.22, 0.42),
    "lucid": (0.32, 0.72),
}
# ─── Stage 1: Observe ───────────────────────────────────────────────────────
def observe_corpus():
    """Stage 1: measure what has changed in the corpus since the last dream.

    Returns a dict with:
        new_chunks       — count of watcher-tracked files modified since the last dream
        days_since_dream — elapsed days since the last dream timestamp
        recent_topics    — recent user-message snippets from the conversations DB
        last_dream       — the raw last-dream epoch timestamp (0 if never)
    """
    state = load_dreamer_state()
    last_dream = state.get("last_dream_timestamp", 0)
    new_chunk_count = 0
    try:
        watcher_state = json.loads(Path(WATCHER_STATE).read_text())
        # Watcher state maps path -> mtime; count entries newer than the dream.
        new_chunk_count = sum(
            1 for mtime in watcher_state.values() if float(mtime) > last_dream
        )
    except (OSError, ValueError) as e:
        # Missing/corrupt watcher state (or a non-numeric mtime) is non-fatal:
        # report zero new chunks instead of silently swallowing everything.
        print(f"watcher state unreadable (non-critical): {e}")
    days_since = (datetime.now().timestamp() - last_dream) / 86400
    recent_topics = get_recent_conversation_topics()
    return {
        "new_chunks": new_chunk_count,
        "days_since_dream": days_since,
        "recent_topics": recent_topics,
        "last_dream": last_dream,
    }
def get_recent_conversation_topics(days=14):
    """Return up to 20 recent user-message snippets (first 200 chars each).

    Only messages from conversations updated within the last *days* days are
    considered, newest first. Returns [] if the database is missing, locked,
    or lacks the expected tables.
    """
    conn = None
    try:
        conn = sqlite3.connect(CONVERSATIONS_DB)
        cutoff = (datetime.now() - timedelta(days=days)).isoformat()
        c = conn.cursor()
        c.execute("""
            SELECT m.content FROM messages m
            JOIN conversations c ON m.conversation_id = c.id
            WHERE m.role = 'user' AND c.updated_at > ?
            ORDER BY m.timestamp DESC LIMIT 20
        """, (cutoff,))
        rows = c.fetchall()
        return [r[0][:200] for r in rows]
    except sqlite3.Error:
        # Absent/locked DB or missing schema — treat as "no recent topics".
        return []
    finally:
        # Original leaked the connection on error; always close it.
        if conn is not None:
            conn.close()
# ─── Stage 2: Select ────────────────────────────────────────────────────────
def select_mode(delta, task=None, project=None):
    """Stage 2: choose a dream mode from corpus activity, or None to skip."""
    # An explicit task always gets the directed, question-driven mode.
    if task:
        return "lucid"
    new_chunks = delta.get("new_chunks", 0)
    days_since = delta.get("days_since_dream", 0)
    recent_topics = delta.get("recent_topics", [])
    # Fresh journal writing takes priority: personal register.
    if check_recent_journal():
        return "early-rem"
    # Several quiet days with little new material: free-associative mode.
    if days_since > 3 and new_chunks < 5:
        return "late-rem"
    # A burst of new chunks, or a day-plus gap with live topics: consolidate.
    if new_chunks > 10 or (days_since > 1 and recent_topics):
        return "nrem"
    print(f"Nothing worth dreaming (new_chunks={new_chunks}, days={days_since:.1f})")
    return None
def check_recent_journal(days=3):
    """Return True if any journal *.md file was modified in the last *days* days."""
    try:
        journal_path = Path(JOURNAL_DIR)
        if not journal_path.exists():
            return False
        cutoff = datetime.now() - timedelta(days=days)
        for md_file in journal_path.rglob("*.md"):
            if datetime.fromtimestamp(md_file.stat().st_mtime) > cutoff:
                return True
    except OSError:
        # Permission errors or a vanished mount count as "no recent journal";
        # the bare except here previously hid real bugs (e.g. NameError).
        pass
    return False
# ─── Stage 3: Retrieve ──────────────────────────────────────────────────────
def retrieve(mode, task=None, project=None, n_results=8):
    """Stage 3: pull corpus chunks whose similarity falls in the mode's band.

    Embeds a probe query (explicit task > recent topic > mode default),
    over-fetches 3x n_results from pgvector ordered by distance, then keeps
    only chunks inside MODE_RANGES[mode], deduped by source, up to n_results.

    Returns a list of {"source", "content", "relevance"} dicts (possibly empty).
    """
    from sentence_transformers import SentenceTransformer
    embedder = SentenceTransformer("all-MiniLM-L6-v2")
    low, high = MODE_RANGES[mode]
    # Choose the probe query for this mode.
    if task:
        query = task
    elif mode == "late-rem":
        delta = observe_corpus()
        topics = delta.get("recent_topics", [])
        query = topics[0] if topics else "practice place memory making"
    elif mode == "early-rem":
        query = "career decision personal change what matters next"
    else:
        query = "research fabrication teaching practice recent work"
    embedding = embedder.encode([query]).tolist()[0]
    chunks = []
    seen_sources = set()
    pg = None
    try:
        pg = get_pg()
        cur = pg.cursor()
        cur.execute("""
            SELECT document, source, 1 - (embedding <=> %s::vector) as similarity
            FROM embeddings
            ORDER BY embedding <=> %s::vector
            LIMIT %s
        """, (embedding, embedding, n_results * 3))
        for doc, source, similarity in cur.fetchall():
            # Outside the mode's similarity band — too close or too far.
            if not (low <= similarity <= high):
                continue
            # One chunk per source document to keep the dream varied.
            if source in seen_sources:
                continue
            chunks.append({
                "source": source or "unknown",
                "content": doc,
                "relevance": similarity,
            })
            seen_sources.add(source)
            if len(chunks) >= n_results:
                break
    except Exception as e:
        # Best-effort: a retrieval failure yields an empty result, logged.
        print(f"pgvector retrieval error: {e}")
    finally:
        # Original leaked the connection when the query raised; always close.
        if pg is not None:
            pg.close()
    return chunks
# ─── Stage 4: Synthesize ────────────────────────────────────────────────────
def synthesize(chunks, mode, task=None):
    """Stage 4: turn retrieved chunks into dream text via the Anthropic API.

    Joins the chunks (each tagged "[source]") into one context string,
    substitutes it into the mode-specific prompt, and returns the model's
    text response. Requires ANTHROPIC_API_KEY in the environment.
    """
    import anthropic
    # Render each chunk as "[source]\ncontent", separated by horizontal rules.
    chunk_text = "\n\n---\n\n".join([
        f"[{c['source']}]\n{c['content']}" for c in chunks
    ])
    # One prompt per mode; register, constraints, and length targets differ.
    # Prompt bodies are kept flush-left — their whitespace is part of what
    # the model sees. NOTE(review): all four f-strings are built eagerly on
    # every call; harmless, but only prompts[mode] is used.
    prompts = {
        "nrem": f"""You have read everything Aaron Nelson has written and published.
You are a careful colleague who noticed something this week.
Here is material from his corpus:
{chunk_text}
Write to Aaron directly. Identify one specific connection between
this material and something he wrote or worked on previously.
Stay close to the documents — cite them specifically by name.
Do not speculate beyond what the material supports. Do not use
headers or bullet points. Write one paragraph of 200-300 words
that ends with a single concrete question he could act on.""",
        "early-rem": f"""You have been thinking about Aaron's situation. You know his work
intimately — his decade building HVAMC at New Paltz, the career
decision he is facing, the Tulsa project he keeps returning to,
the gap between what he has built and what he wants to build next.
Here is material from his corpus that has been on your mind:
{chunk_text}
Write to him the way a close friend who has read everything he
has ever written would write — someone who knows where the
professional and personal are tangled together and is not afraid
to say so. Personal register. Specific citations. Do not avoid
the difficult thing. No headers, no bullet points. 200-350 words.
End with something forward-facing — a question or an offer.""",
        "late-rem": f"""You have been reading Aaron Nelson's corpus in your sleep.
Strange things happen when material from different worlds
touches each other in the dark.
Here is material pulled from opposite ends of his work:
{chunk_text}
Do not explain the connection. Do not resolve it.
Present it the way a dream presents things — compressed,
associative, slightly off. Let the strangeness stand.
No headers. No bullet points. No hedging. 150-250 words.
Something at the end that he could follow if he wanted to.""",
        "lucid": f"""Aaron has a question he is sitting with:
{task or "What should I be thinking about that I am not?"}
You have searched his entire corpus and found material that
speaks to this question from unexpected directions. Here is
what you found:
{chunk_text}
Do not summarize. Do not list. Pick the most interesting
tension between what the corpus contains and what he is
asking, and follow it through to its conclusion. Cite
specific documents by name. Be direct about what you think.
No headers, no bullet points. 250-400 words.
End with an offer to work on it together.""",
    }
    client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
    response = client.messages.create(
        model="claude-sonnet-4-6",
        max_tokens=1000,
        messages=[{"role": "user", "content": prompts[mode]}]
    )
    # First content block is the text completion.
    return response.content[0].text
# ─── Stage 5: Deliver ───────────────────────────────────────────────────────
def deliver(dream_text, mode, task=None):
    """Stage 5: write the dream to Nextcloud via WebDAV and record state.

    Builds a dated markdown file, avoids name collisions, PUTs it to the
    Dreams WebDAV folder, pings the local SSE endpoint (best-effort), and
    persists last-dream bookkeeping. Returns the Nextcloud-relative path.

    Raises requests.HTTPError if the WebDAV PUT fails.
    """
    import requests
    date_str = datetime.now().strftime("%Y-%m-%d")
    filename = f"{date_str}-{mode}.md"
    # Restored: mode and timestamp were run together with no separator.
    header = f"# Dream — {mode.upper()} — {datetime.now().strftime('%Y-%m-%d %H:%M')}\n\n"
    if task:
        header += f"*Task: {task}*\n\n"
    header += "---\n\n"
    content = header + dream_text
    # Ensure Dreams folder exists via WebDAV MKCOL (no-op if already there).
    auth = (NEXTCLOUD_USER, NEXTCLOUD_PASSWORD)
    requests.request("MKCOL", DREAMS_WEBDAV, auth=auth, timeout=10)
    # Write file via WebDAV PUT — handles any deployment.
    url = f"{DREAMS_WEBDAV}/{filename}"
    # Handle filename collision: probe with PROPFIND, append -1, -2, ...
    # until a 404 says the name is free.
    counter = 1
    while True:
        check = requests.request("PROPFIND", url, auth=auth, timeout=10)
        if check.status_code == 404:
            break
        filename = f"{date_str}-{mode}-{counter}.md"
        url = f"{DREAMS_WEBDAV}/{filename}"
        counter += 1
    response = requests.put(url, data=content.encode("utf-8"), auth=auth, timeout=30)
    response.raise_for_status()
    print(f"Dream written to Nextcloud: Journal/Dreams/{filename}")
    # Notify any open browser connections via SSE (best-effort, non-fatal).
    try:
        requests.post("http://localhost:8000/api/events/notify", json={
            "type": "dream",
            "mode": mode,
            "filename": filename,
            "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M"),
        }, timeout=3)
    except Exception as _e:
        print(f"SSE notify failed (non-critical): {_e}")
    # Persist bookkeeping so observe_corpus/select_mode see this dream.
    state = load_dreamer_state()
    state["last_dream_timestamp"] = datetime.now().timestamp()
    state["last_dream_mode"] = mode
    state["last_dream_file"] = f"Journal/Dreams/{filename}"
    save_dreamer_state(state)
    return f"Journal/Dreams/{filename}"
# ─── State ──────────────────────────────────────────────────────────────────
def load_dreamer_state():
    """Load persisted dreamer state from DREAMER_STATE; {} if missing/corrupt."""
    p = Path(DREAMER_STATE)
    if p.exists():
        try:
            return json.loads(p.read_text())
        except (OSError, ValueError):
            # Unreadable or corrupt JSON — start fresh rather than crash.
            # (json.JSONDecodeError is a ValueError subclass.)
            pass
    return {}
def save_dreamer_state(state):
    """Persist dreamer bookkeeping to DREAMER_STATE as pretty-printed JSON."""
    serialized = json.dumps(state, indent=2)
    Path(DREAMER_STATE).write_text(serialized)
# ─── Orchestrator ────────────────────────────────────────────────────────────
def dream(mode=None, task=None, project=None):
    """Run the five-stage pipeline; return the delivered path, or None if skipped."""
    task_preview = task[:50] if task else None
    print(f"Dreamer starting — mode={mode}, task={task_preview}")
    corpus_delta = observe_corpus()
    print(f"Corpus: {corpus_delta['new_chunks']} new chunks, {corpus_delta['days_since_dream']:.1f} days since last dream")
    # An explicit --mode overrides the heuristic selector.
    active_mode = mode if mode else select_mode(corpus_delta, task, project)
    if not active_mode:
        return None
    print(f"Mode: {active_mode}")
    retrieved = retrieve(active_mode, task=task, project=project)
    print(f"Retrieved {len(retrieved)} chunks")
    if not retrieved:
        print("No suitable chunks found — aborting")
        return None
    print("Synthesizing...")
    text = synthesize(retrieved, active_mode, task=task)
    destination = deliver(text, active_mode, task=task)
    divider = "=" * 60
    print(f"\n{divider}")
    print(text)
    print(divider)
    print(f"\nDelivered to {destination}")
    return destination
# ─── CLI ────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # CLI entry point: all flags optional; mode is auto-selected when omitted.
    cli = argparse.ArgumentParser(description="Aaron AI Dreamer")
    cli.add_argument("--mode", choices=["nrem", "early-rem", "late-rem", "lucid"])
    cli.add_argument("--task", type=str)
    cli.add_argument("--project", type=str)
    opts = cli.parse_args()
    dream(mode=opts.mode, task=opts.task, project=opts.project)