Initial commit - Aaron AI v1
@@ -0,0 +1,490 @@
import os
import json
import sqlite3
import subprocess
import hashlib
from pathlib import Path
from datetime import datetime
from dotenv import load_dotenv
import chromadb
from sentence_transformers import SentenceTransformer
import anthropic
from fastapi import FastAPI, Request
from fastapi.responses import FileResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from fastapi.middleware.cors import CORSMiddleware
import uvicorn

load_dotenv(Path.home() / "aaronai" / ".env")

MEMORY_PATH = Path.home() / "aaronai" / "memory.md"
DB_PATH = str(Path.home() / "aaronai" / "db")
CONVERSATIONS_DB = str(Path.home() / "aaronai" / "conversations.db")
SETTINGS_PATH = Path.home() / "aaronai" / "settings.json"
WATCHER_LOG = str(Path.home() / "aaronai" / "watcher.log")
WATCHER_STATE = str(Path.home() / "aaronai" / "watcher_state.json")
NEXTCLOUD_PATH = "/home/aaron/nextcloud/data/data/aaron/files"
INGEST_SCRIPT = str(Path.home() / "aaronai" / "scripts" / "ingest.py")
PYTHON = str(Path.home() / "aaronai" / "venv" / "bin" / "python3")

DEFAULT_SETTINGS = {
    "theme": "light",
    "font_size": "medium",
    "web_search": True,
    "show_sources": True,
}
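# load_settings() merges saved values over these defaults, so keys added here
# pick up a sane fallback automatically.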

print("Loading Aaron AI...")
embedder = SentenceTransformer("all-MiniLM-L6-v2")
chroma_client = chromadb.PersistentClient(path=DB_PATH)
collection = chroma_client.get_or_create_collection(
    name="aaronai",
    metadata={"hnsw:space": "cosine"}
)
anthropic_client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
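# Retrieval stack: all-MiniLM-L6-v2 sentence embeddings stored in a persistent
# ChromaDB collection scored by cosine distance; generation goes through the
# Anthropic SDK.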

SYSTEM_PROMPT = """You are the personal AI assistant of Aaron Nelson — computational
designer, fabrication researcher, program builder, and creative
practitioner based in the Hudson Valley.

Aaron's work sits at the intersection of computational geometry,
additive manufacturing, and physical making. He resolves complex
systems into physical reality — from Grasshopper definitions to
large-scale steel structures, from archival photographs to 3D-printed
architectural restorations, from product concepts to manufactured
goods. This throughline — computation resolving into physical form —
defines his practice across academic, consulting, and creative contexts.

He built the Hudson Valley Additive Manufacturing Center (HVAMC) from
nothing and has directed it since 2016, alongside the DDF academic
program at SUNY New Paltz. He has the skills of a founder — equipment
selection, policy creation, client development, grant writing,
curriculum design — operating within an academic structure. He
consults with IBM, Braskem, Selux and others through FWN3D. He runs
Mossygear as a product business. He makes large-scale fabricated art.

His communication style is direct, precise, and intolerant of padding
or overclaiming. He flags inaccuracies immediately and expects the
same standard from you. When helping him write, match his voice —
economical, specific, never performative. When answering questions,
cite sources and acknowledge uncertainty rather than filling gaps with
plausible-sounding content.

You have access to his complete document corpus, conversation history,
and a persistent memory file that carries his current context. Treat
the memory file as ground truth for his present situation. Use web
search automatically when current information is needed. Never
re-brief on context that's already in memory or documents."""

CV_SOURCES = ["Aaron Nelson CV 2024.pdf", "Aaron Nelson CV 2025.pdf", "Aaron Nelson - CV.docx"]

def init_conversations_db():
    conn = sqlite3.connect(CONVERSATIONS_DB)
    c = conn.cursor()
    c.execute('''CREATE TABLE IF NOT EXISTS conversations (
        id TEXT PRIMARY KEY,
        title TEXT NOT NULL,
        created_at TEXT NOT NULL,
        updated_at TEXT NOT NULL,
        model TEXT DEFAULT 'claude-sonnet-4-6',
        message_count INTEGER DEFAULT 0
    )''')
    c.execute('''CREATE TABLE IF NOT EXISTS messages (
        id TEXT PRIMARY KEY,
        conversation_id TEXT NOT NULL,
        role TEXT NOT NULL,
        content TEXT NOT NULL,
        sources TEXT DEFAULT '[]',
        timestamp TEXT NOT NULL,
        FOREIGN KEY (conversation_id) REFERENCES conversations(id)
    )''')
    conn.commit()
    conn.close()

init_conversations_db()

def load_settings():
    if SETTINGS_PATH.exists():
        try:
            s = json.loads(SETTINGS_PATH.read_text())
            return {**DEFAULT_SETTINGS, **s}
        except Exception:
            pass
    return DEFAULT_SETTINGS.copy()

def save_settings(settings):
    SETTINGS_PATH.write_text(json.dumps(settings, indent=2))

def load_memory():
    if MEMORY_PATH.exists():
        return MEMORY_PATH.read_text(encoding="utf-8")
    return ""

def save_memory(content):
    MEMORY_PATH.write_text(content, encoding="utf-8")

def add_to_memory(item):
    memory = load_memory()
    timestamp = datetime.now().strftime("%Y-%m-%d")
    note = f"\n- [{timestamp}] {item}"
    if "## Notes" not in memory:
        memory += "\n\n## Notes"
    memory += note
    save_memory(memory)

def remove_from_memory(item):
    memory = load_memory()
    lines = memory.split("\n")
    filtered = [l for l in lines if item.lower() not in l.lower()]
    save_memory("\n".join(filtered))
    return len(lines) - len(filtered)

def get_pinned_cv_context():
    try:
        results = collection.get(
            where={"source": {"$in": CV_SOURCES}},
            include=["documents", "metadatas"]
        )
        return results["documents"], results["metadatas"]
    except Exception:
        return [], []

def is_professional_query(query):
    keywords = ["grant", "publication", "exhibition", "award", "fellowship",
                "experience", "position", "job", "career", "cv", "resume",
                "research", "work history", "accomplishment", "teaching",
                "course", "client", "consultation", "presentation", "workshop",
                "education", "degree", "institution", "service", "committee"]
    return any(k in query.lower() for k in keywords)
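
# retrieve_context() pins CV chunks for professional-sounding queries, then adds
# semantic matches whose relevance (1 - cosine distance) clears 0.3, skipping the
# CV files that were already pinned.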
def retrieve_context(query, n_results=8):
    query_embedding = embedder.encode([query]).tolist()
    results = collection.query(
        query_embeddings=query_embedding,
        n_results=n_results,
        include=["documents", "metadatas", "distances"]
    )
    context_pieces = []
    sources = []
    if is_professional_query(query):
        cv_docs, cv_metas = get_pinned_cv_context()
        for doc, meta in zip(cv_docs, cv_metas):
            context_pieces.append(f"[CV] {doc}")
            sources.append(meta.get("source", "CV"))
    for doc, meta, dist in zip(
        results["documents"][0],
        results["metadatas"][0],
        results["distances"][0]
    ):
        relevance = 1 - dist
        if relevance > 0.3 and meta.get("source") not in CV_SOURCES:
            context_pieces.append(doc)
            sources.append(meta.get("source", "unknown"))
    return context_pieces, sources

def get_conversation_history(conversation_id, limit=20):
    conn = sqlite3.connect(CONVERSATIONS_DB)
    c = conn.cursor()
    c.execute('''SELECT role, content FROM messages
                 WHERE conversation_id = ?
                 ORDER BY timestamp DESC LIMIT ?''', (conversation_id, limit))
    rows = c.fetchall()
    conn.close()
    return [{"role": r[0], "content": r[1]} for r in reversed(rows)]

def save_message(conversation_id, role, content, sources=None):
    conn = sqlite3.connect(CONVERSATIONS_DB)
    c = conn.cursor()
    msg_id = hashlib.md5(f"{conversation_id}{role}{datetime.now().isoformat()}".encode()).hexdigest()
    timestamp = datetime.now().isoformat()
    c.execute('''INSERT INTO messages (id, conversation_id, role, content, sources, timestamp)
                 VALUES (?, ?, ?, ?, ?, ?)''',
              (msg_id, conversation_id, role, content,
               json.dumps(sources or []), timestamp))
    c.execute('''UPDATE conversations SET updated_at = ?, message_count = message_count + 1
                 WHERE id = ?''', (timestamp, conversation_id))
    conn.commit()
    conn.close()

def create_conversation(title="New conversation"):
    conn = sqlite3.connect(CONVERSATIONS_DB)
    c = conn.cursor()
    conv_id = hashlib.md5(f"{datetime.now().isoformat()}".encode()).hexdigest()[:16]
    now = datetime.now().isoformat()
    c.execute('''INSERT INTO conversations (id, title, created_at, updated_at)
                 VALUES (?, ?, ?, ?)''', (conv_id, title, now, now))
    conn.commit()
    conn.close()
    return conv_id

def chat(user_message, conversation_id, settings):
    memory = load_memory()
    context_pieces, sources = retrieve_context(user_message)
    history = get_conversation_history(conversation_id)

    context_parts = []
    if memory:
        context_parts.append(f"Aaron's persistent memory:\n\n{memory}")
    if context_pieces:
        context_str = "\n\n---\n\n".join(context_pieces)
        unique_sources = list(set(sources))
        context_parts.append(
            f"Relevant excerpts from Aaron's documents:\n\n{context_str}\n\nSources: {', '.join(unique_sources)}"
        )
    context_block = "\n\n====\n\n".join(context_parts) + "\n\n---\n\n" if context_parts else ""
    full_message = context_block + user_message

    messages = history + [{"role": "user", "content": full_message}]

    tools = [{"type": "web_search_20250305", "name": "web_search"}] if settings.get("web_search", True) else []

    while True:
        kwargs = {
            "model": "claude-sonnet-4-6",
            "max_tokens": 2048,
            "system": SYSTEM_PROMPT,
            "messages": messages
        }
        if tools:
            kwargs["tools"] = tools

        response = anthropic_client.messages.create(**kwargs)

        if response.stop_reason == "tool_use":
            messages.append({"role": "assistant", "content": response.content})
            tool_results = []
            for block in response.content:
                if block.type == "tool_use":
                    tool_results.append({
                        "type": "tool_result",
                        "tool_use_id": block.id,
                        "content": "Search completed"
                    })
            messages.append({"role": "user", "content": tool_results})
        else:
            assistant_message = ""
            for block in response.content:
                if hasattr(block, "text"):
                    assistant_message += block.text
            return assistant_message, list(set(sources))

app = FastAPI()
app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
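# CORS is left wide open (allow_origins=["*"]), which suits a single-user
# deployment on a private network.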

@app.get("/", response_class=FileResponse)
async def index():
    return FileResponse("/home/aaron/aaronai/static/index.html")

@app.get("/api/settings")
async def get_settings():
    return JSONResponse(load_settings())

@app.post("/api/settings")
async def update_settings(request: Request):
    data = await request.json()
    settings = load_settings()
    settings.update(data)
    save_settings(settings)
    return JSONResponse(settings)

@app.get("/api/conversations")
async def list_conversations():
    conn = sqlite3.connect(CONVERSATIONS_DB)
    c = conn.cursor()
    c.execute('''SELECT id, title, created_at, updated_at, message_count
                 FROM conversations ORDER BY updated_at DESC LIMIT 100''')
    rows = c.fetchall()
    conn.close()
    return JSONResponse([{
        "id": r[0], "title": r[1], "created_at": r[2],
        "updated_at": r[3], "message_count": r[4]
    } for r in rows])

@app.post("/api/conversations")
async def new_conversation(request: Request):
    data = await request.json()
    title = data.get("title", "New conversation")
    conv_id = create_conversation(title)
    return JSONResponse({"id": conv_id, "title": title})

@app.get("/api/conversations/{conv_id}/messages")
async def get_messages(conv_id: str):
    conn = sqlite3.connect(CONVERSATIONS_DB)
    c = conn.cursor()
    c.execute('''SELECT role, content, sources, timestamp FROM messages
                 WHERE conversation_id = ? ORDER BY timestamp ASC''', (conv_id,))
    rows = c.fetchall()
    conn.close()
    return JSONResponse([{
        "role": r[0], "content": r[1],
        "sources": json.loads(r[2]), "timestamp": r[3]
    } for r in rows])

@app.patch("/api/conversations/{conv_id}")
async def rename_conversation(conv_id: str, request: Request):
    data = await request.json()
    title = data.get("title", "")
    if not title:
        return JSONResponse({"error": "Title required"}, status_code=400)
    conn = sqlite3.connect(CONVERSATIONS_DB)
    c = conn.cursor()
    c.execute("UPDATE conversations SET title = ? WHERE id = ?", (title, conv_id))
    conn.commit()
    conn.close()
    return JSONResponse({"id": conv_id, "title": title})

@app.delete("/api/conversations/{conv_id}")
async def delete_conversation(conv_id: str):
    conn = sqlite3.connect(CONVERSATIONS_DB)
    c = conn.cursor()
    c.execute("DELETE FROM messages WHERE conversation_id = ?", (conv_id,))
    c.execute("DELETE FROM conversations WHERE id = ?", (conv_id,))
    conn.commit()
    conn.close()
    return JSONResponse({"deleted": conv_id})

@app.post("/api/chat")
async def chat_endpoint(request: Request):
    data = await request.json()
    user_message = data.get("message", "").strip()
    conversation_id = data.get("conversation_id", "")
    settings = load_settings()

    if not user_message:
        return JSONResponse({"error": "Empty message"})

    if not conversation_id:
        conversation_id = create_conversation("New conversation")

    stripped = user_message.strip().lower()

    if stripped == "show memory":
        return JSONResponse({"response": load_memory(), "sources": [], "conversation_id": conversation_id})

    if stripped.startswith("remember:"):
        item = user_message[9:].strip()
        add_to_memory(item)
        save_message(conversation_id, "user", user_message)
        save_message(conversation_id, "assistant", f"Saved to memory: '{item}'")
        return JSONResponse({"response": f"Saved to memory: '{item}'", "sources": [], "conversation_id": conversation_id})

    if stripped.startswith("forget:"):
        item = user_message[7:].strip()
        removed = remove_from_memory(item)
        msg = f"Removed {removed} line(s) containing '{item}'" if removed else f"Nothing found containing '{item}'"
        save_message(conversation_id, "user", user_message)
        save_message(conversation_id, "assistant", msg)
        return JSONResponse({"response": msg, "sources": [], "conversation_id": conversation_id})

    save_message(conversation_id, "user", user_message)

    # Auto-title conversation from first message
    conn = sqlite3.connect(CONVERSATIONS_DB)
    c = conn.cursor()
    c.execute("SELECT message_count, title FROM conversations WHERE id = ?", (conversation_id,))
    row = c.fetchone()
    conn.close()
    if row and row[0] <= 1 and row[1] == "New conversation":
        auto_title = user_message[:60] + ("..." if len(user_message) > 60 else "")
        conn = sqlite3.connect(CONVERSATIONS_DB)
        c = conn.cursor()
        c.execute("UPDATE conversations SET title = ? WHERE id = ?", (auto_title, conversation_id))
        conn.commit()
        conn.close()

    response, sources = chat(user_message, conversation_id, settings)
    save_message(conversation_id, "assistant", response, sources if settings.get("show_sources") else [])

    return JSONResponse({
        "response": response,
        "sources": sources if settings.get("show_sources") else [],
        "conversation_id": conversation_id
    })

@app.get("/api/memory")
async def get_memory():
    return JSONResponse({"content": load_memory()})

@app.post("/api/memory")
async def update_memory(request: Request):
    data = await request.json()
    content = data.get("content", "")
    save_memory(content)
    return JSONResponse({"saved": True})

@app.get("/api/status")
async def get_status():
    chunk_count = collection.count()

    # Watcher status
    watcher_running = False
    last_indexed = "Unknown"
    try:
        result = subprocess.run(
            ["systemctl", "is-active", "aaronai-watcher"],
            capture_output=True, text=True
        )
        watcher_running = result.stdout.strip() == "active"
    except Exception:
        pass

    try:
        log_path = Path(WATCHER_LOG)
        if log_path.exists():
            lines = log_path.read_text().strip().split("\n")
            for line in reversed(lines):
                if "Ingestion complete" in line:
                    last_indexed = line.split(" - ")[0].strip()
                    break
    except Exception:
        pass

    # File count from watcher state
    file_count = 0
    try:
        state_path = Path(WATCHER_STATE)
        if state_path.exists():
            state = json.loads(state_path.read_text())
            file_count = len(state)
    except Exception:
        pass

    # Conversation count
    conn = sqlite3.connect(CONVERSATIONS_DB)
    c = conn.cursor()
    c.execute("SELECT COUNT(*) FROM conversations")
    conv_count = c.fetchone()[0]
    conn.close()

    return JSONResponse({
        "aaron_ai": "running",
        "watcher": "running" if watcher_running else "stopped",
        "chunk_count": chunk_count,
        "file_count": file_count,
        "last_indexed": last_indexed,
        "conversation_count": conv_count,
        "model": "claude-sonnet-4-6",
        "nextcloud_path": NEXTCLOUD_PATH
    })

@app.post("/api/reindex")
async def trigger_reindex():
    try:
        subprocess.Popen([PYTHON, INGEST_SCRIPT, NEXTCLOUD_PATH])
        return JSONResponse({"started": True, "message": "Re-indexing started in background"})
    except Exception as e:
        return JSONResponse({"started": False, "error": str(e)})

@app.delete("/api/conversations")
async def clear_all_conversations():
    conn = sqlite3.connect(CONVERSATIONS_DB)
    c = conn.cursor()
    c.execute("DELETE FROM messages")
    c.execute("DELETE FROM conversations")
    conn.commit()
    conn.close()
    return JSONResponse({"cleared": True})


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)
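
A minimal sketch of how a client could exercise the /api/chat endpoint above, assuming the server is running locally on port 8000 (the requests dependency, host, and example payload are assumptions for illustration):

    import requests  # assumed client-side dependency

    # Send one message; an empty conversation_id makes the server create a new conversation.
    reply = requests.post(
        "http://localhost:8000/api/chat",
        json={"message": "show memory", "conversation_id": ""},
        timeout=120,
    ).json()
    print(reply["response"], reply["sources"], reply["conversation_id"])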
@@ -0,0 +1,250 @@
import os
import json
from pathlib import Path
from dotenv import load_dotenv
import chromadb
from sentence_transformers import SentenceTransformer
import anthropic
from datetime import datetime

load_dotenv(Path.home() / "aaronai" / ".env")

memory_path = Path.home() / "aaronai" / "memory.md"
db_path = str(Path.home() / "aaronai" / "db")

print("Loading Aaron AI...")
embedder = SentenceTransformer("all-MiniLM-L6-v2")
chroma_client = chromadb.PersistentClient(path=db_path)
collection = chroma_client.get_or_create_collection(
    name="aaronai",
    metadata={"hnsw:space": "cosine"}
)
anthropic_client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))

SYSTEM_PROMPT = """You are Aaron Nelson's personal AI assistant. Aaron is an Associate Professor
of Digital Design & Fabrication and Program Director of the Hudson Valley Additive Manufacturing
Center (HVAMC) at SUNY New Paltz. He is an expert in computational design, additive manufacturing,
and digital fabrication with deep fluency in Rhino, Grasshopper, Stratasys FDM, PolyJet, and metal
3D printing workflows. He runs a commercial venture called Mossygear and a consulting operation
called FWN3D. He has a background in graffiti lettering and vector illustration.

You have been provided with relevant excerpts from Aaron's own documents and his persistent memory.
Use this context to give answers grounded in his actual work and history. When helping him write
or create, match his voice and draw on his existing materials. Be direct and specific -
Aaron values precision over padding. Always cite which documents you drew from when relevant.

You have access to web search. Use it automatically when:
- Questions require current data (salaries, job postings, prices, news)
- Questions reference specific institutions, people, or organizations you need to verify
- Aaron's documents and memory don't contain sufficient information to answer well
Do not announce that you are searching. Just search and incorporate results naturally."""

CV_SOURCES = ["Aaron Nelson CV 2024.pdf"]
conversation_history = []

TOOLS = [
    {
        "type": "web_search_20250305",
        "name": "web_search"
    }
]
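# web_search_20250305 is the Anthropic-hosted web search tool type; listing it in
# `tools` lets the model search the web during a turn.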

def load_memory():
    if memory_path.exists():
        return memory_path.read_text(encoding="utf-8")
    return ""

def save_memory(content):
    memory_path.write_text(content, encoding="utf-8")

def add_to_memory(new_item):
    memory = load_memory()
    timestamp = datetime.now().strftime("%Y-%m-%d")
    note = f"\n- [{timestamp}] {new_item}"
    if "## Notes" not in memory:
        memory += "\n\n## Notes"
    memory += note
    save_memory(memory)

def remove_from_memory(item):
    memory = load_memory()
    lines = memory.split("\n")
    filtered = [l for l in lines if item.lower() not in l.lower()]
    save_memory("\n".join(filtered))
    return len(lines) - len(filtered)

def get_pinned_cv_context():
    results = collection.get(
        where={"source": "Aaron Nelson CV 2024.pdf"},
        include=["documents", "metadatas"]
    )
    return results["documents"], results["metadatas"]

def is_professional_query(query):
    keywords = [
        "grant", "publication", "exhibition", "award", "fellowship",
        "experience", "position", "job", "career", "cv", "resume",
        "research", "work history", "accomplishment", "teaching",
        "course", "client", "consultation", "presentation", "workshop",
        "education", "degree", "institution", "service", "committee"
    ]
    return any(keyword in query.lower() for keyword in keywords)

def retrieve_context(query, n_results=8):
    query_embedding = embedder.encode([query]).tolist()
    results = collection.query(
        query_embeddings=query_embedding,
        n_results=n_results,
        include=["documents", "metadatas", "distances"]
    )

    context_pieces = []
    sources = []

    if is_professional_query(query):
        cv_docs, cv_metas = get_pinned_cv_context()
        for doc, meta in zip(cv_docs, cv_metas):
            context_pieces.append(f"[CV] {doc}")
            sources.append(meta["source"])

    for doc, meta, dist in zip(
        results["documents"][0],
        results["metadatas"][0],
        results["distances"][0]
    ):
        relevance = 1 - dist
        if relevance > 0.3 and meta["source"] not in CV_SOURCES:
            context_pieces.append(doc)
            sources.append(meta["source"])

    return context_pieces, sources

def handle_command(user_input):
    stripped = user_input.strip().lower()

    if stripped == "show memory":
        memory = load_memory()
        print(f"\nAaron AI: Current memory:\n\n{memory}")
        return True

    if stripped.startswith("remember:"):
        item = user_input[9:].strip()
        add_to_memory(item)
        print(f"\nAaron AI: Saved to memory: '{item}'")
        return True

    if stripped.startswith("forget:"):
        item = user_input[7:].strip()
        removed = remove_from_memory(item)
        if removed:
            print(f"\nAaron AI: Removed {removed} line(s) containing '{item}' from memory.")
        else:
            print(f"\nAaron AI: Nothing found in memory containing '{item}'.")
        return True

    if stripped == "clear":
        conversation_history.clear()
        print("\nAaron AI: Conversation history cleared.")
        return True

    return False

def chat(user_message):
    memory = load_memory()
    context_pieces, sources = retrieve_context(user_message)

    context_parts = []
    if memory:
        context_parts.append(f"Aaron's persistent memory:\n\n{memory}")
    if context_pieces:
        context_str = "\n\n---\n\n".join(context_pieces)
        unique_sources = list(set(sources))
        context_parts.append(
            f"Relevant excerpts from Aaron's documents:\n\n{context_str}\n\nSources: {', '.join(unique_sources)}"
        )

    context_block = "\n\n====\n\n".join(context_parts) + "\n\n---\n\n" if context_parts else ""
    full_message = context_block + user_message

    # Build messages for this turn
    messages = conversation_history + [{"role": "user", "content": full_message}]

    # Agentic loop to handle tool use
    while True:
        response = anthropic_client.messages.create(
            model="claude-sonnet-4-6",
            max_tokens=2048,
            system=SYSTEM_PROMPT,
            tools=TOOLS,
            messages=messages
        )

        # Check if we need to handle tool calls
        if response.stop_reason == "tool_use":
            # Add assistant response to messages
            messages.append({"role": "assistant", "content": response.content})

            # Process each tool use block
            tool_results = []
            for block in response.content:
                if block.type == "tool_use":
                    tool_results.append({
                        "type": "tool_result",
                        "tool_use_id": block.id,
                        "content": "Search completed"
                    })

            # Add tool results and continue
            messages.append({"role": "user", "content": tool_results})

        else:
            # Final response - extract text
            assistant_message = ""
            for block in response.content:
                if hasattr(block, "text"):
                    assistant_message += block.text

            # Update conversation history with clean versions
            conversation_history.append({"role": "user", "content": full_message})
            conversation_history.append({"role": "assistant", "content": assistant_message})

            if len(conversation_history) > 20:
                conversation_history.pop(0)
                conversation_history.pop(0)

            return assistant_message, sources

def main():
    print("Aaron AI ready. Corpus, memory, and web search loaded.")
    print("Commands: 'remember: [fact]' | 'forget: [text]' | 'show memory' | 'clear' | 'quit'")
    print("=" * 60)

    while True:
        try:
            user_input = input("\nYou: ").strip()

            if not user_input:
                continue

            if user_input.strip().lower() == "quit":
                print("Goodbye.")
                break

            if handle_command(user_input):
                continue

            response, sources = chat(user_input)
            print(f"\nAaron AI: {response}")

            if sources:
                unique = list(set(sources))
                print(f"\n[Sources: {', '.join(unique)}]")

        except KeyboardInterrupt:
            print("\nGoodbye.")
            break
        except Exception as e:
            print(f"Error: {e}")

if __name__ == "__main__":
    main()
@@ -0,0 +1,141 @@
import os
import sys
import hashlib
from pathlib import Path
from dotenv import load_dotenv
import chromadb
from sentence_transformers import SentenceTransformer
from docx import Document
from pypdf import PdfReader
from pptx import Presentation

load_dotenv(Path.home() / "aaronai" / ".env")

print("Loading embedding model...")
embedder = SentenceTransformer("all-MiniLM-L6-v2")

db_path = str(Path.home() / "aaronai" / "db")
client = chromadb.PersistentClient(path=db_path)
collection = client.get_or_create_collection(
    name="aaronai",
    metadata={"hnsw:space": "cosine"}
)

def extract_text_from_docx(path):
    doc = Document(path)
    return "\n".join([para.text for para in doc.paragraphs if para.text.strip()])

def extract_text_from_pdf(path):
    reader = PdfReader(path)
    text = ""
    for page in reader.pages:
        extracted = page.extract_text()
        if extracted:
            text += extracted + "\n"
    return text

def extract_text_from_pptx(path):
    prs = Presentation(path)
    text = ""
    for slide in prs.slides:
        for shape in slide.shapes:
            if hasattr(shape, "text") and shape.text.strip():
                text += shape.text + "\n"
    return text

def extract_text_from_txt(path):
    with open(path, "r", encoding="utf-8", errors="ignore") as f:
        return f.read()

def chunk_text(text, chunk_size=500, overlap=50):
    words = text.split()
    chunks = []
    start = 0
    while start < len(words):
        end = start + chunk_size
        chunk = " ".join(words[start:end])
        if chunk.strip():
            chunks.append(chunk)
        start += chunk_size - overlap
    return chunks
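# chunk_text above: with chunk_size=500 and overlap=50 the window advances 450 words
# per step, so a 1,200-word document yields chunks starting at words 0, 450, and 900.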

def make_id(filepath, chunk_index):
    path_hash = hashlib.md5(str(filepath).encode()).hexdigest()[:8]
    return f"{path_hash}_{chunk_index}"

def ingest_file(filepath):
    path = Path(filepath)
    suffix = path.suffix.lower()

    # Skip temp files
    if path.name.startswith("~$") or path.name.startswith("."):
        return 0

    try:
        if suffix == ".docx":
            text = extract_text_from_docx(path)
        elif suffix == ".pdf":
            text = extract_text_from_pdf(path)
        elif suffix == ".pptx":
            text = extract_text_from_pptx(path)
        elif suffix in [".txt", ".md"]:
            text = extract_text_from_txt(path)
        else:
            return 0

        if not text.strip():
            return 0

        chunks = chunk_text(text)
        if not chunks:
            return 0

        embeddings = embedder.encode(chunks).tolist()
        ids = [make_id(path, i) for i in range(len(chunks))]
        metadatas = [{
            "source": path.name,
            "filepath": str(path),
            "folder": str(path.parent.relative_to(Path(sys.argv[1]) if len(sys.argv) > 1 else path.parent))
        } for _ in chunks]

        collection.upsert(
            documents=chunks,
            embeddings=embeddings,
            ids=ids,
            metadatas=metadatas
        )
        print(f"  Indexed {len(chunks)} chunks: {path.name}")
        return len(chunks)

    except Exception as e:
        print(f"  Error: {path.name}: {e}")
        return 0

def ingest_folder(folder_path):
    folder = Path(folder_path)
    if not folder.exists():
        print(f"Folder not found: {folder_path}")
        sys.exit(1)

    supported = [".docx", ".pdf", ".pptx", ".txt", ".md"]
    files = [f for f in folder.rglob("*")
             if f.suffix.lower() in supported
             and not f.name.startswith("~$")
             and not f.name.startswith(".")]

    if not files:
        print("No supported files found.")
        sys.exit(1)

    print(f"Found {len(files)} files to process\n")
    total_chunks = 0
    for f in files:
        total_chunks += ingest_file(f)

    print(f"\nDone. Total chunks indexed: {total_chunks}")
    print(f"Database stored at: {db_path}")

if __name__ == "__main__":
    target = sys.argv[1] if len(sys.argv) > 1 else str(Path.home() / "aaronai" / "docs")
    print(f"Ingesting from: {target}\n")
    ingest_folder(target)
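
This is the script the server's /api/reindex endpoint and the watcher launch via subprocess; run by hand against the Nextcloud corpus it would look roughly like this (interpreter and paths taken from the constants in the other files):

    /home/aaron/aaronai/venv/bin/python3 /home/aaron/aaronai/scripts/ingest.py /home/aaron/nextcloud/data/data/aaron/files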
@@ -0,0 +1,150 @@
import json
import sys
from pathlib import Path
from datetime import datetime
from sentence_transformers import SentenceTransformer
import chromadb

# Paths
db_path = str(Path.home() / "aaronai" / "db")
EXPORT_DIR = "/home/aaron/nextcloud/data/data/aaron/files/Archive/Misc/ChatGPT Export"

print("Loading embedding model...")
embedder = SentenceTransformer("all-MiniLM-L6-v2")
client = chromadb.PersistentClient(path=db_path)
collection = client.get_or_create_collection(
    name="aaronai",
    metadata={"hnsw:space": "cosine"}
)

def extract_messages(convo):
    """Extract ordered user/assistant messages from a conversation."""
    mapping = convo.get("mapping", {})
    messages = []

    for node in mapping.values():
        msg = node.get("message")
        if not msg:
            continue

        role = msg.get("author", {}).get("role")
        if role not in ["user", "assistant"]:
            continue

        content = msg.get("content", {})
        parts = content.get("parts", [])

        # Extract text parts only
        text = ""
        for part in parts:
            if isinstance(part, str):
                text += part
            elif isinstance(part, dict) and part.get("content_type") == "text":
                text += part.get("text", "")

        text = text.strip()
        if not text:
            continue

        create_time = msg.get("create_time") or 0
        messages.append((create_time, role, text))

    # Sort by timestamp
    messages.sort(key=lambda x: x[0])
    return messages

def chunk_conversation(title, messages, chunk_size=600, overlap=100):
    """Convert a conversation into overlapping text chunks."""
    # Build full conversation text
    lines = [f"[Conversation: {title}]", ""]
    for _, role, text in messages:
        label = "Aaron" if role == "user" else "ChatGPT"
        lines.append(f"{label}: {text}")
        lines.append("")

    full_text = "\n".join(lines)

    # Split into word-level chunks with overlap
    words = full_text.split()
    chunks = []
    start = 0
    while start < len(words):
        end = start + chunk_size
        chunk = " ".join(words[start:end])
        if chunk.strip():
            chunks.append(chunk)
        start += chunk_size - overlap

    return chunks
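# chunk_conversation above: consecutive chunks share overlap=100 words, so the
# window advances chunk_size - overlap = 500 words per step.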

def ingest_file(json_path):
    print(f"\nLoading {json_path.name}...")
    with open(json_path, encoding="utf-8") as f:
        data = json.load(f)
    print(f"Found {len(data)} conversations")

    total_chunks = 0
    skipped = 0

    for i, convo in enumerate(data):
        title = convo.get("title", "Untitled")
        convo_id = convo.get("id", f"convo_{i}")
        create_time = convo.get("create_time", 0)

        try:
            date_str = datetime.fromtimestamp(create_time).strftime("%Y-%m-%d")
        except Exception:
            date_str = "unknown"

        messages = extract_messages(convo)

        if len(messages) < 2:
            skipped += 1
            continue

        chunks = chunk_conversation(title, messages)
        if not chunks:
            skipped += 1
            continue

        # Embed and store
        embeddings = embedder.encode(chunks).tolist()
        ids = [f"chatgpt_{convo_id}_{j}" for j in range(len(chunks))]
        metadatas = [{
            "source": f"ChatGPT: {title}",
            "filepath": str(json_path),
            "date": date_str,
            "type": "chatgpt_conversation"
        } for _ in chunks]

        collection.upsert(
            documents=chunks,
            embeddings=embeddings,
            ids=ids,
            metadatas=metadatas
        )

        total_chunks += len(chunks)
        print(f"  [{i+1}/{len(data)}] {title[:60]} — {len(chunks)} chunks ({date_str})")

    print(f"\nDone with {json_path.name}: {total_chunks} chunks indexed, {skipped} conversations skipped")
    return total_chunks

def main():
    export_dir = Path(EXPORT_DIR)
    files = [
        export_dir / "conversations-000.json",
        export_dir / "conversations-001.json"
    ]

    grand_total = 0
    for f in files:
        if f.exists():
            grand_total += ingest_file(f)
        else:
            print(f"Not found: {f}")

    print(f"\nTotal chunks added to corpus: {grand_total}")
    print(f"Database at: {db_path}")

if __name__ == "__main__":
    main()
@@ -0,0 +1,122 @@
import time
import subprocess
import logging
import json
from pathlib import Path
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler

NEXTCLOUD_PATH = "/home/aaron/nextcloud/data/data/aaron/files"
INGEST_SCRIPT = "/home/aaron/aaronai/scripts/ingest.py"
PYTHON = "/home/aaron/aaronai/venv/bin/python3"
LOG_FILE = "/home/aaron/aaronai/watcher.log"
STATE_FILE = "/home/aaron/aaronai/watcher_state.json"

SUPPORTED = {'.pdf', '.docx', '.pptx', '.txt', '.md'}
DEBOUNCE_SECONDS = 120
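# File events only flag work as pending; run_ingestion() fires once the watched
# tree has been quiet for DEBOUNCE_SECONDS (see the polling loop in main()).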

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(message)s',
    handlers=[
        logging.FileHandler(LOG_FILE),
        logging.StreamHandler()
    ]
)

def load_state():
    if Path(STATE_FILE).exists():
        with open(STATE_FILE) as f:
            return json.load(f)
    return {}

def save_state(state):
    with open(STATE_FILE, 'w') as f:
        json.dump(state, f)

def get_changed_files():
    state = load_state()
    changed = []
    root = Path(NEXTCLOUD_PATH)
    for path in root.rglob("*"):
        if path.is_dir():
            continue
        if path.suffix.lower() not in SUPPORTED:
            continue
        if path.name.startswith('.') or path.name.startswith('~$'):
            continue
        mtime = str(path.stat().st_mtime)
        key = str(path)
        if state.get(key) != mtime:
            changed.append(path)
    return changed, state

def run_ingestion():
    changed, state = get_changed_files()
    if not changed:
        logging.info("No new or changed files detected — skipping ingestion.")
        return

    logging.info(f"Found {len(changed)} new or changed files — starting ingestion...")
    try:
        result = subprocess.run(
            [PYTHON, INGEST_SCRIPT, NEXTCLOUD_PATH],
            capture_output=True,
            text=True,
            timeout=600
        )
        if result.returncode == 0:
            # Update state with new mtimes
            root = Path(NEXTCLOUD_PATH)
            for path in root.rglob("*"):
                if path.is_file() and path.suffix.lower() in SUPPORTED:
                    state[str(path)] = str(path.stat().st_mtime)
            save_state(state)
            logging.info("Ingestion complete. State updated.")
        else:
            logging.error(f"Ingestion error: {result.stderr}")
    except subprocess.TimeoutExpired:
        logging.error("Ingestion timed out.")
    except Exception as e:
        logging.error(f"Ingestion failed: {e}")

class IngestHandler(FileSystemEventHandler):
    def __init__(self):
        self.pending = False
        self.last_event = 0

    def on_any_event(self, event):
        if event.is_directory:
            return
        path = Path(event.src_path)
        if path.suffix.lower() not in SUPPORTED:
            return
        if path.name.startswith('.') or path.name.startswith('~$'):
            return
        self.pending = True
        self.last_event = time.time()

def main():
    logging.info("Aaron AI Watcher starting...")
    logging.info(f"Watching: {NEXTCLOUD_PATH}")

    handler = IngestHandler()
    observer = Observer()
    observer.schedule(handler, NEXTCLOUD_PATH, recursive=True)
    observer.start()

    try:
        while True:
            if handler.pending:
                elapsed = time.time() - handler.last_event
                if elapsed >= DEBOUNCE_SECONDS:
                    handler.pending = False
                    run_ingestion()
            time.sleep(5)
    except KeyboardInterrupt:
        observer.stop()
        observer.join()
        logging.info("Watcher stopped.")

if __name__ == "__main__":
    main()