chore: archive deprecated chromadb and migration scripts
@@ -0,0 +1,250 @@
import os
import json
from pathlib import Path
from datetime import datetime

from dotenv import load_dotenv
import chromadb
from sentence_transformers import SentenceTransformer
import anthropic

load_dotenv(Path.home() / "aaronai" / ".env")

memory_path = Path.home() / "aaronai" / "memory.md"
db_path = str(Path.home() / "aaronai" / "db")

print("Loading Aaron AI...")
embedder = SentenceTransformer("all-MiniLM-L6-v2")
chroma_client = chromadb.PersistentClient(path=db_path)
# Cosine space: query distances come back as 1 - cosine similarity,
# which retrieve_context() converts back into a relevance score.
collection = chroma_client.get_or_create_collection(
    name="aaronai",
    metadata={"hnsw:space": "cosine"}
)
anthropic_client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))

SYSTEM_PROMPT = """You are Aaron Nelson's personal AI assistant. Aaron is an Associate Professor
of Digital Design & Fabrication and Program Director of the Hudson Valley Additive Manufacturing
Center (HVAMC) at SUNY New Paltz. He is an expert in computational design, additive manufacturing,
and digital fabrication with deep fluency in Rhino, Grasshopper, Stratasys FDM, PolyJet, and metal
3D printing workflows. He runs a commercial venture called Mossygear and a consulting operation
called FWN3D. He has a background in graffiti lettering and vector illustration.

You have been provided with relevant excerpts from Aaron's own documents and his persistent memory.
Use this context to give answers grounded in his actual work and history. When helping him write
or create, match his voice and draw on his existing materials. Be direct and specific -
Aaron values precision over padding. Always cite which documents you drew from when relevant.

You have access to web search. Use it automatically when:
- Questions require current data (salaries, job postings, prices, news)
- Questions reference specific institutions, people, or organizations you need to verify
- Aaron's documents and memory don't contain sufficient information to answer well
Do not announce that you are searching. Just search and incorporate results naturally."""

CV_SOURCES = ["Aaron Nelson CV 2024.pdf"]
conversation_history = []

TOOLS = [
    {
        "type": "web_search_20250305",
        "name": "web_search"
    }
]
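
# NOTE (assumption): "web_search_20250305" appears to be Anthropic's hosted web
# search tool, which the Messages API executes server-side. The client only
# declares it here; it does not need to run searches itself.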

def load_memory():
    """Return the persistent memory file's contents, or "" if it doesn't exist yet."""
    if memory_path.exists():
        return memory_path.read_text(encoding="utf-8")
    return ""


def save_memory(content):
    memory_path.write_text(content, encoding="utf-8")


def add_to_memory(new_item):
    """Append a dated bullet under the "## Notes" heading, creating the heading if needed."""
    memory = load_memory()
    timestamp = datetime.now().strftime("%Y-%m-%d")
    note = f"\n- [{timestamp}] {new_item}"
    if "## Notes" not in memory:
        memory += "\n\n## Notes"
    memory += note
    save_memory(memory)


def remove_from_memory(item):
    """Drop every memory line containing item (case-insensitive); return how many were removed."""
    memory = load_memory()
    lines = memory.split("\n")
    filtered = [line for line in lines if item.lower() not in line.lower()]
    save_memory("\n".join(filtered))
    return len(lines) - len(filtered)
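
# Illustrative memory.md shape after a couple of `remember:` commands
# (example entries only, not real data):
#
#   ## Notes
#   - [2025-01-15] Prefers matte PLA for Mossygear display pieces
#   - [2025-02-03] FWN3D consulting bills at a day rate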

def get_pinned_cv_context():
    """Fetch every stored CV chunk directly, regardless of vector similarity."""
    results = collection.get(
        where={"source": "Aaron Nelson CV 2024.pdf"},
        include=["documents", "metadatas"]
    )
    return results["documents"], results["metadatas"]

def is_professional_query(query):
    """Keyword heuristic: does the query look like it concerns Aaron's career or CV?

    Matching is substring-based, so short keywords like "cv" can also match
    inside longer words.
    """
    keywords = [
        "grant", "publication", "exhibition", "award", "fellowship",
        "experience", "position", "job", "career", "cv", "resume",
        "research", "work history", "accomplishment", "teaching",
        "course", "client", "consultation", "presentation", "workshop",
        "education", "degree", "institution", "service", "committee"
    ]
    return any(keyword in query.lower() for keyword in keywords)
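
# Quick sanity check of the heuristic (hypothetical inputs):
#   is_professional_query("any upcoming grant deadlines?")  -> True  (matches "grant")
#   is_professional_query("what's a good infill for PETG?") -> False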

def retrieve_context(query, n_results=8):
    """Embed the query, pull the top matches from Chroma, and pin CV chunks for career questions."""
    query_embedding = embedder.encode([query]).tolist()
    results = collection.query(
        query_embeddings=query_embedding,
        n_results=n_results,
        include=["documents", "metadatas", "distances"]
    )

    context_pieces = []
    sources = []

    # Career-sounding queries always get the full CV, independent of similarity
    if is_professional_query(query):
        cv_docs, cv_metas = get_pinned_cv_context()
        for doc, meta in zip(cv_docs, cv_metas):
            context_pieces.append(f"[CV] {doc}")
            sources.append(meta["source"])

    for doc, meta, dist in zip(
        results["documents"][0],
        results["metadatas"][0],
        results["distances"][0]
    ):
        # Chroma's cosine distance is 1 - similarity, so this recovers similarity
        relevance = 1 - dist
        if relevance > 0.3 and meta["source"] not in CV_SOURCES:
            context_pieces.append(doc)
            sources.append(meta["source"])

    return context_pieces, sources
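
# Illustrative call (document names depend on whatever the ingest step indexed):
#   pieces, sources = retrieve_context("which grants has Aaron received?")
#   -> all CV chunks pinned first (career keyword hit), then any other chunk
#      whose cosine similarity to the query exceeds 0.3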

def handle_command(user_input):
    """Handle the built-in commands; return True if the input was consumed."""
    stripped = user_input.strip().lower()

    if stripped == "show memory":
        memory = load_memory()
        print(f"\nAaron AI: Current memory:\n\n{memory}")
        return True

    if stripped.startswith("remember:"):
        item = user_input[len("remember:"):].strip()
        add_to_memory(item)
        print(f"\nAaron AI: Saved to memory: '{item}'")
        return True

    if stripped.startswith("forget:"):
        item = user_input[len("forget:"):].strip()
        removed = remove_from_memory(item)
        if removed:
            print(f"\nAaron AI: Removed {removed} line(s) containing '{item}' from memory.")
        else:
            print(f"\nAaron AI: Nothing found in memory containing '{item}'.")
        return True

    if stripped == "clear":
        conversation_history.clear()
        print("\nAaron AI: Conversation history cleared.")
        return True

    return False

def chat(user_message):
    memory = load_memory()
    context_pieces, sources = retrieve_context(user_message)

    context_parts = []
    if memory:
        context_parts.append(f"Aaron's persistent memory:\n\n{memory}")
    if context_pieces:
        context_str = "\n\n---\n\n".join(context_pieces)
        unique_sources = list(set(sources))
        context_parts.append(
            f"Relevant excerpts from Aaron's documents:\n\n{context_str}\n\nSources: {', '.join(unique_sources)}"
        )

    context_block = "\n\n====\n\n".join(context_parts) + "\n\n---\n\n" if context_parts else ""
    full_message = context_block + user_message

    # Build messages for this turn
    messages = conversation_history + [{"role": "user", "content": full_message}]

    # Agentic loop to handle tool use
    while True:
        response = anthropic_client.messages.create(
            model="claude-sonnet-4-6",
            max_tokens=2048,
            system=SYSTEM_PROMPT,
            tools=TOOLS,
            messages=messages
        )

        # Check if we need to handle tool calls
        if response.stop_reason == "tool_use":
            # NOTE (assumption): the hosted web_search tool is normally executed
            # server-side, so this client-side branch may never fire; it is kept
            # as a defensive fallback and returns only a placeholder result.
            messages.append({"role": "assistant", "content": response.content})

            # Process each tool use block
            tool_results = []
            for block in response.content:
                if block.type == "tool_use":
                    tool_results.append({
                        "type": "tool_result",
                        "tool_use_id": block.id,
                        "content": "Search completed"
                    })

            # Add tool results and continue
            messages.append({"role": "user", "content": tool_results})

        else:
            # Final response - extract text
            assistant_message = ""
            for block in response.content:
                if hasattr(block, "text"):
                    assistant_message += block.text

            # Update conversation history with clean versions
            conversation_history.append({"role": "user", "content": full_message})
            conversation_history.append({"role": "assistant", "content": assistant_message})

            # Keep only the ten most recent user/assistant exchanges
            if len(conversation_history) > 20:
                conversation_history.pop(0)
                conversation_history.pop(0)

            return assistant_message, sources
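
# Single-turn usage sketch (hypothetical prompt):
#   reply, sources = chat("Draft a bio for an HVAMC workshop flyer")
#   `reply` is the assistant's text; `sources` lists which indexed documents
#   were injected into the prompt for this turn.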

def main():
    print("Aaron AI ready. Corpus, memory, and web search loaded.")
    print("Commands: 'remember: [fact]' | 'forget: [text]' | 'show memory' | 'clear' | 'quit'")
    print("=" * 60)

    while True:
        try:
            user_input = input("\nYou: ").strip()

            if not user_input:
                continue

            if user_input.lower() == "quit":
                print("Goodbye.")
                break

            if handle_command(user_input):
                continue

            response, sources = chat(user_input)
            print(f"\nAaron AI: {response}")

            if sources:
                unique = list(set(sources))
                print(f"\n[Sources: {', '.join(unique)}]")

        except KeyboardInterrupt:
            print("\nGoodbye.")
            break
        except Exception as e:
            print(f"Error: {e}")


if __name__ == "__main__":
    main()