chore: archive deprecated chromadb and migration scripts
@@ -0,0 +1,250 @@
import os
import json
from pathlib import Path
from datetime import datetime

from dotenv import load_dotenv
import chromadb
from sentence_transformers import SentenceTransformer
import anthropic

load_dotenv(Path.home() / "aaronai" / ".env")

memory_path = Path.home() / "aaronai" / "memory.md"
db_path = str(Path.home() / "aaronai" / "db")

print("Loading Aaron AI...")
embedder = SentenceTransformer("all-MiniLM-L6-v2")
chroma_client = chromadb.PersistentClient(path=db_path)
# Cosine space: query distances come back as 1 - cosine similarity,
# which retrieve_context() converts back into a relevance score.
collection = chroma_client.get_or_create_collection(
    name="aaronai",
    metadata={"hnsw:space": "cosine"}
)
anthropic_client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))

SYSTEM_PROMPT = """You are Aaron Nelson's personal AI assistant. Aaron is an Associate Professor
of Digital Design & Fabrication and Program Director of the Hudson Valley Additive Manufacturing
Center (HVAMC) at SUNY New Paltz. He is an expert in computational design, additive manufacturing,
and digital fabrication with deep fluency in Rhino, Grasshopper, Stratasys FDM, PolyJet, and metal
3D printing workflows. He runs a commercial venture called Mossygear and a consulting operation
called FWN3D. He has a background in graffiti lettering and vector illustration.

You have been provided with relevant excerpts from Aaron's own documents and his persistent memory.
Use this context to give answers grounded in his actual work and history. When helping him write
or create, match his voice and draw on his existing materials. Be direct and specific -
Aaron values precision over padding. Always cite which documents you drew from when relevant.

You have access to web search. Use it automatically when:
- Questions require current data (salaries, job postings, prices, news)
- Questions reference specific institutions, people, or organizations you need to verify
- Aaron's documents and memory don't contain sufficient information to answer well
Do not announce that you are searching. Just search and incorporate results naturally."""

CV_SOURCES = ["Aaron Nelson CV 2024.pdf"]
conversation_history = []

TOOLS = [
    {
        "type": "web_search_20250305",
        "name": "web_search"
    }
]
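
# NOTE (assumption): "web_search_20250305" appears to be Anthropic's hosted web
# search tool, which the Messages API executes server-side. The client only
# declares it here; it does not need to run searches itself.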

def load_memory():
    """Return the persistent memory file's contents, or "" if it doesn't exist yet."""
    if memory_path.exists():
        return memory_path.read_text(encoding="utf-8")
    return ""


def save_memory(content):
    memory_path.write_text(content, encoding="utf-8")


def add_to_memory(new_item):
    """Append a dated bullet under the "## Notes" heading, creating the heading if needed."""
    memory = load_memory()
    timestamp = datetime.now().strftime("%Y-%m-%d")
    note = f"\n- [{timestamp}] {new_item}"
    if "## Notes" not in memory:
        memory += "\n\n## Notes"
    memory += note
    save_memory(memory)


def remove_from_memory(item):
    """Drop every memory line containing item (case-insensitive); return how many were removed."""
    memory = load_memory()
    lines = memory.split("\n")
    filtered = [line for line in lines if item.lower() not in line.lower()]
    save_memory("\n".join(filtered))
    return len(lines) - len(filtered)
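
# Illustrative memory.md shape after a couple of `remember:` commands
# (example entries only, not real data):
#
#   ## Notes
#   - [2025-01-15] Prefers matte PLA for Mossygear display pieces
#   - [2025-02-03] FWN3D consulting bills at a day rate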

def get_pinned_cv_context():
    """Fetch every stored CV chunk directly, regardless of vector similarity."""
    results = collection.get(
        where={"source": "Aaron Nelson CV 2024.pdf"},
        include=["documents", "metadatas"]
    )
    return results["documents"], results["metadatas"]

def is_professional_query(query):
    """Keyword heuristic: does the query look like it concerns Aaron's career or CV?

    Matching is substring-based, so short keywords like "cv" can also match
    inside longer words.
    """
    keywords = [
        "grant", "publication", "exhibition", "award", "fellowship",
        "experience", "position", "job", "career", "cv", "resume",
        "research", "work history", "accomplishment", "teaching",
        "course", "client", "consultation", "presentation", "workshop",
        "education", "degree", "institution", "service", "committee"
    ]
    return any(keyword in query.lower() for keyword in keywords)
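
# Quick sanity check of the heuristic (hypothetical inputs):
#   is_professional_query("any upcoming grant deadlines?")  -> True  (matches "grant")
#   is_professional_query("what's a good infill for PETG?") -> False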

def retrieve_context(query, n_results=8):
    """Embed the query, pull the top matches from Chroma, and pin CV chunks for career questions."""
    query_embedding = embedder.encode([query]).tolist()
    results = collection.query(
        query_embeddings=query_embedding,
        n_results=n_results,
        include=["documents", "metadatas", "distances"]
    )

    context_pieces = []
    sources = []

    # Career-sounding queries always get the full CV, independent of similarity
    if is_professional_query(query):
        cv_docs, cv_metas = get_pinned_cv_context()
        for doc, meta in zip(cv_docs, cv_metas):
            context_pieces.append(f"[CV] {doc}")
            sources.append(meta["source"])

    for doc, meta, dist in zip(
        results["documents"][0],
        results["metadatas"][0],
        results["distances"][0]
    ):
        # Chroma's cosine distance is 1 - similarity, so this recovers similarity
        relevance = 1 - dist
        if relevance > 0.3 and meta["source"] not in CV_SOURCES:
            context_pieces.append(doc)
            sources.append(meta["source"])

    return context_pieces, sources
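
# Illustrative call (document names depend on whatever the ingest step indexed):
#   pieces, sources = retrieve_context("which grants has Aaron received?")
#   -> all CV chunks pinned first (career keyword hit), then any other chunk
#      whose cosine similarity to the query exceeds 0.3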

def handle_command(user_input):
    """Handle the built-in commands; return True if the input was consumed."""
    stripped = user_input.strip().lower()

    if stripped == "show memory":
        memory = load_memory()
        print(f"\nAaron AI: Current memory:\n\n{memory}")
        return True

    if stripped.startswith("remember:"):
        item = user_input[len("remember:"):].strip()
        add_to_memory(item)
        print(f"\nAaron AI: Saved to memory: '{item}'")
        return True

    if stripped.startswith("forget:"):
        item = user_input[len("forget:"):].strip()
        removed = remove_from_memory(item)
        if removed:
            print(f"\nAaron AI: Removed {removed} line(s) containing '{item}' from memory.")
        else:
            print(f"\nAaron AI: Nothing found in memory containing '{item}'.")
        return True

    if stripped == "clear":
        conversation_history.clear()
        print("\nAaron AI: Conversation history cleared.")
        return True

    return False

def chat(user_message):
    memory = load_memory()
    context_pieces, sources = retrieve_context(user_message)

    context_parts = []
    if memory:
        context_parts.append(f"Aaron's persistent memory:\n\n{memory}")
    if context_pieces:
        context_str = "\n\n---\n\n".join(context_pieces)
        unique_sources = list(set(sources))
        context_parts.append(
            f"Relevant excerpts from Aaron's documents:\n\n{context_str}\n\nSources: {', '.join(unique_sources)}"
        )

    context_block = "\n\n====\n\n".join(context_parts) + "\n\n---\n\n" if context_parts else ""
    full_message = context_block + user_message

    # Build messages for this turn
    messages = conversation_history + [{"role": "user", "content": full_message}]

    # Agentic loop to handle tool use
    while True:
        response = anthropic_client.messages.create(
            model="claude-sonnet-4-6",
            max_tokens=2048,
            system=SYSTEM_PROMPT,
            tools=TOOLS,
            messages=messages
        )

        # Check if we need to handle tool calls
        if response.stop_reason == "tool_use":
            # NOTE (assumption): the hosted web_search tool is normally executed
            # server-side, so this client-side branch may never fire; it is kept
            # as a defensive fallback and returns only a placeholder result.
            messages.append({"role": "assistant", "content": response.content})

            # Process each tool use block
            tool_results = []
            for block in response.content:
                if block.type == "tool_use":
                    tool_results.append({
                        "type": "tool_result",
                        "tool_use_id": block.id,
                        "content": "Search completed"
                    })

            # Add tool results and continue
            messages.append({"role": "user", "content": tool_results})

        else:
            # Final response - extract text
            assistant_message = ""
            for block in response.content:
                if hasattr(block, "text"):
                    assistant_message += block.text

            # Update conversation history with clean versions
            conversation_history.append({"role": "user", "content": full_message})
            conversation_history.append({"role": "assistant", "content": assistant_message})

            # Keep only the ten most recent user/assistant exchanges
            if len(conversation_history) > 20:
                conversation_history.pop(0)
                conversation_history.pop(0)

            return assistant_message, sources
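
# Single-turn usage sketch (hypothetical prompt):
#   reply, sources = chat("Draft a bio for an HVAMC workshop flyer")
#   `reply` is the assistant's text; `sources` lists which indexed documents
#   were injected into the prompt for this turn.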

def main():
    print("Aaron AI ready. Corpus, memory, and web search loaded.")
    print("Commands: 'remember: [fact]' | 'forget: [text]' | 'show memory' | 'clear' | 'quit'")
    print("=" * 60)

    while True:
        try:
            user_input = input("\nYou: ").strip()

            if not user_input:
                continue

            if user_input.lower() == "quit":
                print("Goodbye.")
                break

            if handle_command(user_input):
                continue

            response, sources = chat(user_input)
            print(f"\nAaron AI: {response}")

            if sources:
                unique = list(set(sources))
                print(f"\n[Sources: {', '.join(unique)}]")

        except KeyboardInterrupt:
            print("\nGoodbye.")
            break
        except Exception as e:
            print(f"Error: {e}")


if __name__ == "__main__":
    main()