diff --git a/scripts/api.py b/scripts/api.py index 9c86727..59a38dd 100644 --- a/scripts/api.py +++ b/scripts/api.py @@ -62,7 +62,7 @@ embedder = SentenceTransformer("all-MiniLM-L6-v2") chroma_client = chromadb.PersistentClient(path=DB_PATH) collection = chroma_client.get_or_create_collection( name="aaronai", - metadata={"hnsw:space": "cosine"} + metadata={"hnsw:space": "cosine", "hnsw:allow_replace_deleted": True} ) anthropic_client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY")) diff --git a/scripts/dream.py b/scripts/dream.py index 5791cb5..6d08d7d 100644 --- a/scripts/dream.py +++ b/scripts/dream.py @@ -122,7 +122,7 @@ def retrieve(mode, task=None, project=None, n_results=8): client = chromadb.PersistentClient(path=DB_PATH) collection = client.get_or_create_collection( name="aaronai", - metadata={"hnsw:space": "cosine"} + metadata={"hnsw:space": "cosine", "hnsw:allow_replace_deleted": True} ) low, high = MODE_RANGES[mode] diff --git a/scripts/ingest.py b/scripts/ingest.py index 8972d55..ac87feb 100644 --- a/scripts/ingest.py +++ b/scripts/ingest.py @@ -18,7 +18,7 @@ db_path = str(Path.home() / "aaronai" / "db") client = chromadb.PersistentClient(path=db_path) collection = client.get_or_create_collection( name="aaronai", - metadata={"hnsw:space": "cosine"} + metadata={"hnsw:space": "cosine", "hnsw:allow_replace_deleted": True} ) def extract_text_from_docx(path): diff --git a/scripts/ingest_chatgpt.py b/scripts/ingest_chatgpt.py index 4ace01b..2bb6406 100644 --- a/scripts/ingest_chatgpt.py +++ b/scripts/ingest_chatgpt.py @@ -14,7 +14,7 @@ embedder = SentenceTransformer("all-MiniLM-L6-v2") client = chromadb.PersistentClient(path=db_path) collection = client.get_or_create_collection( name="aaronai", - metadata={"hnsw:space": "cosine"} + metadata={"hnsw:space": "cosine", "hnsw:allow_replace_deleted": True} ) def extract_messages(convo): diff --git a/scripts/ingest_claude.py b/scripts/ingest_claude.py index f3d08e5..977c93d 100644 --- a/scripts/ingest_claude.py +++ b/scripts/ingest_claude.py @@ -13,7 +13,7 @@ embedder = SentenceTransformer("all-MiniLM-L6-v2") client = chromadb.PersistentClient(path=db_path) collection = client.get_or_create_collection( name="aaronai", - metadata={"hnsw:space": "cosine"} + metadata={"hnsw:space": "cosine", "hnsw:allow_replace_deleted": True} ) def extract_messages(convo): @@ -102,7 +102,7 @@ def ingest_conversations(path): continue embeddings = embedder.encode([n[1] for n in new]).tolist() - collection.add( + collection.upsert( ids=[n[0] for n in new], documents=[n[1] for n in new], metadatas=[n[2] for n in new], @@ -138,7 +138,7 @@ def ingest_memories(path): collection.delete(ids=[chunk_id]) embedding = embedder.encode([chunk_text]).tolist() - collection.add( + collection.upsert( ids=[chunk_id], documents=[chunk_text], metadatas=[{