Migrate to pgvector — remove ChromaDB from api.py, ingest scripts, dream.py

This commit is contained in:
2026-04-26 21:16:04 +00:00
parent d2eed98906
commit f78b83042b
6 changed files with 250 additions and 83 deletions
+23 -7
View File
@@ -2,7 +2,9 @@ import json
import sys
from pathlib import Path
from sentence_transformers import SentenceTransformer
import chromadb
import psycopg2
import psycopg2.extras
import json as json_module
# Paths
db_path = str(Path.home() / "aaronai" / "db")
@@ -102,12 +104,26 @@ def ingest_conversations(path):
continue
embeddings = embedder.encode([n[1] for n in new]).tolist()
collection.upsert(
ids=[n[0] for n in new],
documents=[n[1] for n in new],
metadatas=[n[2] for n in new],
embeddings=embeddings,
)
# Upsert this batch of chunks into the pgvector-backed `embeddings` table.
# psycopg2.extras.execute_values expands all rows into a single
# INSERT ... VALUES statement — one round trip instead of the previous
# per-row execute loop (the file already imports psycopg2.extras).
# NOTE(review): assumes get_pg() returns a fresh psycopg2 connection per
# call — if it ever returns a pooled/shared connection, closing it here
# would be wrong; confirm against get_pg's definition.
pg = get_pg()
try:
    with pg.cursor() as cur:
        psycopg2.extras.execute_values(
            cur,
            """
            INSERT INTO embeddings (id, document, embedding, source, type, created_at, metadata)
            VALUES %s
            ON CONFLICT (id) DO UPDATE SET
                document = EXCLUDED.document,
                embedding = EXCLUDED.embedding,
                source = EXCLUDED.source,
                type = EXCLUDED.type,
                created_at = EXCLUDED.created_at,
                metadata = EXCLUDED.metadata
            """,
            [
                (
                    chunk_id,
                    chunk_text,
                    embedding,
                    meta.get('source'),
                    meta.get('type'),
                    meta.get('created_at'),
                    json_module.dumps(meta),
                )
                for (chunk_id, chunk_text, meta), embedding in zip(new, embeddings)
            ],
            # Keep the explicit ::vector cast on the embedding column so the
            # Python list is adapted to a pgvector value, matching the
            # original query's behavior.
            template="(%s, %s, %s::vector, %s, %s, %s, %s)",
        )
    pg.commit()
finally:
    # Close even when the insert raises so we don't leak one connection
    # per failed batch.
    pg.close()
total += len(new)
print(f"Conversations: {total} chunks added, {skipped} skipped")