Migrate to pgvector — remove ChromaDB from api.py, ingest scripts, dream.py
This commit is contained in:
@@ -2,7 +2,9 @@ import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from sentence_transformers import SentenceTransformer
|
||||
import chromadb
|
||||
import psycopg2
|
||||
import psycopg2.extras
|
||||
import json as json_module
|
||||
|
||||
# Paths
|
||||
db_path = str(Path.home() / "aaronai" / "db")
|
||||
@@ -102,12 +104,26 @@ def ingest_conversations(path):
|
||||
continue
|
||||
|
||||
embeddings = embedder.encode([n[1] for n in new]).tolist()
|
||||
collection.upsert(
|
||||
ids=[n[0] for n in new],
|
||||
documents=[n[1] for n in new],
|
||||
metadatas=[n[2] for n in new],
|
||||
embeddings=embeddings,
|
||||
)
|
||||
pg = get_pg()
|
||||
cur = pg.cursor()
|
||||
for (chunk_id, chunk_text, meta), embedding in zip(new, embeddings):
|
||||
cur.execute("""
|
||||
INSERT INTO embeddings (id, document, embedding, source, type, created_at, metadata)
|
||||
VALUES (%s, %s, %s::vector, %s, %s, %s, %s)
|
||||
ON CONFLICT (id) DO UPDATE SET
|
||||
document = EXCLUDED.document,
|
||||
embedding = EXCLUDED.embedding,
|
||||
source = EXCLUDED.source,
|
||||
type = EXCLUDED.type,
|
||||
created_at = EXCLUDED.created_at,
|
||||
metadata = EXCLUDED.metadata
|
||||
""", (
|
||||
chunk_id, chunk_text, embedding,
|
||||
meta.get('source'), meta.get('type'), meta.get('created_at'),
|
||||
json_module.dumps(meta)
|
||||
))
|
||||
pg.commit()
|
||||
pg.close()
|
||||
total += len(new)
|
||||
|
||||
print(f"Conversations: {total} chunks added, {skipped} skipped")
|
||||
|
||||
Reference in New Issue
Block a user