From 5b4a29941405d1409792adcba4922bec7399be11 Mon Sep 17 00:00:00 2001 From: Aaron Nelson Date: Tue, 5 May 2026 02:52:33 +0000 Subject: [PATCH] encoding.py: write_embeddings_batch accepts commit parameter for transactional composition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds an optional commit=True parameter to write_embeddings_batch. When True (default, matching prior behavior), the function commits the connection after the per-row UPSERT loop. When False, the caller manages the transaction. This unblocks fix #1 (pgvector-bypass paths) and fix #2 (watcher two-transaction pattern), both of which need to compose embeddings writes with other database writes in the same transaction. Without this lever, either fix would require duplicating the UPSERT logic outside this helper or introducing a second commit boundary inside an otherwise atomic operation. No behavior change for existing callers — they all use the default commit=True and continue working unchanged. --- scripts/encoding.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/scripts/encoding.py b/scripts/encoding.py index 41e06b6..a3db6bb 100644 --- a/scripts/encoding.py +++ b/scripts/encoding.py @@ -202,8 +202,8 @@ def chunk_and_embed(text: str, return rows -def write_embeddings_batch(conn, batch: list[dict]) -> int: - """Single canonical INSERT. Sets created_at = NOW() server-side. Commits. +def write_embeddings_batch(conn, batch: list[dict], commit: bool = True) -> int: + """Single canonical INSERT. Sets created_at = NOW() server-side. Every row dict must supply 'type'. created_at is SQL-supplied (NOW()), so callers do not need to provide it. The application-layer assertion is the @@ -211,6 +211,11 @@ def write_embeddings_batch(conn, batch: list[dict]) -> int: historical NULLs were resolved by the Improvement #2 backfill, and a Python-level raise gives a faster, more debuggable failure than a Postgres constraint error. + + When commit=True (default), this function commits the connection itself. + When commit=False, the caller is responsible for committing. Use + commit=False when composing this write with other writes that must land + atomically in the same transaction. """ if not batch: return 0 @@ -233,5 +238,6 @@ def write_embeddings_batch(conn, batch: list[dict]) -> int: metadata = EXCLUDED.metadata """, (row["id"], row["document"], row["embedding"], row["source"], row["type"], json.dumps(row["metadata"]))) - conn.commit() + if commit: + conn.commit() return len(batch)