api.py: hybrid retrieval with intent routing and cross-encoder rerank
Replaces pure-dense top-8 retrieval with a three-stage pipeline:
- BM25 (tsvector + websearch_to_tsquery) and dense (pgvector) in parallel,
fused with Reciprocal Rank Fusion
- Optional type filter driven by classify_retrieval_intent() so questions
about prior conversations don't pull documents and vice versa
- Cross-encoder rerank (ms-marco-MiniLM-L-6-v2) over RRF candidates before
taking final top-N
Also adds scripts/reindex_docx_pptx.py — one-off re-ingest used to recover
table/header/text-box content in docx and pptx after the 93c0d89 extractor
upgrade — and scripts/test_retrieval.py to exercise the new pipeline against
representative queries.
Schema: requires GIN index on to_tsvector('english', document) (already
created out-of-band via psql since Apache AGE in shared_preload_libraries
blocks ALTER TABLE on this database).
This commit is contained in:
@@ -0,0 +1,58 @@
|
||||
"""End-to-end test of retrieve_context with intent routing + reranking.

Avoids loading the full FastAPI app; replicates the chat-handler retrieval
call shape and prints classifier output + final ranked sources for each query.

Run this file directly as a script. All work happens under the ``__main__``
guard, so merely importing it (for example when pytest collects ``test_*.py``
files) does not read the .env file, load api.py, or run any query.
"""

import importlib.util
import os
import sys
import types
from pathlib import Path

# Names api.py must define for this script to be useful.
REQUIRED_API_FUNCTIONS = ("retrieve_context", "classify_retrieval_intent")

QUERIES = [
    "write me a bio",
    "my professional bio",
    "draft a bio for the Utah application",
    "Aaron Nelson CV consulting and design work",
    "FWN3D consulting",
    "syllabi I have taught",
    "philosophy of teaching",
    "what did I tell Claude about FWN3D",
    "what did we discuss about the Utah job",
    "Hudson Valley Additive Manufacturing Center",
]


def install_import_stubs() -> None:
    """Register placeholder modules so importing api.py needs no heavy SDKs.

    ``anthropic`` gets a module (a fresh empty one unless it is already in
    ``sys.modules``) whose ``Anthropic`` attribute is replaced by a callable
    that accepts any keyword arguments and returns ``None``.
    ``faster_whisper`` gets an empty placeholder module only when it is not
    already imported.
    """
    anthropic_mod = sys.modules.setdefault(
        "anthropic", types.ModuleType("anthropic")
    )
    anthropic_mod.Anthropic = lambda **kw: None

    if "faster_whisper" not in sys.modules:
        sys.modules["faster_whisper"] = types.ModuleType("faster_whisper")


def load_api_module(
    api_path,
    required=REQUIRED_API_FUNCTIONS,
    module_name: str = "api",
) -> types.ModuleType:
    """Execute the file at *api_path* as a module and return it.

    Errors raised while the file executes are tolerated (a notice is printed)
    as long as every name in *required* was defined before the error, because
    only the function definitions are needed here, not the module's other
    top-level side effects.

    Args:
        api_path: Path of the Python source file to load.
        required: Attribute names that must exist on the loaded module.
        module_name: Name under which the module is registered in
            ``sys.modules``.

    Returns:
        The (possibly partially initialised) module object.

    Raises:
        ImportError: If no loader can be created for *api_path*, or if any
            required name is missing after execution. In the latter case the
            error swallowed during execution, if any, is attached as the cause
            so the real failure is visible instead of a later AttributeError.
    """
    spec = importlib.util.spec_from_file_location(module_name, api_path)
    if spec is None or spec.loader is None:
        raise ImportError(f"cannot create an import spec for {api_path}")

    module = importlib.util.module_from_spec(spec)
    # Register before executing, as the importlib recipe does, so code inside
    # the file that looks itself up through sys.modules finds this object.
    sys.modules[module_name] = module

    load_error = None
    try:
        spec.loader.exec_module(module)
    except Exception as e:  # deliberate best-effort: definitions may still exist
        load_error = e
        print(f"(continuing despite api.py side-effect error: {e})")

    missing = [name for name in required if not hasattr(module, name)]
    if missing:
        # Do not leave a useless half-loaded module registered.
        sys.modules.pop(module_name, None)
        raise ImportError(
            f"{api_path} did not define: {', '.join(missing)}"
        ) from load_error
    return module


def run_queries(retrieve_context, classify_retrieval_intent, queries=QUERIES) -> None:
    """Classify and retrieve each query, printing the intent and ranked sources.

    Args:
        retrieve_context: Callable invoked as
            ``retrieve_context(query, type_filter=intent)``; it must return a
            2-tuple whose second item is the iterable of sources to print.
        classify_retrieval_intent: Callable mapping a query string to the
            intent value passed on as ``type_filter``.
        queries: Query strings to run, in order.
    """
    for q in queries:
        intent = classify_retrieval_intent(q)
        # Only the sources are reported; the retrieved text pieces are unused.
        _pieces, sources = retrieve_context(q, type_filter=intent)
        print(f"\n=== {q!r} ===")
        print(f" intent: {intent}")
        for i, src in enumerate(sources, 1):
            print(f" {i}. {src}")


def main() -> None:
    """Load the environment, import api.py with stubs, and run all QUERIES."""
    # Imported here rather than at module level so that importing this file
    # stays free of side effects and third-party requirements.
    from dotenv import load_dotenv

    # Environment must be loaded before api.py executes.
    load_dotenv(Path.home() / "aaronai" / ".env", override=True)

    script_dir = Path(__file__).parent
    sys.path.insert(0, str(script_dir))

    # Stub anthropic so api.py import doesn't fail without the SDK loaded.
    # We only need retrieve_context + classify_retrieval_intent.
    install_import_stubs()

    api = load_api_module(script_dir / "api.py")
    run_queries(api.retrieve_context, api.classify_retrieval_intent)


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user