chat: cap retrieve_documents per turn, truncate displayed citations, broaden lock-file skip

- MAX_RETRIEVALS_PER_TURN (5): after five retrieve_documents calls in a single
  turn, further calls return a budget-exhausted message instead of executing.
  Caps cost on runaway multi-query loops without forbidding compound questions.

- MAX_CITED_SOURCES (5): accumulated_sources was growing to 14+ entries across
  multiple tool calls and showing chunks Claude never actually used. Cap the
  de-duplicated list returned to the UI at 5, preserving insertion order so
  the highest-relevance results from the earliest calls survive. The proper
  fix (Claude-driven inline citations) is bigger work, noted for later.

- ingest.py lock-file skip: changed the prefix tuple from ("~$", ".") to
  ("~", ".") so it catches Office lock files even when Nextcloud's filesystem
  encoding has mangled the "$" into a Unicode replacement character. Note
  that this also skips any other file whose name starts with "~". Matches
  what watcher.py already does.
This commit is contained in:
2026-05-20 02:22:54 +00:00
parent 430ea239dd
commit 9bb083f065
2 changed files with 24 additions and 4 deletions
+19 -1
View File
@@ -277,6 +277,8 @@ def remove_from_memory(item):
HYBRID_CANDIDATES = 30 HYBRID_CANDIDATES = 30
RRF_K = 60 RRF_K = 60
FINAL_LIMIT = 8 FINAL_LIMIT = 8
MAX_RETRIEVALS_PER_TURN = 5
MAX_CITED_SOURCES = 5
_TSQUERY_SANITIZE_RE = re.compile(r"[^\w\s\"'-]") _TSQUERY_SANITIZE_RE = re.compile(r"[^\w\s\"'-]")
@@ -636,6 +638,7 @@ def chat(user_message, conversation_id, settings, client_time=None):
tools.append({"type": "web_search_20250305", "name": "web_search"}) tools.append({"type": "web_search_20250305", "name": "web_search"})
accumulated_sources = [] accumulated_sources = []
retrieval_count = 0
while True: while True:
response = anthropic_client.messages.create( response = anthropic_client.messages.create(
@@ -653,8 +656,17 @@ def chat(user_message, conversation_id, settings, client_time=None):
if block.type != "tool_use": if block.type != "tool_use":
continue continue
if block.name == "retrieve_documents": if block.name == "retrieve_documents":
if retrieval_count >= MAX_RETRIEVALS_PER_TURN:
result_text = (
f"Retrieval budget exhausted "
f"({MAX_RETRIEVALS_PER_TURN} calls used this turn). "
"Answer with the information you already have or "
"tell Aaron you need a more focused question."
)
else:
result_text, result_sources = _execute_retrieve_documents(block.input) result_text, result_sources = _execute_retrieve_documents(block.input)
accumulated_sources.extend(result_sources) accumulated_sources.extend(result_sources)
retrieval_count += 1
tool_results.append({ tool_results.append({
"type": "tool_result", "type": "tool_result",
"tool_use_id": block.id, "tool_use_id": block.id,
@@ -679,7 +691,13 @@ def chat(user_message, conversation_id, settings, client_time=None):
for block in response.content: for block in response.content:
if hasattr(block, "text"): if hasattr(block, "text"):
assistant_message += block.text assistant_message += block.text
return assistant_message, list(dict.fromkeys(accumulated_sources)) # Cap citations: accumulated_sources can grow large across multiple
# retrieve_documents calls and not every chunk that came back was
# actually used in the answer. Insertion order preserves rank
# (each call returns chunks reranker-ordered, so the earliest
# entries are the highest-relevance from the most direct queries).
deduped = list(dict.fromkeys(accumulated_sources))
return assistant_message, deduped[:MAX_CITED_SOURCES]
from contextlib import asynccontextmanager from contextlib import asynccontextmanager
+3 -1
View File
@@ -82,7 +82,9 @@ IGNORED_TOP_FOLDERS = {"Drafts"}
def _ingest_one(filepath: Path, embedder, root: Path = None) -> int: def _ingest_one(filepath: Path, embedder, root: Path = None) -> int:
"""Ingest a single file. Returns chunk count, 0 on skip/failure.""" """Ingest a single file. Returns chunk count, 0 on skip/failure."""
if filepath.name.startswith(("~$", ".")): # "~" catches Office lock files (~$) including the case where Nextcloud
# filesystem encoding has mangled the "$" to a unicode replacement char.
if filepath.name.startswith(("~", ".")):
return 0 return 0
if filepath.suffix.lower() not in SUPPORTED: if filepath.suffix.lower() not in SUPPORTED:
return 0 return 0