chat: cap retrieve_documents per turn, truncate displayed citations, broaden lock-file skip
- MAX_RETRIEVALS_PER_TURN (5): after five retrieve_documents calls in a single
turn, further calls return a budget-exhausted message instead of executing.
Caps cost on runaway multi-query loops without forbidding compound questions.
- MAX_CITED_SOURCES (5): accumulated_sources was growing to 14+ entries across
multiple tool calls, so the UI was showing chunks Claude never actually used.
Cap the de-duplicated list returned to the UI at 5, preserving insertion order
so the highest-relevance early-call results survive. Proper fix (Claude-driven
inline citations) is bigger work, noted for later.
- ingest.py lock-file skip: changed prefix tuple from ("~$", ".") to ("~", ".")
so it catches Office lock files even when Nextcloud's filesystem encoding has
mangled the "$" into a Unicode replacement character. Note that this also
skips any other file whose name begins with "~". Matches what watcher.py
already does.
This commit is contained in:
+21
-3
@@ -277,6 +277,8 @@ def remove_from_memory(item):
|
|||||||
HYBRID_CANDIDATES = 30
|
HYBRID_CANDIDATES = 30
|
||||||
RRF_K = 60
|
RRF_K = 60
|
||||||
FINAL_LIMIT = 8
|
FINAL_LIMIT = 8
|
||||||
|
MAX_RETRIEVALS_PER_TURN = 5
|
||||||
|
MAX_CITED_SOURCES = 5
|
||||||
|
|
||||||
_TSQUERY_SANITIZE_RE = re.compile(r"[^\w\s\"'-]")
|
_TSQUERY_SANITIZE_RE = re.compile(r"[^\w\s\"'-]")
|
||||||
|
|
||||||
@@ -636,6 +638,7 @@ def chat(user_message, conversation_id, settings, client_time=None):
|
|||||||
tools.append({"type": "web_search_20250305", "name": "web_search"})
|
tools.append({"type": "web_search_20250305", "name": "web_search"})
|
||||||
|
|
||||||
accumulated_sources = []
|
accumulated_sources = []
|
||||||
|
retrieval_count = 0
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
response = anthropic_client.messages.create(
|
response = anthropic_client.messages.create(
|
||||||
@@ -653,8 +656,17 @@ def chat(user_message, conversation_id, settings, client_time=None):
|
|||||||
if block.type != "tool_use":
|
if block.type != "tool_use":
|
||||||
continue
|
continue
|
||||||
if block.name == "retrieve_documents":
|
if block.name == "retrieve_documents":
|
||||||
result_text, result_sources = _execute_retrieve_documents(block.input)
|
if retrieval_count >= MAX_RETRIEVALS_PER_TURN:
|
||||||
accumulated_sources.extend(result_sources)
|
result_text = (
|
||||||
|
f"Retrieval budget exhausted "
|
||||||
|
f"({MAX_RETRIEVALS_PER_TURN} calls used this turn). "
|
||||||
|
"Answer with the information you already have or "
|
||||||
|
"tell Aaron you need a more focused question."
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
result_text, result_sources = _execute_retrieve_documents(block.input)
|
||||||
|
accumulated_sources.extend(result_sources)
|
||||||
|
retrieval_count += 1
|
||||||
tool_results.append({
|
tool_results.append({
|
||||||
"type": "tool_result",
|
"type": "tool_result",
|
||||||
"tool_use_id": block.id,
|
"tool_use_id": block.id,
|
||||||
@@ -679,7 +691,13 @@ def chat(user_message, conversation_id, settings, client_time=None):
|
|||||||
for block in response.content:
|
for block in response.content:
|
||||||
if hasattr(block, "text"):
|
if hasattr(block, "text"):
|
||||||
assistant_message += block.text
|
assistant_message += block.text
|
||||||
return assistant_message, list(dict.fromkeys(accumulated_sources))
|
# Cap citations: accumulated_sources can grow large across multiple
|
||||||
|
# retrieve_documents calls and not every chunk that came back was
|
||||||
|
# actually used in the answer. Insertion order preserves rank
|
||||||
|
# (each call returns chunks reranker-ordered, so the earliest
|
||||||
|
# entries are the highest-relevance from the most direct queries).
|
||||||
|
deduped = list(dict.fromkeys(accumulated_sources))
|
||||||
|
return assistant_message, deduped[:MAX_CITED_SOURCES]
|
||||||
|
|
||||||
from contextlib import asynccontextmanager
|
from contextlib import asynccontextmanager
|
||||||
|
|
||||||
|
|||||||
+3
-1
@@ -82,7 +82,9 @@ IGNORED_TOP_FOLDERS = {"Drafts"}
|
|||||||
|
|
||||||
def _ingest_one(filepath: Path, embedder, root: Path = None) -> int:
|
def _ingest_one(filepath: Path, embedder, root: Path = None) -> int:
|
||||||
"""Ingest a single file. Returns chunk count, 0 on skip/failure."""
|
"""Ingest a single file. Returns chunk count, 0 on skip/failure."""
|
||||||
if filepath.name.startswith(("~$", ".")):
|
# "~" catches Office lock files (~$) including the case where Nextcloud
|
||||||
|
# filesystem encoding has mangled the "$" to a unicode replacement char.
|
||||||
|
if filepath.name.startswith(("~", ".")):
|
||||||
return 0
|
return 0
|
||||||
if filepath.suffix.lower() not in SUPPORTED:
|
if filepath.suffix.lower() not in SUPPORTED:
|
||||||
return 0
|
return 0
|
||||||
|
|||||||
Reference in New Issue
Block a user