diff --git a/scripts/api.py b/scripts/api.py
index 7c0c5cc..b882503 100644
--- a/scripts/api.py
+++ b/scripts/api.py
@@ -277,6 +277,8 @@ def remove_from_memory(item):
 HYBRID_CANDIDATES = 30
 RRF_K = 60
 FINAL_LIMIT = 8
+MAX_RETRIEVALS_PER_TURN = 5
+MAX_CITED_SOURCES = 5
 
 _TSQUERY_SANITIZE_RE = re.compile(r"[^\w\s\"'-]")
 
@@ -636,6 +638,7 @@ def chat(user_message, conversation_id, settings, client_time=None):
         tools.append({"type": "web_search_20250305", "name": "web_search"})
 
     accumulated_sources = []
+    retrieval_count = 0
 
     while True:
         response = anthropic_client.messages.create(
@@ -653,8 +656,17 @@ def chat(user_message, conversation_id, settings, client_time=None):
                 if block.type != "tool_use":
                     continue
                 if block.name == "retrieve_documents":
-                    result_text, result_sources = _execute_retrieve_documents(block.input)
-                    accumulated_sources.extend(result_sources)
+                    if retrieval_count >= MAX_RETRIEVALS_PER_TURN:
+                        result_text = (
+                            f"Retrieval budget exhausted "
+                            f"({MAX_RETRIEVALS_PER_TURN} calls used this turn). "
+                            "Answer with the information you already have or "
+                            "tell Aaron you need a more focused question."
+                        )
+                    else:
+                        result_text, result_sources = _execute_retrieve_documents(block.input)
+                        accumulated_sources.extend(result_sources)
+                        retrieval_count += 1
                     tool_results.append({
                         "type": "tool_result",
                         "tool_use_id": block.id,
@@ -679,7 +691,13 @@ def chat(user_message, conversation_id, settings, client_time=None):
             for block in response.content:
                 if hasattr(block, "text"):
                     assistant_message += block.text
-            return assistant_message, list(dict.fromkeys(accumulated_sources))
+            # Cap citations: accumulated_sources can grow large across multiple
+            # retrieve_documents calls, and not every chunk that came back was
+            # actually used in the answer. dict.fromkeys keeps first-seen order:
+            # call order first, then each call's reranker order. The cap thus
+            # favors the earliest (typically most direct) queries' top results.
+            deduped = list(dict.fromkeys(accumulated_sources))
+            return assistant_message, deduped[:MAX_CITED_SOURCES]
 
 from contextlib import asynccontextmanager
 
diff --git a/scripts/ingest.py b/scripts/ingest.py
index 8b37f8d..7d144bb 100644
--- a/scripts/ingest.py
+++ b/scripts/ingest.py
@@ -82,7 +82,9 @@ IGNORED_TOP_FOLDERS = {"Drafts"}
 
 def _ingest_one(filepath: Path, embedder, root: Path = None) -> int:
     """Ingest a single file. Returns chunk count, 0 on skip/failure."""
-    if filepath.name.startswith(("~$", ".")):
+    # Skip all "~"-prefixed names: this covers Office lock files (~$) even when
+    # Nextcloud filesystem encoding mangled the "$" to a Unicode replacement char.
+    if filepath.name.startswith(("~", ".")):
         return 0
     if filepath.suffix.lower() not in SUPPORTED:
         return 0