diff --git a/scripts/api.py b/scripts/api.py
index 9073679..e1f25cd 100644
--- a/scripts/api.py
+++ b/scripts/api.py
@@ -123,11 +123,26 @@ economical, specific, never performative. When answering questions,
 cite sources and acknowledge uncertainty rather than filling gaps with
 plausible-sounding content.
 
-You have access to his complete document corpus, conversation history,
-and a persistent memory file that carries his current context. Treat
-the memory file as ground truth for his present situation. Use web
-search automatically when current information is needed. Never
-re-brief on context that's already in memory or documents.
+You have a persistent memory file (always present below) that carries
+Aaron's current context — treat it as ground truth for his present
+situation.
+
+For anything beyond what's in memory, you have a retrieve_documents
+tool that searches his full knowledge base: personal documents,
+reading library, conversation transcripts, and journal entries. Call
+it whenever you need concrete information — names, dates, project
+specifics, prior thinking, exhibition records, syllabi, anything you
+don't already know. For compound questions, call it multiple times
+with different concrete queries; one call per distinct information
+need. Prefer specific tokens (named entities, project names, course
+codes) over abstract instructional phrasing — search "FWN3D
+consulting" not "my work." Results are unfiltered and ranked by
+semantic similarity; judge each chunk for relevance and ignore
+irrelevant hits rather than forcing them into the answer.
+
+Use web search automatically when current external information is
+needed. Never re-brief on context that's already in memory or
+retrieved chunks.
 
 When making factual claims about Aaron — his history, credentials, locations, dates, relationships, projects, or any specific event — you must ground the claim in a specific retrieved document or the memory file. Cite the source by name inline. If no source supports the claim, say so explicitly rather than filling the gap with plausible-sounding content. Do not confabulate. If you are inferring rather than citing, mark it as inference."""
 
@@ -251,24 +266,6 @@ FINAL_LIMIT = 8
 
 _TSQUERY_SANITIZE_RE = re.compile(r"[^\w\s\"'-]")
 
-CONVERSATION_TYPES = ["chatgpt_conversation", "claude_conversation", "aaronai_conversation"]
-DOCUMENT_TYPES = ["document"]
-MEMORY_TYPES = ["claude_memory"]
-
-_CONVO_SIGNALS = (
-    "what did i tell", "what did we discuss", "what did we talk",
-    "in our conversation", "you mentioned", "we talked about",
-    "earlier you said", "earlier i said", "did i tell you",
-    "did i say", "what did chatgpt", "what did claude",
-)
-_DOC_SIGNALS = (
-    "write me a bio", "draft a bio", "my bio", "my cv", "my resume",
-    "my professional", "my work history", "my exhibitions",
-    "my publications", "my syllabi", "my courses", "my teaching",
-    "my philosophy", "about my career", "draft a cover letter",
-    "draft my", "write a bio", "professional bio",
-)
-
 
 def _websearch_query(text: str) -> str:
     """Strip characters websearch_to_tsquery doesn't handle cleanly. Quoted
@@ -276,31 +273,6 @@ def _websearch_query(text: str) -> str:
     return _TSQUERY_SANITIZE_RE.sub(" ", text).strip()
 
 
-def classify_retrieval_intent(query: str):
-    """Return (type_filter, folder_exclude_prefixes). Either may be None.
-
-    type_filter restricts the candidate pool by `type`; folder_exclude_prefixes
-    excludes any chunk whose metadata.folder matches a LIKE 'prefix%' pattern.
-
-    Implementation is a low-effort keyword classifier — explicitly tunable and
-    swappable. For nuanced routing, replace with an LLM classifier returning
-    the same shape.
-
-    Precedence: conversation signals win over document signals — "what did I
-    tell you about my CV" is asking about the conversation, not the CV.
-
-    For biographical/document intent, also exclude the reference library
-    (Library/Foundations/* — philosophy and cognition books), which is
-    categorically different from personal artifacts but lives in the same
-    `type='document'` bucket."""
-    q = query.lower()
-    if any(s in q for s in _CONVO_SIGNALS):
-        return (CONVERSATION_TYPES, None)
-    if any(s in q for s in _DOC_SIGNALS):
-        return (DOCUMENT_TYPES, ["Library/"])
-    return (None, None)
-
-
 def _rerank(query: str, candidates: list[tuple]) -> list[tuple]:
     """Cross-encoder rerank. Candidates are (id, document, source, folder, created_at)
     tuples. Returns the same tuples reordered by reranker score with created_at as
@@ -334,71 +306,49 @@ def _dedup_key(doc: str) -> str:
     return hashlib.md5(doc[:300].lower().encode("utf-8", "ignore")).hexdigest()
 
 
-def retrieve_context(query, n_results=FINAL_LIMIT,
-                     type_filter=None, folder_exclude_prefixes=None):
+def retrieve_context(query, n_results=FINAL_LIMIT):
     """Hybrid retrieval (dense + lexical, RRF fused) followed by cross-encoder rerank.
 
     - Dense (pgvector) handles paraphrase / semantic similarity.
     - Lexical (tsvector) catches rare named tokens (FWN3D, Sono-Tek, course codes)
       the embedding model has no signal for.
     - RRF combines the two rankings without calibrating score scales.
-    - Cross-encoder rerank scores each (query, chunk) pair jointly, bridging
-      semantic gaps that bi-encoders can't (e.g., "write me a bio" -> CV chunk).
+    - Cross-encoder rerank scores each (query, chunk) pair jointly.
     - Near-duplicate collapse on output so top-N slots aren't burned by
       multi-folder copies of the same file.
 
-    type_filter: optional list of `type` values to restrict the candidate pool to.
-    folder_exclude_prefixes: optional list of folder LIKE prefixes to exclude.
-    Both default to None (no restriction). Use classify_retrieval_intent() to derive."""
+    No type or folder filtering: imposing a taxonomy at retrieval time is a
+    heuristic we've explicitly rejected. The reranker ranks, the caller (LLM)
+    decides what's relevant to its task."""
     query_embedding = embedder.encode([query]).tolist()[0]
     ts_query = _websearch_query(query)
 
     context_pieces = []
     sources = []
 
-    where_clauses = []
-    extra_params = []
-    if type_filter:
-        where_clauses.append("type = ANY(%s)")
-        extra_params.append(list(type_filter))
-    for prefix in (folder_exclude_prefixes or []):
-        where_clauses.append("(metadata->>'folder' IS NULL OR metadata->>'folder' NOT LIKE %s)")
-        extra_params.append(prefix + "%")
-
-    common_where = ("WHERE " + " AND ".join(where_clauses)) if where_clauses else ""
-
     try:
         pg = get_pg()
         cur = pg.cursor()
 
-        # pgvector 0.6 HNSW doesn't iterate past its initial candidate list when
-        # a restrictive WHERE filter is present — so a filter that excludes the
-        # top-N nearest leaves nothing. Bumping ef_search forces the index to
-        # explore more graph nodes. Cheap when unfiltered; load-bearing when filtered.
-        if where_clauses:
-            cur.execute("SET LOCAL hnsw.ef_search = 500")
-
-        cur.execute(f"""
+        cur.execute("""
             SELECT id, document, source, metadata->>'folder' AS folder, created_at
             FROM embeddings
-            {common_where}
             ORDER BY embedding <=> %s::vector
             LIMIT %s
-        """, (*extra_params, query_embedding, HYBRID_CANDIDATES))
+        """, (query_embedding, HYBRID_CANDIDATES))
         dense_hits = cur.fetchall()
 
         lexical_hits = []
         if ts_query:
-            lex_match = "to_tsvector('english', document) @@ websearch_to_tsquery('english', %s)"
-            lex_where = ("WHERE " + " AND ".join([lex_match] + where_clauses))
-            cur.execute(f"""
+            cur.execute("""
                 SELECT id, document, source, metadata->>'folder' AS folder, created_at
                 FROM embeddings
-                {lex_where}
+                WHERE to_tsvector('english', document)
+                      @@ websearch_to_tsquery('english', %s)
                 ORDER BY ts_rank(to_tsvector('english', document),
                                  websearch_to_tsquery('english', %s)) DESC
                 LIMIT %s
-            """, (ts_query, *extra_params, ts_query, HYBRID_CANDIDATES))
+            """, (ts_query, ts_query, HYBRID_CANDIDATES))
             lexical_hits = cur.fetchall()
 
         pg.close()
@@ -466,13 +416,51 @@ def create_conversation(title="New conversation"):
     conn.close()
     return conv_id
 
+RETRIEVE_DOCUMENTS_TOOL = {  # tool definition chat() always puts first in its `tools` list
+    "name": "retrieve_documents",  # chat() dispatches tool_use blocks on this exact name
+    "description": (  # model-facing guidance on when and how to call the tool
+        "Search Aaron's knowledge base — personal documents, reading library, "
+        "conversation transcripts, and journal entries — for content relevant "
+        "to a query. Call whenever you need concrete information you don't "
+        "already have from the persistent memory file. For compound questions "
+        "(e.g. 'bio emphasizing consulting work and recent research'), call "
+        "this tool multiple times with different concrete queries; one call "
+        "per distinct information need. Prefer specific named entities, "
+        "project names, course codes, or topic-specific terms over abstract "
+        "instructional phrasing — 'FWN3D consulting' retrieves better than "
+        "'my work'. Results are ranked by semantic + lexical hybrid retrieval "
+        "and a cross-encoder reranker; no taxonomy is applied, so judge each "
+        "returned chunk on its own merits and ignore irrelevant hits."
+    ),
+    "input_schema": {  # JSON-schema-style description of the tool's arguments
+        "type": "object",
+        "properties": {
+            "query": {
+                "type": "string",
+                "description": "The search query. Use concrete terms.",
+            },
+        },
+        "required": ["query"],  # sole argument; read by _execute_retrieve_documents
+    },
+}
+
+
+def _execute_retrieve_documents(tool_input):
+    """Run retrieve_context for a tool call. Returns (tool_result_text, sources).
+    A missing, null, or non-string query yields "No query provided." (no raise)."""
+    raw = tool_input.get("query") if isinstance(tool_input, dict) else None
+    query = raw.strip() if isinstance(raw, str) else ""
+    if not query:
+        return ("No query provided.", [])
+    pieces, sources = retrieve_context(query)
+    if not pieces:
+        return (f"No results for query={query!r}.", [])
+    parts = [f"[{i}] Source: {s}\n{p}" for i, (p, s) in enumerate(zip(pieces, sources), 1)]
+    return ("\n\n---\n\n".join(parts), sources)
+
+
 def chat(user_message, conversation_id, settings, client_time=None):
     memory = load_memory()
-    type_filter, folder_excludes = classify_retrieval_intent(user_message)
-    context_pieces, sources = retrieve_context(
-        user_message, type_filter=type_filter,
-        folder_exclude_prefixes=folder_excludes,
-    )
     history = get_conversation_history(conversation_id)
 
     context_parts = []
@@ -480,40 +468,45 @@ def chat(user_message, conversation_id, settings, client_time=None):
         context_parts.append(f"Current time (user-supplied, not logged): {client_time}")
     if memory:
         context_parts.append(f"Aaron's persistent memory:\n\n{memory}")
-    if context_pieces:
-        context_str = "\n\n---\n\n".join(context_pieces)
-        unique_sources = list(set(sources))
-        context_parts.append(
-            f"Relevant excerpts from Aaron's documents:\n\n{context_str}\n\nSources: {', '.join(unique_sources)}"
-        )
     context_block = "\n\n====\n\n".join(context_parts) + "\n\n---\n\n" if context_parts else ""
     full_message = context_block + user_message
 
     messages = history + [{"role": "user", "content": full_message}]
 
-    tools = [{"type": "web_search_20250305", "name": "web_search"}] if settings.get("web_search", True) else []
+    tools = [RETRIEVE_DOCUMENTS_TOOL]
+    if settings.get("web_search", True):
+        tools.append({"type": "web_search_20250305", "name": "web_search"})
+
+    accumulated_sources = []
 
     while True:
-        kwargs = {
-            "model": "claude-sonnet-4-6",
-            "max_tokens": 2048,
-            "system": SYSTEM_PROMPT,
-            "messages": messages
-        }
-        if tools:
-            kwargs["tools"] = tools
-
-        response = anthropic_client.messages.create(**kwargs)
+        response = anthropic_client.messages.create(
+            model="claude-sonnet-4-6",
+            max_tokens=2048,
+            system=SYSTEM_PROMPT,
+            messages=messages,
+            tools=tools,
+        )
 
         if response.stop_reason == "tool_use":
             messages.append({"role": "assistant", "content": response.content})
             tool_results = []
             for block in response.content:
-                if block.type == "tool_use":
+                if block.type != "tool_use":
+                    continue
+                if block.name == "retrieve_documents":
+                    result_text, result_sources = _execute_retrieve_documents(block.input)
+                    accumulated_sources.extend(result_sources)
                     tool_results.append({
                         "type": "tool_result",
                         "tool_use_id": block.id,
-                        "content": "Search completed"
+                        "content": result_text,
+                    })
+                else:
+                    tool_results.append({
+                        "type": "tool_result",
+                        "tool_use_id": block.id,
+                        "content": "Search completed",
                     })
             messages.append({"role": "user", "content": tool_results})
         else:
@@ -521,7 +514,7 @@ def chat(user_message, conversation_id, settings, client_time=None):
             for block in response.content:
                 if hasattr(block, "text"):
                     assistant_message += block.text
-            return assistant_message, list(set(sources))
+            return assistant_message, list(dict.fromkeys(accumulated_sources))
 
 from contextlib import asynccontextmanager
 
diff --git a/scripts/test_retrieval.py b/scripts/test_retrieval.py
index 88ffd75..339f2f1 100644
--- a/scripts/test_retrieval.py
+++ b/scripts/test_retrieval.py
@@ -14,7 +14,7 @@ load_dotenv(Path.home() / "aaronai" / ".env", override=True)
 sys.path.insert(0, str(Path(__file__).parent))
 
 # Stub anthropic so api.py import doesn't fail without the SDK loaded.
-# We only need retrieve_context + classify_retrieval_intent.
+# We only need retrieve_context.
 import types
 sys.modules.setdefault("anthropic", types.ModuleType("anthropic"))
 sys.modules["anthropic"].Anthropic = lambda **kw: None
@@ -34,27 +34,20 @@ except Exception as e:
     print(f"(continuing despite api.py side-effect error: {e})")
 
 retrieve_context = api.retrieve_context
-classify_retrieval_intent = api.classify_retrieval_intent
 
 QUERIES = [
     "write me a bio",
     "my professional bio",
-    "draft a bio for the Utah application",
     "Aaron Nelson CV consulting and design work",
     "FWN3D consulting",
     "syllabi I have taught",
     "philosophy of teaching",
-    "what did I tell Claude about FWN3D",
-    "what did we discuss about the Utah job",
     "Hudson Valley Additive Manufacturing Center",
+    "Aaron Nelson is an artist and educator working in additive manufacturing",
 ]
 
 for q in QUERIES:
-    type_filter, folder_excludes = classify_retrieval_intent(q)
-    pieces, sources = retrieve_context(
-        q, type_filter=type_filter, folder_exclude_prefixes=folder_excludes,
-    )
+    pieces, sources = retrieve_context(q)
     print(f"\n=== {q!r} ===")
-    print(f"  type_filter: {type_filter}  folder_excludes: {folder_excludes}")
     for i, src in enumerate(sources, 1):
         print(f"  {i}. {src}")