diff --git a/scripts/api.py b/scripts/api.py
index e1f25cd..8ee05c9 100644
--- a/scripts/api.py
+++ b/scripts/api.py
@@ -463,13 +463,28 @@ def chat(user_message, conversation_id, settings, client_time=None):
     memory = load_memory()
     history = get_conversation_history(conversation_id)
 
-    context_parts = []
-    if client_time:
-        context_parts.append(f"Current time (user-supplied, not logged): {client_time}")
+    # System prompt + persistent memory are stable across the tool_use round-trip
+    # and usually across turns within the 5-minute cache TTL. cache_control on the
+    # last system block sets a cache breakpoint covering tools + system, so the
+    # second LLM call in a tool_use turn reads that prefix from cache (~10% of
+    # standard input cost) instead of re-billing it. Memory lives here (not in the
+    # user message) so its position stays stable for cache hits.
+    system_blocks = [{"type": "text", "text": SYSTEM_PROMPT}]
     if memory:
-        context_parts.append(f"Aaron's persistent memory:\n\n{memory}")
-    context_block = "\n\n====\n\n".join(context_parts) + "\n\n---\n\n" if context_parts else ""
-    full_message = context_block + user_message
+        system_blocks.append({
+            "type": "text",
+            "text": f"Aaron's persistent memory:\n\n{memory}",
+        })
+    system_blocks[-1]["cache_control"] = {"type": "ephemeral"}
+
+    # client_time is per-turn dynamic, so it stays out of the cached prefix.
+    if client_time:
+        full_message = (
+            f"Current time (user-supplied, not logged): {client_time}\n\n"
+            f"---\n\n{user_message}"
+        )
+    else:
+        full_message = user_message
 
     messages = history + [{"role": "user", "content": full_message}]
 
@@ -483,7 +498,7 @@ def chat(user_message, conversation_id, settings, client_time=None):
         response = anthropic_client.messages.create(
             model="claude-sonnet-4-6",
             max_tokens=2048,
-            system=SYSTEM_PROMPT,
+            system=system_blocks,
             messages=messages,
             tools=tools,
         )