From fda61ad6225c797f349a32b0724bbf9a74213895 Mon Sep 17 00:00:00 2001
From: Aaron Nelson <aaron@aaronnelson.studio>
Date: Wed, 20 May 2026 00:41:26 +0000
Subject: [PATCH] =?UTF-8?q?api.py:=20save=5Fdocument=20tool=20=E2=80=94=20?=
 =?UTF-8?q?pandoc=20render=20to=20Nextcloud=20Drafts/=20via=20WebDAV?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Claude can now write docx or pdf files to Aaron's Nextcloud Drafts/ when Aaron
asks for a document (bio, cover letter, statement, CV section) rather than
chat text. Pandoc handles markdown -> docx and markdown -> pdf with the
xelatex engine. Upload is a WebDAV PUT against the same Nextcloud instance
dream.py already uses; NEXTCLOUD_URL / NEXTCLOUD_USER / NEXTCLOUD_PASSWORD
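in .env are reused. MKCOL ensures Drafts/ exists; a PROPFIND-based collision
check appends _2, _3, ... to the filename until it is unique, giving up after
roughly 50 attempts. Filename sanitization strips path components, replaces
unsafe characters with underscores, and forces the requested extension.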

System prompt instructs Claude to call save_document when the user wants a
file (not chat text) and not to duplicate the file contents in the chat
response — just write the file and tell Aaron where it landed.

ingest.py and watcher.py now skip files under Drafts/ at ingest time so
generated drafts don't pollute future retrieval. Drafts can still be opened,
edited, and shipped; they just don't become part of the searchable corpus
unless Aaron explicitly moves them out of Drafts/.
---
 scripts/api.py     | 145 ++++++++++++++++++++++++++++++++++++++++++++-
 scripts/ingest.py  |  10 ++++
 scripts/watcher.py |   9 +++
 3 files changed, 163 insertions(+), 1 deletion(-)

diff --git a/scripts/api.py b/scripts/api.py
index 8ee05c9..4647471 100644
--- a/scripts/api.py
+++ b/scripts/api.py
@@ -4,6 +4,7 @@ import json
 import sqlite3
 import subprocess
 import hashlib
+import requests
 from pathlib import Path
 from datetime import datetime, timedelta
 from dotenv import load_dotenv
@@ -140,6 +141,13 @@ consulting" not "my work." Results are unfiltered and ranked by
 semantic similarity; judge each chunk for relevance and ignore
 irrelevant hits rather than forcing them into the answer.
 
+When Aaron asks for a document file — bio, cover letter, statement,
+CV section, anything he wants to send or edit outside chat — use the
+save_document tool to render the content to his Nextcloud Drafts/
+folder as docx (editable) or pdf (typeset). Don't duplicate the full
+content in the chat reply; just write the file and tell him where it
+landed. He can open it from any of his synced devices.
+
 Use web search automatically when current external information is
 needed. Never re-brief on context that's already in memory or
 retrieved chunks.
@@ -416,6 +424,134 @@ def create_conversation(title="New conversation"):
     conn.close()
     return conv_id
 
+NEXTCLOUD_URL = os.getenv("NEXTCLOUD_URL", "https://nextcloud.aaronnelson.studio")
+NEXTCLOUD_USER = os.getenv("NEXTCLOUD_USER", "aaron")
+NEXTCLOUD_PASSWORD = os.getenv("NEXTCLOUD_PASSWORD", "")
+DRAFTS_WEBDAV = f"{NEXTCLOUD_URL}/remote.php/dav/files/{NEXTCLOUD_USER}/Drafts"
+
+_FILENAME_SAFE_RE = re.compile(r"[^A-Za-z0-9_\-\. ]")
+
+
+SAVE_DOCUMENT_TOOL = {
+    "name": "save_document",
+    "description": (
+        "Render markdown content to docx or pdf and save it to Aaron's Nextcloud "
+        "Drafts/ folder (syncs to his other devices and web UI). Use this when "
+        "Aaron asks for a document file rather than chat text — bios, cover "
+        "letters, statements, CV sections, anything he'll edit or send. Returns "
+        "the saved filename. Pick a descriptive filename (no extension) like "
+        "'Aaron_Nelson_Bio_Utah_2026-05'. Format is 'docx' for editable drafts, "
+        "'pdf' for typeset/print-ready output. Content should be well-formed "
+        "markdown — # headings, **bold**, *italic*, - bulleted lists. Don't "
+        "embed file content in the chat response too; just call this tool and "
+        "tell Aaron where it landed."
+    ),
+    "input_schema": {
+        "type": "object",
+        "properties": {
+            "content": {
+                "type": "string",
+                "description": "Document content in markdown.",
+            },
+            "filename": {
+                "type": "string",
+                "description": "Descriptive filename without extension.",
+            },
+            "format": {
+                "type": "string",
+                "enum": ["docx", "pdf"],
+                "description": "Output format.",
+            },
+        },
+        "required": ["content", "filename", "format"],
+    },
+}
+
+
+def _safe_filename(name: str, ext: str) -> str:
+    """Strip path components and unsafe chars; force the requested extension."""
+    base = Path(name).name
+    base = _FILENAME_SAFE_RE.sub("_", base).strip().rstrip(".")
+    if not base:
+        base = "untitled"
+    base = Path(base).stem
+    return f"{base}.{ext}"
+
+
+def _webdav_unique_url(base_url: str, filename: str, auth) -> tuple[str, str]:
+    """Return a WebDAV URL that doesn't collide with an existing file. Appends
+    _2, _3, ... until PROPFIND returns 404. Matches the convention dream.py uses."""
+    stem = Path(filename).stem
+    suffix = Path(filename).suffix
+    name = filename
+    i = 2
+    while True:
+        url = f"{base_url}/{name}"
+        check = requests.request("PROPFIND", url, auth=auth, timeout=10)
+        if check.status_code == 404:
+            return url, name
+        name = f"{stem}_{i}{suffix}"
+        i += 1
+        if i > 50:
+            raise RuntimeError("could not find a free filename")
+
+
+def _execute_save_document(tool_input):
+    """Generate a document via pandoc and PUT it to Nextcloud Drafts/.
+    Returns a user-facing status string for Claude to relay."""
+    if not NEXTCLOUD_PASSWORD:
+        return "save_document: NEXTCLOUD_PASSWORD not configured."
+
+    payload = tool_input or {}
+    content = payload.get("content", "")
+    raw_filename = payload.get("filename", "untitled")
+    fmt = payload.get("format", "docx")
+
+    if not content.strip():
+        return "save_document: empty content, nothing saved."
+    if fmt not in ("docx", "pdf"):
+        return f"save_document: unsupported format {fmt!r}; use 'docx' or 'pdf'."
+
+    safe_name = _safe_filename(raw_filename, fmt)
+    auth = (NEXTCLOUD_USER, NEXTCLOUD_PASSWORD)
+
+    # Ensure Drafts/ exists. 201 = created, 405 = already there — both fine.
+    try:
+        requests.request("MKCOL", DRAFTS_WEBDAV, auth=auth, timeout=10)
+    except requests.RequestException as e:
+        return f"save_document: could not reach Nextcloud ({e})."
+
+    try:
+        url, final_name = _webdav_unique_url(DRAFTS_WEBDAV, safe_name, auth)
+    except (requests.RequestException, RuntimeError) as e:
+        return f"save_document: filename probe failed ({e})."
+
+    cmd = ["pandoc", "-f", "markdown", "-t", fmt, "-o", "-"]
+    if fmt == "pdf":
+        cmd.insert(-2, "--pdf-engine=xelatex")
+    try:
+        proc = subprocess.run(
+            cmd, input=content.encode("utf-8"),
+            capture_output=True, timeout=120,
+        )
+    except subprocess.TimeoutExpired:
+        return "save_document: pandoc timed out (>120s)."
+    except FileNotFoundError:
+        return "save_document: pandoc not installed."
+    if proc.returncode != 0:
+        err = proc.stderr.decode("utf-8", errors="replace")[:400]
+        return f"save_document: pandoc failed: {err}"
+
+    try:
+        put = requests.put(url, data=proc.stdout, auth=auth, timeout=60)
+    except requests.RequestException as e:
+        return f"save_document: WebDAV upload failed ({e})."
+    if put.status_code not in (200, 201, 204):
+        return f"save_document: WebDAV upload returned {put.status_code}."
+
+    return f"Saved to Nextcloud: Drafts/{final_name}"
+
+
 RETRIEVE_DOCUMENTS_TOOL = {
     "name": "retrieve_documents",
     "description": (
@@ -488,7 +624,7 @@ def chat(user_message, conversation_id, settings, client_time=None):
 
     messages = history + [{"role": "user", "content": full_message}]
 
-    tools = [RETRIEVE_DOCUMENTS_TOOL]
+    tools = [RETRIEVE_DOCUMENTS_TOOL, SAVE_DOCUMENT_TOOL]
     if settings.get("web_search", True):
         tools.append({"type": "web_search_20250305", "name": "web_search"})
 
@@ -517,6 +653,13 @@ def chat(user_message, conversation_id, settings, client_time=None):
                         "tool_use_id": block.id,
                         "content": result_text,
                     })
+                elif block.name == "save_document":
+                    result_text = _execute_save_document(block.input)
+                    tool_results.append({
+                        "type": "tool_result",
+                        "tool_use_id": block.id,
+                        "content": result_text,
+                    })
                 else:
                     tool_results.append({
                         "type": "tool_result",
diff --git a/scripts/ingest.py b/scripts/ingest.py
index 76bc140..8b37f8d 100644
--- a/scripts/ingest.py
+++ b/scripts/ingest.py
@@ -77,12 +77,22 @@ def _resolve_failure(source: str) -> None:
         print(f"  Could not resolve ingest failure record (non-fatal): {e}")
 
 
+IGNORED_TOP_FOLDERS = {"Drafts"}
+
+
 def _ingest_one(filepath: Path, embedder, root: Path = None) -> int:
     """Ingest a single file. Returns chunk count, 0 on skip/failure."""
     if filepath.name.startswith(("~$", ".")):
         return 0
     if filepath.suffix.lower() not in SUPPORTED:
         return 0
+    if root is not None:
+        try:
+            rel = filepath.parent.relative_to(root)
+            if rel.parts and rel.parts[0] in IGNORED_TOP_FOLDERS:
+                return 0
+        except ValueError:
+            pass
     blocks = extract_blocks(filepath)
     if not blocks or not any(
         (b.get("text") or "").strip() or (b.get("heading") or "").strip()
diff --git a/scripts/watcher.py b/scripts/watcher.py
index a938591..2148949 100644
--- a/scripts/watcher.py
+++ b/scripts/watcher.py
@@ -123,11 +123,20 @@ def resolve_ingest_failure(source: str):
         log.warning(f"Could not resolve ingest failure record (non-fatal): {e}")
 
 
+IGNORED_TOP_FOLDERS = {"Drafts"}
+
+
 def ingest_file(filepath: Path, embedder) -> int:
     if filepath.name.startswith(("~$", "~", ".")):
         return 0
     if filepath.suffix.lower() not in SUPPORTED:
         return 0
+    try:
+        rel = filepath.parent.relative_to(NEXTCLOUD_PATH)
+        if rel.parts and rel.parts[0] in IGNORED_TOP_FOLDERS:
+            return 0
+    except ValueError:
+        pass
     blocks = extract_blocks(filepath)
     if not blocks or not any(
         (b.get("text") or "").strip() or (b.get("heading") or "").strip()