api.py: save_document tool — pandoc render to Nextcloud Drafts/ via WebDAV

Claude can now write docx or pdf files to Aaron's Nextcloud Drafts/ when he asks for a document (bio, cover letter, statement, CV section) rather than chat text. Pandoc handles markdown -> docx and markdown -> pdf with the xelatex engine. Upload is a WebDAV PUT against the same Nextcloud instance dream.py already uses; NEXTCLOUD_URL / NEXTCLOUD_USER / NEXTCLOUD_PASSWORD in .env are reused. MKCOL ensures Drafts/ exists; PROPFIND-based collision check appends _2, _3, ... until unique. Filename sanitization strips path components and unsafe characters. System prompt instructs Claude to call save_document when the user wants a file (not chat text) and not to duplicate the file contents in the chat response — just write the file and tell Aaron where it landed. ingest.py and watcher.py now skip files under Drafts/ at ingest time so generated drafts don't pollute future retrieval. Drafts can still be opened, edited, and shipped; they just don't become part of the searchable corpus unless Aaron explicitly moves them out of Drafts/.
2026-05-20 00:41:26 +00:00
parent 84994f9282
commit fda61ad622
3 changed files with 163 additions and 1 deletions
@@ -4,6 +4,7 @@ import json
 import sqlite3
 import subprocess
 import hashlib
 import requests
 from pathlib import Path
 from datetime import datetime, timedelta
 from dotenv import load_dotenv
@@ -140,6 +141,13 @@ consulting" not "my work." Results are unfiltered and ranked by
 semantic similarity; judge each chunk for relevance and ignore
 irrelevant hits rather than forcing them into the answer.
 When Aaron asks for a document file — bio, cover letter, statement,
 CV section, anything he wants to send or edit outside chat — use the
 save_document tool to render the content to his Nextcloud Drafts/
 folder as docx (editable) or pdf (typeset). Don't duplicate the full
 content in the chat reply; just write the file and tell him where it
 landed. He can open it from any of his synced devices.
 Use web search automatically when current external information is
 needed. Never re-brief on context that's already in memory or
 retrieved chunks.
@@ -416,6 +424,134 @@ def create_conversation(title="New conversation"):
    conn.close()
    return conv_id
 # Nextcloud connection settings, read from the environment (.env) with
 # fallbacks for the URL and user. An empty password disables save_document.
 NEXTCLOUD_URL = os.getenv("NEXTCLOUD_URL", "https://nextcloud.aaronnelson.studio")
 NEXTCLOUD_USER = os.getenv("NEXTCLOUD_USER", "aaron")
 NEXTCLOUD_PASSWORD = os.getenv("NEXTCLOUD_PASSWORD", "")
 # WebDAV collection that generated documents are uploaded to. The base URL is
 # stripped of trailing slashes here so a configured "https://host/" does not
 # produce "https://host//remote.php/..."; NEXTCLOUD_URL itself is left as
 # configured for any other code that reads it.
 DRAFTS_WEBDAV = f"{NEXTCLOUD_URL.rstrip('/')}/remote.php/dav/files/{NEXTCLOUD_USER}/Drafts"
 # Matches every character that is NOT a letter, digit, underscore, hyphen,
 # dot, or space; used to replace unsafe filename characters.
 _FILENAME_SAFE_RE = re.compile(r"[^A-Za-z0-9_\-\. ]")
 # Model-facing guidance for the save_document tool, one sentence per fragment.
 _SAVE_DOCUMENT_DESCRIPTION = (
    "Render markdown content to docx or pdf and save it to Aaron's Nextcloud Drafts/ folder (syncs to his other devices and web UI). "
    "Use this when Aaron asks for a document file rather than chat text — bios, cover letters, statements, CV sections, anything he'll edit or send. "
    "Returns the saved filename. "
    "Pick a descriptive filename (no extension) like 'Aaron_Nelson_Bio_Utah_2026-05'. "
    "Format is 'docx' for editable drafts, 'pdf' for typeset/print-ready output. "
    "Content should be well-formed markdown — # headings, **bold**, *italic*, - bulleted lists. "
    "Don't embed file content in the chat response too; just call this tool and tell Aaron where it landed."
 )
 # JSON-schema properties accepted by save_document; all three are required.
 _SAVE_DOCUMENT_PROPERTIES = {
    "content": {"type": "string", "description": "Document content in markdown."},
    "filename": {"type": "string", "description": "Descriptive filename without extension."},
    "format": {"type": "string", "enum": ["docx", "pdf"], "description": "Output format."},
 }
 # Tool definition passed to the model alongside the other tools in chat().
 SAVE_DOCUMENT_TOOL = {
    "name": "save_document",
    "description": _SAVE_DOCUMENT_DESCRIPTION,
    "input_schema": {
        "type": "object",
        "properties": _SAVE_DOCUMENT_PROPERTIES,
        "required": ["content", "filename", "format"],
    },
 }
 # Trailing extensions that mean "the caller typed a file type" and are dropped
 # before the requested extension is appended. Any other text after a dot
 # (e.g. "v1.2 notes") is part of the title and is kept.
 _DOCUMENT_EXTENSIONS = frozenset(
    {"doc", "docx", "htm", "html", "markdown", "md", "odt", "pdf", "rtf", "tex", "txt"}
 )
 def _safe_filename(name: str, ext: str) -> str:
    """Build a safe leaf filename ending in ``.{ext}`` from a requested name.

    Steps:
      * reduce ``name`` to its final path component; backslashes are treated
        as separators too, so Windows-style paths are also reduced to the leaf;
      * replace every character outside ``[A-Za-z0-9_\\-. ]`` with ``_``;
      * drop one trailing document extension (or the requested ``ext``) if the
        caller included it, without truncating names that merely contain dots;
      * trim surrounding spaces and dots so the result is never a hidden
        dotfile and never ends in ``..{ext}``;
      * fall back to ``untitled`` when nothing usable remains.

    Args:
        name: Requested filename; may contain path components, an extension,
            or be empty/None.
        ext: Extension to force, without the leading dot (e.g. ``"docx"``).

    Returns:
        A filename of the form ``<base>.<ext>`` with no path separators.
    """
    # str()/or-guard: a null or non-string name degrades to "untitled"
    # instead of raising inside Path().
    leaf = Path(str(name or "").replace("\\", "/")).name
    leaf = _FILENAME_SAFE_RE.sub("_", leaf).strip()
    # Only strip the text after the last dot when it is a recognised file
    # type; Path.stem would cut "Report v1.2 notes" down to "Report v1".
    stem, dot, tail = leaf.rpartition(".")
    if dot and (tail.lower() in _DOCUMENT_EXTENSIONS or tail.lower() == ext.lower()):
        leaf = stem
    # Leading dots would create hidden files; trailing dots/spaces would give
    # names like "bio ..docx".
    leaf = leaf.strip(" .")
    return f"{leaf or 'untitled'}.{ext}"
 def _webdav_unique_url(base_url: str, filename: str, auth) -> tuple[str, str]:
    """Return a WebDAV URL (and leaf name) that does not collide with an existing file.

    Probes ``filename`` first, then ``<stem>_2<suffix>``, ``<stem>_3<suffix>``,
    ... up to ``_50``, issuing a ``PROPFIND`` with ``Depth: 0`` for each
    candidate. Matches the convention dream.py uses.

    Args:
        base_url: WebDAV collection URL, without a trailing slash.
        filename: Preferred leaf filename including its extension.
        auth: Credentials passed through to ``requests`` (e.g. a user/password tuple).

    Returns:
        ``(url, name)`` for the first candidate the server reports as 404.

    Raises:
        RuntimeError: if the server answers a probe with anything other than
            404 (free) or 200/207 (exists), or if every candidate is taken.
        requests.RequestException: on network failure or timeout.
    """
    stem = Path(filename).stem
    suffix = Path(filename).suffix
    candidates = [filename] + [f"{stem}_{i}{suffix}" for i in range(2, 51)]
    for name in candidates:
        url = f"{base_url}/{name}"
        check = requests.request(
            "PROPFIND", url, auth=auth, headers={"Depth": "0"}, timeout=10,
        )
        if check.status_code == 404:
            return url, name
        # Only a successful PROPFIND proves the name is taken. Auth failures
        # and server errors must surface immediately instead of being mistaken
        # for "exists" and burning through every candidate.
        if check.status_code not in (200, 207):
            raise RuntimeError(
                f"unexpected PROPFIND status {check.status_code} for {name}"
            )
    raise RuntimeError("could not find a free filename")
 def _execute_save_document(tool_input):
    """Generate a document via pandoc and PUT it to Nextcloud Drafts/.

    The markdown is rendered first, so a pandoc failure costs no network
    round-trips and the gap between the filename probe and the upload stays
    short. Then Drafts/ is ensured with MKCOL, a non-colliding filename is
    chosen, and the rendered bytes are uploaded.

    Args:
        tool_input: The tool-call input; expected to be a dict with
            ``content`` (markdown), ``filename`` (no extension) and ``format``
            (``"docx"`` or ``"pdf"``). Anything else is treated as empty input.

    Returns:
        A user-facing status string for Claude to relay. Failures are reported
        in the string (prefixed ``save_document:``) rather than raised.
    """
    if not NEXTCLOUD_PASSWORD:
        return "save_document: NEXTCLOUD_PASSWORD not configured."
    # Tolerate malformed tool calls: a non-dict input or null fields should
    # produce a status string, not an AttributeError/TypeError.
    payload = tool_input if isinstance(tool_input, dict) else {}
    content = payload.get("content") or ""
    raw_filename = payload.get("filename") or "untitled"
    fmt = payload.get("format") or "docx"
    if not isinstance(content, str) or not content.strip():
        return "save_document: empty content, nothing saved."
    if fmt not in ("docx", "pdf"):
        return f"save_document: unsupported format {fmt!r}; use 'docx' or 'pdf'."
    safe_name = _safe_filename(str(raw_filename), fmt)

    # Render to stdout ("-o -") so no temp file is needed; pdf goes through xelatex.
    cmd = ["pandoc", "-f", "markdown", "-t", fmt]
    if fmt == "pdf":
        cmd.append("--pdf-engine=xelatex")
    cmd += ["-o", "-"]
    try:
        proc = subprocess.run(
            cmd, input=content.encode("utf-8"),
            capture_output=True, timeout=120,
        )
    except subprocess.TimeoutExpired:
        return "save_document: pandoc timed out (>120s)."
    except FileNotFoundError:
        return "save_document: pandoc not installed."
    if proc.returncode != 0:
        # Cap stderr so a long LaTeX log does not flood the tool result.
        err = proc.stderr.decode("utf-8", errors="replace")[:400]
        return f"save_document: pandoc failed: {err}"

    auth = (NEXTCLOUD_USER, NEXTCLOUD_PASSWORD)
    # Ensure Drafts/ exists. 201 = created, 405 = already there — both fine.
    try:
        mkcol = requests.request("MKCOL", DRAFTS_WEBDAV, auth=auth, timeout=10)
    except requests.RequestException as e:
        return f"save_document: could not reach Nextcloud ({e})."
    # Any other 4xx/5xx (bad credentials, wrong user path, server error) means
    # the upload cannot succeed either, so report it here with the real status.
    if mkcol.status_code >= 400 and mkcol.status_code != 405:
        return (
            "save_document: could not ensure Drafts/ exists "
            f"(MKCOL returned {mkcol.status_code})."
        )
    try:
        url, final_name = _webdav_unique_url(DRAFTS_WEBDAV, safe_name, auth)
    except (requests.RequestException, RuntimeError) as e:
        return f"save_document: filename probe failed ({e})."
    try:
        put = requests.put(url, data=proc.stdout, auth=auth, timeout=60)
    except requests.RequestException as e:
        return f"save_document: WebDAV upload failed ({e})."
    if put.status_code not in (200, 201, 204):
        return f"save_document: WebDAV upload returned {put.status_code}."
    return f"Saved to Nextcloud: Drafts/{final_name}"
 RETRIEVE_DOCUMENTS_TOOL = {
    "name": "retrieve_documents",
    "description": (
@@ -488,7 +624,7 @@ def chat(user_message, conversation_id, settings, client_time=None):
    messages = history + [{"role": "user", "content": full_message}]
-    tools = [RETRIEVE_DOCUMENTS_TOOL]
+    tools = [RETRIEVE_DOCUMENTS_TOOL, SAVE_DOCUMENT_TOOL]
    if settings.get("web_search", True):
        tools.append({"type": "web_search_20250305", "name": "web_search"})
@@ -517,6 +653,13 @@ def chat(user_message, conversation_id, settings, client_time=None):
                        "tool_use_id": block.id,
                        "content": result_text,
                    })
                elif block.name == "save_document":
                    result_text = _execute_save_document(block.input)
                    tool_results.append({
                        "type": "tool_result",
                        "tool_use_id": block.id,
                        "content": result_text,
                    })
                else:
                    tool_results.append({
                        "type": "tool_result",
@@ -77,12 +77,22 @@ def _resolve_failure(source: str) -> None:
        print(f"  Could not resolve ingest failure record (non-fatal): {e}")
 # Names of top-level folders (first path component relative to the ingest
 # root) whose files are skipped at ingest time.
 IGNORED_TOP_FOLDERS = {"Drafts"}
 def _ingest_one(filepath: Path, embedder, root: Path = None) -> int:
    """Ingest a single file. Returns chunk count, 0 on skip/failure."""
    if filepath.name.startswith(("~$", ".")):
        return 0
    if filepath.suffix.lower() not in SUPPORTED:
        return 0
    if root is not None:
        try:
            rel = filepath.parent.relative_to(root)
            if rel.parts and rel.parts[0] in IGNORED_TOP_FOLDERS:
                return 0
        except ValueError:
            pass
    blocks = extract_blocks(filepath)
    if not blocks or not any(
        (b.get("text") or "").strip() or (b.get("heading") or "").strip()
@@ -123,11 +123,20 @@ def resolve_ingest_failure(source: str):
        log.warning(f"Could not resolve ingest failure record (non-fatal): {e}")
 # Names of top-level folders (first path component relative to
 # NEXTCLOUD_PATH) whose files are skipped at ingest time.
 IGNORED_TOP_FOLDERS = {"Drafts"}
 def ingest_file(filepath: Path, embedder) -> int:
    if filepath.name.startswith(("~$", "~", ".")):
        return 0
    if filepath.suffix.lower() not in SUPPORTED:
        return 0
    try:
        rel = filepath.parent.relative_to(NEXTCLOUD_PATH)
        if rel.parts and rel.parts[0] in IGNORED_TOP_FOLDERS:
            return 0
    except ValueError:
        pass
    blocks = extract_blocks(filepath)
    if not blocks or not any(
        (b.get("text") or "").strip() or (b.get("heading") or "").strip()