api.py: save_document tool — pandoc render to Nextcloud Drafts/ via WebDAV

Claude can now write docx or pdf files to Aaron's Nextcloud Drafts/ when Aaron
asks for a document (bio, cover letter, statement, CV section) rather than
chat text. Pandoc handles markdown -> docx and markdown -> pdf with the
xelatex engine. Upload is a WebDAV PUT against the same Nextcloud instance
dream.py already uses; NEXTCLOUD_URL / NEXTCLOUD_USER / NEXTCLOUD_PASSWORD
in .env are reused. An MKCOL request ensures Drafts/ exists; a PROPFIND-based
collision check appends _2, _3, ... to the filename until it is unique. Filename sanitization strips path
components and unsafe characters.

System prompt instructs Claude to call save_document when the user wants a
file (not chat text) and not to duplicate the file contents in the chat
response — just write the file and tell Aaron where it landed.

ingest.py and watcher.py now skip files under Drafts/ at ingest time so
generated drafts don't pollute future retrieval. Drafts can still be opened,
edited, and shipped; they just don't become part of the searchable corpus
unless Aaron explicitly moves them out of Drafts/.
This commit is contained in:
2026-05-20 00:41:26 +00:00
parent 84994f9282
commit fda61ad622
3 changed files with 163 additions and 1 deletions
+144 -1
View File
@@ -4,6 +4,7 @@ import json
import sqlite3
import subprocess
import hashlib
import requests
from pathlib import Path
from datetime import datetime, timedelta
from dotenv import load_dotenv
@@ -140,6 +141,13 @@ consulting" not "my work." Results are unfiltered and ranked by
semantic similarity; judge each chunk for relevance and ignore
irrelevant hits rather than forcing them into the answer.
When Aaron asks for a document file — bio, cover letter, statement,
CV section, anything he wants to send or edit outside chat — use the
save_document tool to render the content to his Nextcloud Drafts/
folder as docx (editable) or pdf (typeset). Don't duplicate the full
content in the chat reply; just write the file and tell him where it
landed. He can open it from any of his synced devices.
Use web search automatically when current external information is
needed. Never re-brief on context that's already in memory or
retrieved chunks.
@@ -416,6 +424,134 @@ def create_conversation(title="New conversation"):
conn.close()
return conv_id
# Nextcloud connection settings, read from the environment with fallbacks.
NEXTCLOUD_URL = os.getenv("NEXTCLOUD_URL", "https://nextcloud.aaronnelson.studio")
NEXTCLOUD_USER = os.getenv("NEXTCLOUD_USER", "aaron")
# Empty by default; _execute_save_document refuses to run without a password.
NEXTCLOUD_PASSWORD = os.getenv("NEXTCLOUD_PASSWORD", "")
# WebDAV collection URL of the user's Drafts/ folder. Trailing slashes on the
# configured base URL are dropped so the path never contains "//remote.php".
DRAFTS_WEBDAV = (
    f"{NEXTCLOUD_URL.rstrip('/')}/remote.php/dav/files/{NEXTCLOUD_USER}/Drafts"
)
# Matches every character that is NOT an ASCII letter, digit, underscore,
# hyphen, dot or space; _safe_filename replaces each match with "_".
_FILENAME_SAFE_RE = re.compile(r"[^A-Za-z0-9_\-\. ]")
# Tool definition advertised to the model for writing a rendered document.
# "name" is the value the tool-use loop compares block.name against, and the
# schema keys (content / filename / format) are the ones
# _execute_save_document reads from the tool input.
SAVE_DOCUMENT_TOOL = {
    "name": "save_document",
    # One sentence per fragment; the pieces concatenate into a single string.
    "description": (
        "Render markdown content to docx or pdf and save it to Aaron's "
        "Nextcloud Drafts/ folder (syncs to his other devices and web UI). "
        "Use this when Aaron asks for a document file rather than chat text "
        "— bios, cover letters, statements, CV sections, anything he'll edit "
        "or send. "
        "Returns the saved filename. "
        "Pick a descriptive filename (no extension) like "
        "'Aaron_Nelson_Bio_Utah_2026-05'. "
        "Format is 'docx' for editable drafts, 'pdf' for typeset/print-ready "
        "output. "
        "Content should be well-formed markdown — # headings, **bold**, "
        "*italic*, - bulleted lists. "
        "Don't embed file content in the chat response too; just call this "
        "tool and tell Aaron where it landed."
    ),
    "input_schema": {
        "type": "object",
        "properties": {
            # Markdown source handed to pandoc.
            "content": {
                "type": "string",
                "description": "Document content in markdown.",
            },
            # Base name only; the extension is derived from "format".
            "filename": {
                "type": "string",
                "description": "Descriptive filename without extension.",
            },
            # Restricted to the two formats the executor accepts.
            "format": {
                "type": "string",
                "enum": ["docx", "pdf"],
                "description": "Output format.",
            },
        },
        "required": ["content", "filename", "format"],
    },
}
def _safe_filename(name: str, ext: str) -> str:
    """Build a safe ``<base>.<ext>`` filename from a caller-supplied name.

    Directory components are dropped, every character outside
    ``[A-Za-z0-9_\\-. ]`` becomes ``_``, and surrounding spaces/dots are
    trimmed so the result is never a hidden file and never ends in ". ".
    A trailing document extension the caller added anyway (e.g. ``bio.docx``)
    is removed, but other dots are kept: "Dr. Smith Letter" and "Bio v1.2"
    stay intact instead of being cut at the last dot.

    Args:
        name: Requested filename, ideally without an extension. ``None`` or
            an empty/unsafe-only value falls back to "untitled".
        ext: Extension to force, without the leading dot (e.g. "docx").

    Returns:
        The sanitized filename with ``ext`` appended.
    """
    # Extensions worth stripping when the caller supplies one despite the
    # "no extension" contract; anything else after a dot is part of the title.
    known_suffixes = (
        ".docx", ".doc", ".pdf", ".md", ".markdown", ".txt", ".odt", ".rtf",
    )

    # Normalise backslashes first so Windows-style paths also lose their
    # directories when this runs on POSIX.
    base = Path(str(name or "").replace("\\", "/")).name
    base = _FILENAME_SAFE_RE.sub("_", base).strip(" .")

    suffix = Path(base).suffix
    if suffix.lower() in known_suffixes:
        base = base[: -len(suffix)].strip(" .")

    if not base:
        base = "untitled"
    return f"{base}.{ext}"
def _webdav_unique_url(base_url: str, filename: str, auth) -> tuple[str, str]:
    """Find a WebDAV URL under ``base_url`` that no existing file occupies.

    Probes ``filename`` first, then ``<stem>_2<suffix>``, ``<stem>_3<suffix>``
    and so on, issuing one PROPFIND per candidate until the server answers
    404 (nothing there).

    Args:
        base_url: Collection URL without a trailing slash.
        filename: Preferred filename including its extension.
        auth: Credentials passed straight through to ``requests``.

    Returns:
        ``(url, name)`` for the first candidate that does not exist yet.

    Raises:
        RuntimeError: If the server answers with anything other than 404 or
            a 2xx status (e.g. 401 for bad credentials), or if every
            candidate up to the attempt limit is already taken.
        requests.RequestException: On network-level failures.
    """
    max_attempts = 50
    target = Path(filename)
    stem, suffix = target.stem, target.suffix

    for attempt in range(1, max_attempts + 1):
        name = filename if attempt == 1 else f"{stem}_{attempt}{suffix}"
        url = f"{base_url}/{name}"
        # Depth 0: only the resource itself is of interest, never its children.
        check = requests.request(
            "PROPFIND", url, auth=auth, headers={"Depth": "0"}, timeout=10
        )
        if check.status_code == 404:
            return url, name
        # Only a successful answer proves the name is taken. Auth or server
        # errors must surface right away instead of being read as
        # "file exists" and burning through every candidate.
        if not 200 <= check.status_code < 300:
            raise RuntimeError(
                f"unexpected PROPFIND status {check.status_code} for {name}"
            )

    raise RuntimeError("could not find a free filename")
def _execute_save_document(tool_input):
    """Render markdown with pandoc and upload the result to Nextcloud Drafts/.

    The document is rendered locally first; only after pandoc succeeds are
    Drafts/ created (MKCOL), a free filename probed and the file uploaded
    (PUT). Probing this late keeps the window between "name is free" and
    "file is written" as short as possible and avoids network traffic for
    renders that fail.

    Args:
        tool_input: Mapping with "content" (markdown), "filename" (base name
            without extension) and "format" ("docx" or "pdf"). Anything that
            is not a dict is treated as empty input.

    Returns:
        A status string for the model to relay: "Saved to Nextcloud: ..." on
        success, otherwise a "save_document: ..." message describing the
        failure. This function reports problems through the return value
        rather than by raising.
    """
    if not NEXTCLOUD_PASSWORD:
        return "save_document: NEXTCLOUD_PASSWORD not configured."

    # Tool input is model-generated: tolerate a missing/non-dict payload and
    # null or non-string fields instead of raising inside the tool loop.
    payload = tool_input if isinstance(tool_input, dict) else {}
    content = payload.get("content") or ""
    raw_filename = payload.get("filename") or "untitled"
    fmt = payload.get("format") or "docx"

    if not isinstance(content, str) or not content.strip():
        return "save_document: empty content, nothing saved."
    if fmt not in ("docx", "pdf"):
        return f"save_document: unsupported format {fmt!r}; use 'docx' or 'pdf'."

    safe_name = _safe_filename(str(raw_filename), fmt)
    auth = (NEXTCLOUD_USER, NEXTCLOUD_PASSWORD)

    # Render to stdout ("-o -") so no temporary file is needed.
    cmd = ["pandoc", "-f", "markdown", "-t", fmt]
    if fmt == "pdf":
        cmd.append("--pdf-engine=xelatex")
    cmd += ["-o", "-"]
    try:
        proc = subprocess.run(
            cmd,
            input=content.encode("utf-8"),
            capture_output=True,
            timeout=120,
        )
    except subprocess.TimeoutExpired:
        return "save_document: pandoc timed out (>120s)."
    except FileNotFoundError:
        return "save_document: pandoc not installed."
    if proc.returncode != 0:
        # Cap stderr so a long LaTeX log does not flood the chat.
        err = proc.stderr.decode("utf-8", errors="replace")[:400]
        return f"save_document: pandoc failed: {err}"

    # Ensure Drafts/ exists. 201 = created, 405 = already there — both fine.
    try:
        mkcol = requests.request("MKCOL", DRAFTS_WEBDAV, auth=auth, timeout=10)
    except requests.RequestException as e:
        return f"save_document: could not reach Nextcloud ({e})."
    if mkcol.status_code >= 400 and mkcol.status_code != 405:
        # e.g. 401/403 for bad credentials; continuing would only fail later
        # with a less helpful message.
        return (
            "save_document: could not prepare Drafts/ "
            f"(MKCOL returned {mkcol.status_code})."
        )

    try:
        url, final_name = _webdav_unique_url(DRAFTS_WEBDAV, safe_name, auth)
    except (requests.RequestException, RuntimeError) as e:
        return f"save_document: filename probe failed ({e})."

    try:
        put = requests.put(url, data=proc.stdout, auth=auth, timeout=60)
    except requests.RequestException as e:
        return f"save_document: WebDAV upload failed ({e})."
    if put.status_code not in (200, 201, 204):
        return f"save_document: WebDAV upload returned {put.status_code}."

    return f"Saved to Nextcloud: Drafts/{final_name}"
RETRIEVE_DOCUMENTS_TOOL = {
"name": "retrieve_documents",
"description": (
@@ -488,7 +624,7 @@ def chat(user_message, conversation_id, settings, client_time=None):
messages = history + [{"role": "user", "content": full_message}]
tools = [RETRIEVE_DOCUMENTS_TOOL]
tools = [RETRIEVE_DOCUMENTS_TOOL, SAVE_DOCUMENT_TOOL]
if settings.get("web_search", True):
tools.append({"type": "web_search_20250305", "name": "web_search"})
@@ -517,6 +653,13 @@ def chat(user_message, conversation_id, settings, client_time=None):
"tool_use_id": block.id,
"content": result_text,
})
elif block.name == "save_document":
result_text = _execute_save_document(block.input)
tool_results.append({
"type": "tool_result",
"tool_use_id": block.id,
"content": result_text,
})
else:
tool_results.append({
"type": "tool_result",