From fda61ad6225c797f349a32b0724bbf9a74213895 Mon Sep 17 00:00:00 2001
From: Aaron Nelson
Date: Wed, 20 May 2026 00:41:26 +0000
Subject: [PATCH] =?UTF-8?q?api.py:=20save=5Fdocument=20tool=20=E2=80=94=20?=
 =?UTF-8?q?pandoc=20render=20to=20Nextcloud=20Drafts/=20via=20WebDAV?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Claude can now write docx or pdf files to Aaron's Nextcloud Drafts/ when
he asks for a document (bio, cover letter, statement, CV section) rather
than chat text.

Pandoc handles markdown -> docx and markdown -> pdf with the xelatex
engine. Upload is a WebDAV PUT against the same Nextcloud instance
dream.py already uses; NEXTCLOUD_URL / NEXTCLOUD_USER /
NEXTCLOUD_PASSWORD in .env are reused. MKCOL ensures Drafts/ exists;
PROPFIND-based collision check appends _2, _3, ... until unique, and
aborts on any status other than 404/200/207 so an auth or server error
is reported as such rather than read as a name collision. Filename
sanitization strips path components and unsafe characters.

System prompt instructs Claude to call save_document when the user wants
a file (not chat text) and not to duplicate the file contents in the
chat response — just write the file and tell Aaron where it landed.

ingest.py and watcher.py now skip files under Drafts/ at ingest time so
generated drafts don't pollute future retrieval. Drafts can still be
opened, edited, and shipped; they just don't become part of the
searchable corpus unless Aaron explicitly moves them out of Drafts/.
---
 scripts/api.py     | 155 ++++++++++++++++++++++++++++++++++++++++++++-
 scripts/ingest.py  |  10 ++++
 scripts/watcher.py |   9 +++
 3 files changed, 173 insertions(+), 1 deletion(-)

diff --git a/scripts/api.py b/scripts/api.py
index 8ee05c9..4647471 100644
--- a/scripts/api.py
+++ b/scripts/api.py
@@ -4,6 +4,7 @@ import json
 import sqlite3
 import subprocess
 import hashlib
+import requests
 from pathlib import Path
 from datetime import datetime, timedelta
 from dotenv import load_dotenv
@@ -140,6 +141,13 @@ consulting" not "my work."
 Results are unfiltered and ranked by semantic similarity; judge each chunk for
 relevance and ignore irrelevant hits rather than forcing them into the answer.
 
+When Aaron asks for a document file — bio, cover letter, statement,
+CV section, anything he wants to send or edit outside chat — use the
+save_document tool to render the content to his Nextcloud Drafts/
+folder as docx (editable) or pdf (typeset). Don't duplicate the full
+content in the chat reply; just write the file and tell him where it
+landed. He can open it from any of his synced devices.
+
 Use web search automatically when current external information is needed.
 Never re-brief on context that's already in memory or retrieved chunks.
 
@@ -416,6 +424,144 @@ def create_conversation(title="New conversation"):
     conn.close()
     return conv_id
 
+NEXTCLOUD_URL = os.getenv("NEXTCLOUD_URL", "https://nextcloud.aaronnelson.studio")
+NEXTCLOUD_USER = os.getenv("NEXTCLOUD_USER", "aaron")
+NEXTCLOUD_PASSWORD = os.getenv("NEXTCLOUD_PASSWORD", "")
+DRAFTS_WEBDAV = f"{NEXTCLOUD_URL}/remote.php/dav/files/{NEXTCLOUD_USER}/Drafts"
+
+_FILENAME_SAFE_RE = re.compile(r"[^A-Za-z0-9_\-\. ]")
+
+
+SAVE_DOCUMENT_TOOL = {
+    "name": "save_document",
+    "description": (
+        "Render markdown content to docx or pdf and save it to Aaron's Nextcloud "
+        "Drafts/ folder (syncs to his other devices and web UI). Use this when "
+        "Aaron asks for a document file rather than chat text — bios, cover "
+        "letters, statements, CV sections, anything he'll edit or send. Returns "
+        "the saved filename. Pick a descriptive filename (no extension) like "
+        "'Aaron_Nelson_Bio_Utah_2026-05'. Format is 'docx' for editable drafts, "
+        "'pdf' for typeset/print-ready output. Content should be well-formed "
+        "markdown — # headings, **bold**, *italic*, - bulleted lists. Don't "
+        "embed file content in the chat response too; just call this tool and "
+        "tell Aaron where it landed."
+    ),
+    "input_schema": {
+        "type": "object",
+        "properties": {
+            "content": {
+                "type": "string",
+                "description": "Document content in markdown.",
+            },
+            "filename": {
+                "type": "string",
+                "description": "Descriptive filename without extension.",
+            },
+            "format": {
+                "type": "string",
+                "enum": ["docx", "pdf"],
+                "description": "Output format.",
+            },
+        },
+        "required": ["content", "filename", "format"],
+    },
+}
+
+
+def _safe_filename(name: str, ext: str) -> str:
+    """Strip path components and unsafe chars; force the requested extension."""
+    base = Path(name).name
+    base = _FILENAME_SAFE_RE.sub("_", base).strip().rstrip(".")
+    if not base:
+        base = "untitled"
+    base = Path(base).stem
+    return f"{base}.{ext}"
+
+
+def _webdav_unique_url(base_url: str, filename: str, auth) -> tuple[str, str]:
+    """Return a (url, name) pair under base_url that no existing file occupies.
+
+    Probes each candidate with PROPFIND and appends _2, _3, ... to the stem
+    until the server answers 404, matching the convention dream.py uses. Only
+    200/207 mean "name taken"; any other status (401, 403, 5xx, ...) raises
+    RuntimeError instead of being mistaken for a collision.
+    """
+    stem = Path(filename).stem
+    suffix = Path(filename).suffix
+    name = filename
+    i = 2
+    while True:
+        url = f"{base_url}/{name}"
+        # Depth: 0 keeps the probe to this one resource.
+        check = requests.request(
+            "PROPFIND", url, auth=auth, headers={"Depth": "0"}, timeout=10,
+        )
+        if check.status_code == 404:
+            return url, name
+        if check.status_code not in (200, 207):
+            raise RuntimeError(f"PROPFIND returned {check.status_code}")
+        name = f"{stem}_{i}{suffix}"
+        i += 1
+        if i > 50:
+            raise RuntimeError("could not find a free filename")
+
+
+def _execute_save_document(tool_input):
+    """Generate a document via pandoc and PUT it to Nextcloud Drafts/.
+    Returns a user-facing status string for Claude to relay."""
+    if not NEXTCLOUD_PASSWORD:
+        return "save_document: NEXTCLOUD_PASSWORD not configured."
+
+    payload = tool_input or {}
+    content = payload.get("content", "")
+    raw_filename = payload.get("filename", "untitled")
+    fmt = payload.get("format", "docx")
+
+    if not content.strip():
+        return "save_document: empty content, nothing saved."
+    if fmt not in ("docx", "pdf"):
+        return f"save_document: unsupported format {fmt!r}; use 'docx' or 'pdf'."
+
+    safe_name = _safe_filename(raw_filename, fmt)
+    auth = (NEXTCLOUD_USER, NEXTCLOUD_PASSWORD)
+
+    # Ensure Drafts/ exists. 201 = created, 405 = already there — both fine.
+    try:
+        requests.request("MKCOL", DRAFTS_WEBDAV, auth=auth, timeout=10)
+    except requests.RequestException as e:
+        return f"save_document: could not reach Nextcloud ({e})."
+
+    try:
+        url, final_name = _webdav_unique_url(DRAFTS_WEBDAV, safe_name, auth)
+    except (requests.RequestException, RuntimeError) as e:
+        return f"save_document: filename probe failed ({e})."
+
+    cmd = ["pandoc", "-f", "markdown", "-t", fmt, "-o", "-"]
+    if fmt == "pdf":
+        cmd.insert(-2, "--pdf-engine=xelatex")
+    try:
+        proc = subprocess.run(
+            cmd, input=content.encode("utf-8"),
+            capture_output=True, timeout=120,
+        )
+    except subprocess.TimeoutExpired:
+        return "save_document: pandoc timed out (>120s)."
+    except FileNotFoundError:
+        return "save_document: pandoc not installed."
+    if proc.returncode != 0:
+        err = proc.stderr.decode("utf-8", errors="replace")[:400]
+        return f"save_document: pandoc failed: {err}"
+
+    try:
+        put = requests.put(url, data=proc.stdout, auth=auth, timeout=60)
+    except requests.RequestException as e:
+        return f"save_document: WebDAV upload failed ({e})."
+    if put.status_code not in (200, 201, 204):
+        return f"save_document: WebDAV upload returned {put.status_code}."
+
+    return f"Saved to Nextcloud: Drafts/{final_name}"
+
+
 RETRIEVE_DOCUMENTS_TOOL = {
     "name": "retrieve_documents",
     "description": (
@@ -488,7 +634,7 @@ def chat(user_message, conversation_id, settings, client_time=None):
 
     messages = history + [{"role": "user", "content": full_message}]
 
-    tools = [RETRIEVE_DOCUMENTS_TOOL]
+    tools = [RETRIEVE_DOCUMENTS_TOOL, SAVE_DOCUMENT_TOOL]
     if settings.get("web_search", True):
         tools.append({"type": "web_search_20250305", "name": "web_search"})
 
@@ -517,6 +663,13 @@ def chat(user_message, conversation_id, settings, client_time=None):
                         "tool_use_id": block.id,
                         "content": result_text,
                     })
+                elif block.name == "save_document":
+                    result_text = _execute_save_document(block.input)
+                    tool_results.append({
+                        "type": "tool_result",
+                        "tool_use_id": block.id,
+                        "content": result_text,
+                    })
                 else:
                     tool_results.append({
                         "type": "tool_result",
diff --git a/scripts/ingest.py b/scripts/ingest.py
index 76bc140..8b37f8d 100644
--- a/scripts/ingest.py
+++ b/scripts/ingest.py
@@ -77,12 +77,22 @@ def _resolve_failure(source: str) -> None:
         print(f" Could not resolve ingest failure record (non-fatal): {e}")
 
 
+IGNORED_TOP_FOLDERS = {"Drafts"}
+
+
 def _ingest_one(filepath: Path, embedder, root: Path = None) -> int:
     """Ingest a single file. Returns chunk count, 0 on skip/failure."""
     if filepath.name.startswith(("~$", ".")):
         return 0
     if filepath.suffix.lower() not in SUPPORTED:
         return 0
+    if root is not None:
+        try:
+            rel = filepath.parent.relative_to(root)
+            if rel.parts and rel.parts[0] in IGNORED_TOP_FOLDERS:
+                return 0
+        except ValueError:
+            pass
     blocks = extract_blocks(filepath)
     if not blocks or not any(
         (b.get("text") or "").strip() or (b.get("heading") or "").strip()
diff --git a/scripts/watcher.py b/scripts/watcher.py
index a938591..2148949 100644
--- a/scripts/watcher.py
+++ b/scripts/watcher.py
@@ -123,11 +123,20 @@ def resolve_ingest_failure(source: str):
         log.warning(f"Could not resolve ingest failure record (non-fatal): {e}")
 
 
+IGNORED_TOP_FOLDERS = {"Drafts"}
+
+
 def ingest_file(filepath: Path, embedder) -> int:
     if filepath.name.startswith(("~$", "~", ".")):
         return 0
     if filepath.suffix.lower() not in SUPPORTED:
         return 0
+    try:
+        rel = filepath.parent.relative_to(NEXTCLOUD_PATH)
+        if rel.parts and rel.parts[0] in IGNORED_TOP_FOLDERS:
+            return 0
+    except ValueError:
+        pass
     blocks = extract_blocks(filepath)
     if not blocks or not any(
         (b.get("text") or "").strip() or (b.get("heading") or "").strip()