diff --git a/scripts/api.py b/scripts/api.py
index 8ee05c9..4647471 100644
--- a/scripts/api.py
+++ b/scripts/api.py
@@ -4,6 +4,7 @@ import json
 import sqlite3
 import subprocess
 import hashlib
+import requests
 from pathlib import Path
 from datetime import datetime, timedelta
 from dotenv import load_dotenv
@@ -140,6 +141,13 @@ consulting" not "my work."
 Results are unfiltered and ranked by semantic similarity; judge each chunk
 for relevance and ignore irrelevant hits rather than forcing them into the
 answer.
+When Aaron asks for a document file — bio, cover letter, statement,
+CV section, anything he wants to send or edit outside chat — use the
+save_document tool to render the content to his Nextcloud Drafts/
+folder as docx (editable) or pdf (typeset). Don't duplicate the full
+content in the chat reply; just write the file and tell him where it
+landed. He can open it from any of his synced devices.
+
 
 Use web search automatically when current external information is needed.
 Never re-brief on context that's already in memory or retrieved chunks.
@@ -416,6 +424,181 @@ def create_conversation(title="New conversation"):
     conn.close()
     return conv_id
 
+# Nextcloud WebDAV target for generated documents (override via environment).
+NEXTCLOUD_URL = os.getenv(
+    "NEXTCLOUD_URL", "https://nextcloud.aaronnelson.studio"
+).rstrip("/")  # tolerate a trailing slash in the configured URL
+NEXTCLOUD_USER = os.getenv("NEXTCLOUD_USER", "aaron")
+NEXTCLOUD_PASSWORD = os.getenv("NEXTCLOUD_PASSWORD", "")
+DRAFTS_WEBDAV = f"{NEXTCLOUD_URL}/remote.php/dav/files/{NEXTCLOUD_USER}/Drafts"
+
+# Anything outside this set is replaced with "_" in generated filenames.
+_FILENAME_SAFE_RE = re.compile(r"[^A-Za-z0-9_\-\. ]")
+# Document extensions stripped if the model appends one despite the schema.
+_STRIPPABLE_SUFFIXES = (".docx", ".pdf", ".md", ".markdown", ".txt")
+# Upper bound on name probes (name, name_2, ... name_50) before giving up.
+_MAX_NAME_PROBES = 50
+
+
+SAVE_DOCUMENT_TOOL = {
+    "name": "save_document",
+    "description": (
+        "Render markdown content to docx or pdf and save it to Aaron's Nextcloud "
+        "Drafts/ folder (syncs to his other devices and web UI). Use this when "
+        "Aaron asks for a document file rather than chat text — bios, cover "
+        "letters, statements, CV sections, anything he'll edit or send. Returns "
+        "the saved filename. Pick a descriptive filename (no extension) like "
+        "'Aaron_Nelson_Bio_Utah_2026-05'. Format is 'docx' for editable drafts, "
+        "'pdf' for typeset/print-ready output. Content should be well-formed "
+        "markdown — # headings, **bold**, *italic*, - bulleted lists. Don't "
+        "embed file content in the chat response too; just call this tool and "
+        "tell Aaron where it landed."
+    ),
+    "input_schema": {
+        "type": "object",
+        "properties": {
+            "content": {
+                "type": "string",
+                "description": "Document content in markdown.",
+            },
+            "filename": {
+                "type": "string",
+                "description": "Descriptive filename without extension.",
+            },
+            "format": {
+                "type": "string",
+                "enum": ["docx", "pdf"],
+                "description": "Output format.",
+            },
+        },
+        "required": ["content", "filename", "format"],
+    },
+}
+
+
+def _safe_filename(name: str, ext: str) -> str:
+    """Build a safe ``<base>.<ext>`` filename from a model-supplied name.
+
+    Drops directory components, replaces characters outside
+    ``[A-Za-z0-9_-. ]`` with ``_``, and trims surrounding spaces and dots so
+    the result is never a hidden dotfile. A trailing document extension is
+    removed only when it is a known one, so dotted names such as
+    ``Bio_v1.2`` keep their full stem. Falls back to ``untitled`` when
+    nothing usable remains.
+    """
+    base = _FILENAME_SAFE_RE.sub("_", Path(name).name).strip(" .")
+    lowered = base.lower()
+    for known in _STRIPPABLE_SUFFIXES:
+        if lowered.endswith(known):
+            base = base[: -len(known)].rstrip(" .")
+            break
+    return f"{base or 'untitled'}.{ext}"
+
+
+def _webdav_unique_url(base_url: str, filename: str, auth) -> tuple[str, str]:
+    """Return ``(url, name)`` for a name not yet taken under *base_url*.
+
+    Probes *filename* first, then ``<stem>_2<suffix>``, ``<stem>_3<suffix>``,
+    ... with PROPFIND until one returns 404. Matches the convention dream.py
+    uses.
+
+    Raises:
+        requests.RequestException: a probe could not be sent.
+        RuntimeError: a probe returned a status that is neither "exists"
+            (200/207) nor 404 (for example 401 or 5xx), or every candidate
+            name up to ``_MAX_NAME_PROBES`` is taken.
+    """
+    stem = Path(filename).stem
+    suffix = Path(filename).suffix
+    for attempt in range(1, _MAX_NAME_PROBES + 1):
+        name = filename if attempt == 1 else f"{stem}_{attempt}{suffix}"
+        url = f"{base_url}/{name}"
+        # Depth: 0 limits the probe to the resource itself.
+        check = requests.request(
+            "PROPFIND", url, auth=auth, headers={"Depth": "0"}, timeout=10,
+        )
+        if check.status_code == 404:
+            return url, name
+        if check.status_code not in (200, 207):
+            # Auth or server errors are not "name taken"; fail fast instead
+            # of burning every probe and reporting a misleading error.
+            raise RuntimeError(
+                f"unexpected PROPFIND status {check.status_code}"
+            )
+    raise RuntimeError("could not find a free filename")
+
+
+def _execute_save_document(tool_input):
+    """Generate a document via pandoc and PUT it to Nextcloud Drafts/.
+
+    *tool_input* is the save_document tool payload: ``content`` (markdown),
+    ``filename`` (no extension) and ``format`` (``docx`` or ``pdf``).
+    Expected failures are reported in the return value rather than raised.
+
+    Returns a user-facing status string for Claude to relay.
+    """
+    if not NEXTCLOUD_PASSWORD:
+        return "save_document: NEXTCLOUD_PASSWORD not configured."
+
+    payload = tool_input or {}
+    # "or" also covers keys that are present but null.
+    content = payload.get("content") or ""
+    raw_filename = payload.get("filename") or "untitled"
+    fmt = payload.get("format") or "docx"
+
+    if not isinstance(content, str) or not content.strip():
+        return "save_document: empty content, nothing saved."
+    if fmt not in ("docx", "pdf"):
+        return f"save_document: unsupported format {fmt!r}; use 'docx' or 'pdf'."
+
+    safe_name = _safe_filename(str(raw_filename), fmt)
+    auth = (NEXTCLOUD_USER, NEXTCLOUD_PASSWORD)
+
+    # Ensure Drafts/ exists. 201 = created, 405 = already there — both fine.
+    # Anything else (401, 403, 5xx, ...) means the upload cannot succeed.
+    try:
+        mkcol = requests.request("MKCOL", DRAFTS_WEBDAV, auth=auth, timeout=10)
+    except requests.RequestException as e:
+        return f"save_document: could not reach Nextcloud ({e})."
+    if mkcol.status_code not in (201, 405):
+        return f"save_document: MKCOL Drafts/ returned {mkcol.status_code}."
+
+    # Render before probing for a free name so the gap between the probe and
+    # the PUT is not widened by a pandoc run of up to 120s.
+    # NOTE(review): pandoc markdown accepts raw TeX; confirm that is wanted
+    # for model-written content when the target is pdf.
+    cmd = ["pandoc", "-f", "markdown", "-t", fmt]
+    if fmt == "pdf":
+        cmd.append("--pdf-engine=xelatex")
+    cmd += ["-o", "-"]  # binary formats go to stdout only when forced
+    try:
+        proc = subprocess.run(
+            cmd, input=content.encode("utf-8"),
+            capture_output=True, timeout=120,
+        )
+    except subprocess.TimeoutExpired:
+        return "save_document: pandoc timed out (>120s)."
+    except FileNotFoundError:
+        return "save_document: pandoc not installed."
+    if proc.returncode != 0:
+        err = proc.stderr.decode("utf-8", errors="replace")[:400]
+        return f"save_document: pandoc failed: {err}"
+
+    try:
+        url, final_name = _webdav_unique_url(DRAFTS_WEBDAV, safe_name, auth)
+    except (requests.RequestException, RuntimeError) as e:
+        return f"save_document: filename probe failed ({e})."
+
+    try:
+        put = requests.put(url, data=proc.stdout, auth=auth, timeout=60)
+    except requests.RequestException as e:
+        return f"save_document: WebDAV upload failed ({e})."
+    if put.status_code not in (200, 201, 204):
+        return f"save_document: WebDAV upload returned {put.status_code}."
+
+    return f"Saved to Nextcloud: Drafts/{final_name}"
+
+
 RETRIEVE_DOCUMENTS_TOOL = {
     "name": "retrieve_documents",
     "description": (
@@ -488,7 +671,7 @@ def chat(user_message, conversation_id, settings, client_time=None):
 
     messages = history + [{"role": "user", "content": full_message}]
 
-    tools = [RETRIEVE_DOCUMENTS_TOOL]
+    tools = [RETRIEVE_DOCUMENTS_TOOL, SAVE_DOCUMENT_TOOL]
     if settings.get("web_search", True):
         tools.append({"type": "web_search_20250305", "name": "web_search"})
 
@@ -517,6 +700,13 @@ def chat(user_message, conversation_id, settings, client_time=None):
                         "tool_use_id": block.id,
                         "content": result_text,
                     })
+                elif block.name == "save_document":
+                    result_text = _execute_save_document(block.input)
+                    tool_results.append({
+                        "type": "tool_result",
+                        "tool_use_id": block.id,
+                        "content": result_text,
+                    })
                 else:
                     tool_results.append({
                         "type": "tool_result",
diff --git a/scripts/ingest.py b/scripts/ingest.py
index 76bc140..8b37f8d 100644
--- a/scripts/ingest.py
+++ b/scripts/ingest.py
@@ -77,12 +77,22 @@ def _resolve_failure(source: str) -> None:
         print(f" Could not resolve ingest failure record (non-fatal): {e}")
 
 
+IGNORED_TOP_FOLDERS = {"Drafts"}
+
+
 def _ingest_one(filepath: Path, embedder, root: Path = None) -> int:
     """Ingest a single file. Returns chunk count, 0 on skip/failure."""
     if filepath.name.startswith(("~$", ".")):
         return 0
     if filepath.suffix.lower() not in SUPPORTED:
         return 0
+    if root is not None:
+        try:
+            rel = filepath.parent.relative_to(root)
+            if rel.parts and rel.parts[0] in IGNORED_TOP_FOLDERS:
+                return 0
+        except ValueError:
+            pass
     blocks = extract_blocks(filepath)
     if not blocks or not any(
         (b.get("text") or "").strip() or (b.get("heading") or "").strip()
diff --git a/scripts/watcher.py b/scripts/watcher.py
index a938591..2148949 100644
--- a/scripts/watcher.py
+++ b/scripts/watcher.py
@@ -123,11 +123,20 @@ def resolve_ingest_failure(source: str):
         log.warning(f"Could not resolve ingest failure record (non-fatal): {e}")
 
 
+IGNORED_TOP_FOLDERS = {"Drafts"}
+
+
 def ingest_file(filepath: Path, embedder) -> int:
     if filepath.name.startswith(("~$", "~", ".")):
         return 0
     if filepath.suffix.lower() not in SUPPORTED:
         return 0
+    try:
+        rel = filepath.parent.relative_to(NEXTCLOUD_PATH)
+        if rel.parts and rel.parts[0] in IGNORED_TOP_FOLDERS:
+            return 0
+    except ValueError:
+        pass
     blocks = extract_blocks(filepath)
     if not blocks or not any(
         (b.get("text") or "").strip() or (b.get("heading") or "").strip()