api.py: save_document tool — pandoc render to Nextcloud Drafts/ via WebDAV
Claude can now write docx or pdf files to Aaron's Nextcloud Drafts/ when he asks for a document (bio, cover letter, statement, CV section) rather than chat text. Pandoc handles markdown -> docx and markdown -> pdf with the xelatex engine. Upload is a WebDAV PUT against the same Nextcloud instance dream.py already uses; NEXTCLOUD_URL / NEXTCLOUD_USER / NEXTCLOUD_PASSWORD in .env are reused. MKCOL ensures Drafts/ exists; PROPFIND-based collision check appends _2, _3, ... until unique. Filename sanitization strips path components and unsafe characters. System prompt instructs Claude to call save_document when the user wants a file (not chat text) and not to duplicate the file contents in the chat response — just write the file and tell Aaron where it landed. ingest.py and watcher.py now skip files under Drafts/ at ingest time so generated drafts don't pollute future retrieval. Drafts can still be opened, edited, and shipped; they just don't become part of the searchable corpus unless Aaron explicitly moves them out of Drafts/.
This commit is contained in:
+144
-1
@@ -4,6 +4,7 @@ import json
|
||||
import sqlite3
|
||||
import subprocess
|
||||
import hashlib
|
||||
import requests
|
||||
from pathlib import Path
|
||||
from datetime import datetime, timedelta
|
||||
from dotenv import load_dotenv
|
||||
@@ -140,6 +141,13 @@ consulting" not "my work." Results are unfiltered and ranked by
|
||||
semantic similarity; judge each chunk for relevance and ignore
|
||||
irrelevant hits rather than forcing them into the answer.
|
||||
|
||||
When Aaron asks for a document file — bio, cover letter, statement,
|
||||
CV section, anything he wants to send or edit outside chat — use the
|
||||
save_document tool to render the content to his Nextcloud Drafts/
|
||||
folder as docx (editable) or pdf (typeset). Don't duplicate the full
|
||||
content in the chat reply; just write the file and tell him where it
|
||||
landed. He can open it from any of his synced devices.
|
||||
|
||||
Use web search automatically when current external information is
|
||||
needed. Never re-brief on context that's already in memory or
|
||||
retrieved chunks.
|
||||
@@ -416,6 +424,134 @@ def create_conversation(title="New conversation"):
|
||||
conn.close()
|
||||
return conv_id
|
||||
|
||||
# Nextcloud connection settings, read from the environment with fallbacks.
# An empty NEXTCLOUD_PASSWORD means uploads are not configured.
NEXTCLOUD_URL = os.getenv("NEXTCLOUD_URL", "https://nextcloud.aaronnelson.studio")
NEXTCLOUD_USER = os.getenv("NEXTCLOUD_USER", "aaron")
NEXTCLOUD_PASSWORD = os.getenv("NEXTCLOUD_PASSWORD", "")

# WebDAV collection that generated documents are uploaded to. A trailing
# slash on NEXTCLOUD_URL is dropped here so the joined URL never contains
# "//remote.php"; NEXTCLOUD_URL itself is left exactly as configured.
DRAFTS_WEBDAV = (
    f"{NEXTCLOUD_URL.rstrip('/')}/remote.php/dav/files/{NEXTCLOUD_USER}/Drafts"
)

# Matches every character that is NOT allowed in a generated filename, i.e.
# anything other than ASCII letters, digits, "_", "-", "." and space.
_FILENAME_SAFE_RE = re.compile(r"[^A-Za-z0-9_\-\. ]")
|
||||
|
||||
|
||||
# Tool definition offered to the model: the tool name, usage guidance, and
# the JSON schema of its three required string arguments.
SAVE_DOCUMENT_TOOL = {
    "name": "save_document",
    "description": (
        # Split one sentence per group; the concatenated text is unchanged.
        "Render markdown content to docx or pdf and save it to Aaron's "
        "Nextcloud Drafts/ folder (syncs to his other devices and web UI). "
        "Use this when Aaron asks for a document file rather than chat text "
        "— bios, cover letters, statements, CV sections, anything he'll edit "
        "or send. "
        "Returns the saved filename. "
        "Pick a descriptive filename (no extension) like "
        "'Aaron_Nelson_Bio_Utah_2026-05'. "
        "Format is 'docx' for editable drafts, 'pdf' for typeset/print-ready "
        "output. "
        "Content should be well-formed markdown — # headings, **bold**, "
        "*italic*, - bulleted lists. "
        "Don't embed file content in the chat response too; just call this "
        "tool and tell Aaron where it landed."
    ),
    "input_schema": {
        "type": "object",
        "properties": {
            "content": {
                "type": "string",
                "description": "Document content in markdown.",
            },
            "filename": {
                "type": "string",
                "description": "Descriptive filename without extension.",
            },
            "format": {
                "type": "string",
                "enum": ["docx", "pdf"],
                "description": "Output format.",
            },
        },
        "required": ["content", "filename", "format"],
    },
}
|
||||
|
||||
|
||||
def _safe_filename(name: str, ext: str) -> str:
    """Build a single-component filename that ends in ``.<ext>``.

    Directory components are dropped, every character outside
    ``_FILENAME_SAFE_RE``'s allowed set is replaced by ``_``, and a trailing
    document extension the caller may have included is removed before the
    requested one is appended.

    Args:
        name: Requested filename, normally without an extension. A
            non-string value is treated as empty.
        ext: Extension to force, without the leading dot (e.g. ``"docx"``).

    Returns:
        ``"<base>.<ext>"``; ``<base>`` is ``"untitled"`` when nothing usable
        remains after sanitizing.
    """
    # A missing/None tool argument must not raise inside Path().
    raw = name if isinstance(name, str) else ""

    # Treat backslashes as separators too, so Windows-style paths lose their
    # directory part even when this runs on a POSIX host.
    base = Path(raw.replace("\\", "/")).name
    base = _FILENAME_SAFE_RE.sub("_", base).strip()

    # Remove only a recognised document extension. Path.stem cuts at the last
    # dot whatever follows it, which turned "Dr. Nelson Bio" into "Dr" and
    # "Report_v1.2" into "Report_v1".
    known_exts = {
        f".{ext}".lower(),
        ".docx", ".doc", ".pdf", ".md", ".markdown", ".txt", ".odt", ".rtf",
    }
    dot = base.rfind(".")
    if dot > 0 and base[dot:].lower() in known_exts:
        base = base[:dot]

    # Leading dots would create a hidden file; trailing dots/spaces are noise.
    base = base.strip(". ")
    return f"{base or 'untitled'}.{ext}"
|
||||
|
||||
|
||||
def _webdav_unique_url(base_url: str, filename: str, auth) -> tuple[str, str]:
    """Return a WebDAV URL and name that do not collide with an existing file.

    Probes ``<base_url>/<filename>`` with PROPFIND. While the server reports
    that the resource exists, the next candidate is tried with ``_2``,
    ``_3``, ... inserted before the extension (up to ``_50``). The original
    author's note says this matches the convention dream.py uses.

    Args:
        base_url: URL of the target collection, without a trailing slash.
        filename: Desired filename including its extension.
        auth: Credentials passed through to ``requests`` unchanged.

    Returns:
        ``(url, name)`` for the first candidate the server answers 404 for.

    Raises:
        requests.RequestException: The server could not be reached.
        RuntimeError: The server answered with an unexpected status (for
            example 401/403/5xx), or every candidate up to ``_50`` exists.
    """
    stem = Path(filename).stem
    suffix = Path(filename).suffix
    last_index = 50

    name = filename
    for next_index in range(2, last_index + 2):
        url = f"{base_url}/{name}"
        # Depth: 0 asks about this one resource only, never its children.
        check = requests.request(
            "PROPFIND", url, headers={"Depth": "0"}, auth=auth, timeout=10,
        )
        if check.status_code == 404:
            return url, name
        # 207 Multi-Status (or 200) means the resource exists. Anything else
        # is a server/auth problem: fail now instead of burning through every
        # candidate and then blaming the filename.
        if check.status_code not in (200, 207):
            raise RuntimeError(
                f"PROPFIND returned {check.status_code} for {name}"
            )
        name = f"{stem}_{next_index}{suffix}"

    raise RuntimeError("could not find a free filename")
|
||||
|
||||
|
||||
def _execute_save_document(tool_input):
    """Render markdown with pandoc and PUT the result to Nextcloud Drafts/.

    Args:
        tool_input: The ``save_document`` tool arguments: a mapping with
            ``content`` (markdown text), ``filename`` (no extension) and
            ``format`` (``"docx"`` or ``"pdf"``). Anything that is not a
            mapping is treated as empty input.

    Returns:
        A status string for Claude to relay. On success it is
        ``"Saved to Nextcloud: Drafts/<name>"``; every failure is reported as
        a string starting with ``"save_document:"`` rather than raised.
    """
    if not NEXTCLOUD_PASSWORD:
        return "save_document: NEXTCLOUD_PASSWORD not configured."

    # Tool input comes from the model; do not assume its shape or types.
    payload = tool_input if isinstance(tool_input, dict) else {}
    content = payload.get("content")
    raw_filename = payload.get("filename")
    fmt = payload.get("format", "docx")

    if not isinstance(content, str) or not content.strip():
        return "save_document: empty content, nothing saved."
    if fmt not in ("docx", "pdf"):
        return f"save_document: unsupported format {fmt!r}; use 'docx' or 'pdf'."
    if not isinstance(raw_filename, str) or not raw_filename.strip():
        raw_filename = "untitled"

    safe_name = _safe_filename(raw_filename, fmt)
    auth = (NEXTCLOUD_USER, NEXTCLOUD_PASSWORD)

    # Ensure Drafts/ exists before spending time on the render.
    # 201 = created, 405 = already there; any other error status means the
    # upload cannot succeed either, so report it now.
    try:
        mkcol = requests.request("MKCOL", DRAFTS_WEBDAV, auth=auth, timeout=10)
    except requests.RequestException as e:
        return f"save_document: could not reach Nextcloud ({e})."
    if not (mkcol.ok or mkcol.status_code == 405):
        return (
            "save_document: could not prepare Drafts/ "
            f"(MKCOL returned {mkcol.status_code})."
        )

    # "-o -" makes pandoc write the binary document to stdout.
    # NOTE(review): content is model-generated and pandoc's markdown reader
    # passes raw TeX through to xelatex for pdf output; consider "--sandbox"
    # or "-f markdown-raw_tex" if untrusted text can reach this tool.
    cmd = ["pandoc", "-f", "markdown", "-t", fmt]
    if fmt == "pdf":
        cmd.append("--pdf-engine=xelatex")
    cmd += ["-o", "-"]
    try:
        proc = subprocess.run(
            cmd,
            input=content.encode("utf-8"),
            capture_output=True,
            timeout=120,
        )
    except subprocess.TimeoutExpired:
        return "save_document: pandoc timed out (>120s)."
    except FileNotFoundError:
        return "save_document: pandoc not installed."
    except OSError as e:
        return f"save_document: could not run pandoc ({e})."
    if proc.returncode != 0:
        err = proc.stderr.decode("utf-8", errors="replace")[:400]
        return f"save_document: pandoc failed: {err}"

    # Pick the final name only after the (possibly slow) render, so the gap
    # between the collision check and the PUT is as short as possible.
    try:
        url, final_name = _webdav_unique_url(DRAFTS_WEBDAV, safe_name, auth)
    except (requests.RequestException, RuntimeError) as e:
        return f"save_document: filename probe failed ({e})."

    try:
        put = requests.put(url, data=proc.stdout, auth=auth, timeout=60)
    except requests.RequestException as e:
        return f"save_document: WebDAV upload failed ({e})."
    if put.status_code not in (200, 201, 204):
        return f"save_document: WebDAV upload returned {put.status_code}."

    return f"Saved to Nextcloud: Drafts/{final_name}"
|
||||
|
||||
|
||||
RETRIEVE_DOCUMENTS_TOOL = {
|
||||
"name": "retrieve_documents",
|
||||
"description": (
|
||||
@@ -488,7 +624,7 @@ def chat(user_message, conversation_id, settings, client_time=None):
|
||||
|
||||
messages = history + [{"role": "user", "content": full_message}]
|
||||
|
||||
tools = [RETRIEVE_DOCUMENTS_TOOL]
|
||||
tools = [RETRIEVE_DOCUMENTS_TOOL, SAVE_DOCUMENT_TOOL]
|
||||
if settings.get("web_search", True):
|
||||
tools.append({"type": "web_search_20250305", "name": "web_search"})
|
||||
|
||||
@@ -517,6 +653,13 @@ def chat(user_message, conversation_id, settings, client_time=None):
|
||||
"tool_use_id": block.id,
|
||||
"content": result_text,
|
||||
})
|
||||
elif block.name == "save_document":
|
||||
result_text = _execute_save_document(block.input)
|
||||
tool_results.append({
|
||||
"type": "tool_result",
|
||||
"tool_use_id": block.id,
|
||||
"content": result_text,
|
||||
})
|
||||
else:
|
||||
tool_results.append({
|
||||
"type": "tool_result",
|
||||
|
||||
@@ -77,12 +77,22 @@ def _resolve_failure(source: str) -> None:
|
||||
print(f" Could not resolve ingest failure record (non-fatal): {e}")
|
||||
|
||||
|
||||
# Names of top-level folders (first path component relative to the ingest
# root) whose files are skipped at ingest time.
IGNORED_TOP_FOLDERS = {"Drafts"}
|
||||
|
||||
|
||||
def _ingest_one(filepath: Path, embedder, root: Path = None) -> int:
|
||||
"""Ingest a single file. Returns chunk count, 0 on skip/failure."""
|
||||
if filepath.name.startswith(("~$", ".")):
|
||||
return 0
|
||||
if filepath.suffix.lower() not in SUPPORTED:
|
||||
return 0
|
||||
if root is not None:
|
||||
try:
|
||||
rel = filepath.parent.relative_to(root)
|
||||
if rel.parts and rel.parts[0] in IGNORED_TOP_FOLDERS:
|
||||
return 0
|
||||
except ValueError:
|
||||
pass
|
||||
blocks = extract_blocks(filepath)
|
||||
if not blocks or not any(
|
||||
(b.get("text") or "").strip() or (b.get("heading") or "").strip()
|
||||
|
||||
@@ -123,11 +123,20 @@ def resolve_ingest_failure(source: str):
|
||||
log.warning(f"Could not resolve ingest failure record (non-fatal): {e}")
|
||||
|
||||
|
||||
# Names of top-level folders (first path component relative to
# NEXTCLOUD_PATH) whose files are skipped at ingest time.
IGNORED_TOP_FOLDERS = {"Drafts"}
|
||||
|
||||
|
||||
def ingest_file(filepath: Path, embedder) -> int:
|
||||
if filepath.name.startswith(("~$", "~", ".")):
|
||||
return 0
|
||||
if filepath.suffix.lower() not in SUPPORTED:
|
||||
return 0
|
||||
try:
|
||||
rel = filepath.parent.relative_to(NEXTCLOUD_PATH)
|
||||
if rel.parts and rel.parts[0] in IGNORED_TOP_FOLDERS:
|
||||
return 0
|
||||
except ValueError:
|
||||
pass
|
||||
blocks = extract_blocks(filepath)
|
||||
if not blocks or not any(
|
||||
(b.get("text") or "").strip() or (b.get("heading") or "").strip()
|
||||
|
||||
Reference in New Issue
Block a user