api.py: save_document tool — pandoc render to Nextcloud Drafts/ via WebDAV
Claude can now write docx or pdf files to Aaron's Nextcloud Drafts/ when he asks for a document (bio, cover letter, statement, CV section) rather than chat text. Pandoc handles markdown -> docx and markdown -> pdf with the xelatex engine. Upload is a WebDAV PUT against the same Nextcloud instance dream.py already uses; NEXTCLOUD_URL / NEXTCLOUD_USER / NEXTCLOUD_PASSWORD in .env are reused. MKCOL ensures Drafts/ exists; PROPFIND-based collision check appends _2, _3, ... until unique. Filename sanitization strips path components and unsafe characters. System prompt instructs Claude to call save_document when the user wants a file (not chat text) and not to duplicate the file contents in the chat response — just write the file and tell Aaron where it landed. ingest.py and watcher.py now skip files under Drafts/ at ingest time so generated drafts don't pollute future retrieval. Drafts can still be opened, edited, and shipped; they just don't become part of the searchable corpus unless Aaron explicitly moves them out of Drafts/.
This commit is contained in:
+144
-1
@@ -4,6 +4,7 @@ import json
|
|||||||
import sqlite3
|
import sqlite3
|
||||||
import subprocess
|
import subprocess
|
||||||
import hashlib
|
import hashlib
|
||||||
|
import requests
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
@@ -140,6 +141,13 @@ consulting" not "my work." Results are unfiltered and ranked by
|
|||||||
semantic similarity; judge each chunk for relevance and ignore
|
semantic similarity; judge each chunk for relevance and ignore
|
||||||
irrelevant hits rather than forcing them into the answer.
|
irrelevant hits rather than forcing them into the answer.
|
||||||
|
|
||||||
|
When Aaron asks for a document file — bio, cover letter, statement,
|
||||||
|
CV section, anything he wants to send or edit outside chat — use the
|
||||||
|
save_document tool to render the content to his Nextcloud Drafts/
|
||||||
|
folder as docx (editable) or pdf (typeset). Don't duplicate the full
|
||||||
|
content in the chat reply; just write the file and tell him where it
|
||||||
|
landed. He can open it from any of his synced devices.
|
||||||
|
|
||||||
Use web search automatically when current external information is
|
Use web search automatically when current external information is
|
||||||
needed. Never re-brief on context that's already in memory or
|
needed. Never re-brief on context that's already in memory or
|
||||||
retrieved chunks.
|
retrieved chunks.
|
||||||
@@ -416,6 +424,134 @@ def create_conversation(title="New conversation"):
|
|||||||
conn.close()
|
conn.close()
|
||||||
return conv_id
|
return conv_id
|
||||||
|
|
||||||
|
# Nextcloud connection settings, read from the environment with fallbacks.
# An empty NEXTCLOUD_PASSWORD means uploads are not configured.
NEXTCLOUD_URL = os.getenv("NEXTCLOUD_URL", "https://nextcloud.aaronnelson.studio")
NEXTCLOUD_USER = os.getenv("NEXTCLOUD_USER", "aaron")
NEXTCLOUD_PASSWORD = os.getenv("NEXTCLOUD_PASSWORD", "")

# WebDAV collection URL of the user's Drafts/ folder. A trailing slash on the
# configured base URL is dropped here so the result never contains
# "//remote.php"; NEXTCLOUD_URL itself is left exactly as configured.
DRAFTS_WEBDAV = (
    f"{NEXTCLOUD_URL.rstrip('/')}/remote.php/dav/files/{NEXTCLOUD_USER}/Drafts"
)

# Matches every character that is NOT an ASCII letter, digit, underscore,
# hyphen, dot, or space; used to scrub caller-supplied filenames.
_FILENAME_SAFE_RE = re.compile(r"[^A-Za-z0-9_\-\. ]")
|
||||||
|
|
||||||
|
|
||||||
|
# Model-facing guidance for the save_document tool. The fragments are joined
# with single spaces into one paragraph.
_SAVE_DOCUMENT_DESCRIPTION = " ".join((
    "Render markdown content to docx or pdf and save it to Aaron's Nextcloud",
    "Drafts/ folder (syncs to his other devices and web UI). Use this when",
    "Aaron asks for a document file rather than chat text — bios, cover",
    "letters, statements, CV sections, anything he'll edit or send. Returns",
    "the saved filename. Pick a descriptive filename (no extension) like",
    "'Aaron_Nelson_Bio_Utah_2026-05'. Format is 'docx' for editable drafts,",
    "'pdf' for typeset/print-ready output. Content should be well-formed",
    "markdown — # headings, **bold**, *italic*, - bulleted lists. Don't",
    "embed file content in the chat response too; just call this tool and",
    "tell Aaron where it landed.",
))

# JSON Schema for the tool arguments; all three fields are required.
_SAVE_DOCUMENT_SCHEMA = {
    "type": "object",
    "properties": {
        "content": {
            "type": "string",
            "description": "Document content in markdown.",
        },
        "filename": {
            "type": "string",
            "description": "Descriptive filename without extension.",
        },
        "format": {
            "type": "string",
            "enum": ["docx", "pdf"],
            "description": "Output format.",
        },
    },
    "required": ["content", "filename", "format"],
}

# Tool definition offered to the model alongside the other chat tools.
SAVE_DOCUMENT_TOOL = {
    "name": "save_document",
    "description": _SAVE_DOCUMENT_DESCRIPTION,
    "input_schema": _SAVE_DOCUMENT_SCHEMA,
}
|
||||||
|
|
||||||
|
|
||||||
|
def _safe_filename(name: str, ext: str) -> str:
    """Build a safe ``<base>.<ext>`` filename from a caller-supplied name.

    Args:
        name: Requested filename, ideally without an extension. Anything that
            is not a string is treated as missing.
        ext: Extension to force onto the result, without the leading dot.

    Returns:
        The sanitized filename. Falls back to ``untitled.<ext>`` when nothing
        usable remains after sanitizing.
    """
    # Extensions a caller might append despite being asked not to. Only these
    # are stripped, so ordinary dots ("Dr. Nelson Bio", "Bio_v1.2") survive
    # instead of being truncated at the last dot.
    redundant_exts = {
        "docx", "doc", "pdf", "md", "markdown", "txt", "odt", "rtf", "tex", "html",
    }

    raw = name if isinstance(name, str) else ""
    # Keep only the final path component, treating both "/" and "\" as
    # separators regardless of the host platform.
    base = raw.replace("\\", "/").rstrip("/").rsplit("/", 1)[-1]
    base = _FILENAME_SAFE_RE.sub("_", base).strip()

    head, dot, tail = base.rpartition(".")
    if dot and tail.lower() in redundant_exts | {ext.lower()}:
        base = head

    # No leading dots (hidden files, "..") and no trailing dots or spaces.
    base = base.strip(" .")
    return f"{base or 'untitled'}.{ext}"
|
||||||
|
|
||||||
|
|
||||||
|
def _webdav_unique_url(base_url: str, filename: str, auth) -> tuple[str, str]:
    """Find a WebDAV URL under ``base_url`` that does not yet exist.

    Probes ``filename`` first, then ``<stem>_2<suffix>``, ``<stem>_3<suffix>``
    and so on, returning the first candidate for which PROPFIND answers 404.
    The original note says this mirrors the suffix convention used by dream.py.

    Args:
        base_url: Collection URL without a trailing slash.
        filename: Desired filename including its extension.
        auth: Credentials passed straight through to ``requests``.

    Returns:
        ``(url, name)`` for the first free candidate.

    Raises:
        RuntimeError: If the server answers with anything other than
            "exists" (200/207) or "missing" (404), or if every candidate up to
            ``_50`` is taken.
        requests.RequestException: On transport-level failures.
    """
    path = Path(filename)
    stem, suffix = path.stem, path.suffix
    candidates = [filename]
    candidates.extend(f"{stem}_{i}{suffix}" for i in range(2, 51))

    for name in candidates:
        url = f"{base_url}/{name}"
        # Depth: 0 asks about this one resource only, not its descendants.
        check = requests.request(
            "PROPFIND", url, auth=auth, headers={"Depth": "0"}, timeout=10,
        )
        if check.status_code == 404:
            return url, name
        if check.status_code not in (200, 207):
            # 401/403/5xx and similar say nothing about whether the name is
            # taken; fail now instead of burning through every candidate.
            raise RuntimeError(f"PROPFIND returned {check.status_code}")

    raise RuntimeError("could not find a free filename")
|
||||||
|
|
||||||
|
|
||||||
|
def _execute_save_document(tool_input) -> str:
    """Render markdown with pandoc and upload the result to Nextcloud Drafts/.

    Args:
        tool_input: Arguments of the save_document tool call: ``content``
            (markdown text), ``filename`` (without extension) and ``format``
            (``"docx"`` or ``"pdf"``). ``None`` is treated as empty.

    Returns:
        A status string for Claude to relay. On success it is
        ``"Saved to Nextcloud: Drafts/<name>"``; every handled failure is
        reported as a ``"save_document: ..."`` message rather than raised.
    """
    if not NEXTCLOUD_PASSWORD:
        return "save_document: NEXTCLOUD_PASSWORD not configured."

    payload = tool_input or {}
    # `or` also covers explicit None values, not just missing keys.
    content = payload.get("content") or ""
    raw_filename = payload.get("filename") or "untitled"
    fmt = payload.get("format") or "docx"

    if not isinstance(content, str) or not content.strip():
        return "save_document: empty content, nothing saved."
    if fmt not in ("docx", "pdf"):
        return f"save_document: unsupported format {fmt!r}; use 'docx' or 'pdf'."

    safe_name = _safe_filename(str(raw_filename), fmt)
    auth = (NEXTCLOUD_USER, NEXTCLOUD_PASSWORD)

    # Render before touching the network: a pandoc failure then costs no
    # WebDAV round-trips, and the free-name probe below happens right before
    # the upload instead of up to two minutes earlier.
    cmd = ["pandoc", "-f", "markdown", "-t", fmt]
    if fmt == "pdf":
        cmd.append("--pdf-engine=xelatex")
    cmd += ["-o", "-"]  # "-o -" sends the rendered file to stdout
    try:
        proc = subprocess.run(
            cmd,
            input=content.encode("utf-8"),
            capture_output=True,
            timeout=120,
        )
    except subprocess.TimeoutExpired:
        return "save_document: pandoc timed out (>120s)."
    except FileNotFoundError:
        return "save_document: pandoc not installed."
    if proc.returncode != 0:
        err = proc.stderr.decode("utf-8", errors="replace")[:400]
        return f"save_document: pandoc failed: {err}"

    # Ensure Drafts/ exists. 201 = created, 405 = already there; both fine.
    # Anything else (bad credentials, server error) is reported here instead
    # of surfacing later as a confusing filename-probe failure.
    try:
        mkcol = requests.request("MKCOL", DRAFTS_WEBDAV, auth=auth, timeout=10)
    except requests.RequestException as e:
        return f"save_document: could not reach Nextcloud ({e})."
    if mkcol.status_code not in (201, 405):
        return (
            "save_document: could not prepare Drafts/ "
            f"(MKCOL returned {mkcol.status_code})."
        )

    try:
        url, final_name = _webdav_unique_url(DRAFTS_WEBDAV, safe_name, auth)
    except (requests.RequestException, RuntimeError) as e:
        return f"save_document: filename probe failed ({e})."

    try:
        put = requests.put(url, data=proc.stdout, auth=auth, timeout=60)
    except requests.RequestException as e:
        return f"save_document: WebDAV upload failed ({e})."
    if put.status_code not in (200, 201, 204):
        return f"save_document: WebDAV upload returned {put.status_code}."

    return f"Saved to Nextcloud: Drafts/{final_name}"
|
||||||
|
|
||||||
|
|
||||||
RETRIEVE_DOCUMENTS_TOOL = {
|
RETRIEVE_DOCUMENTS_TOOL = {
|
||||||
"name": "retrieve_documents",
|
"name": "retrieve_documents",
|
||||||
"description": (
|
"description": (
|
||||||
@@ -488,7 +624,7 @@ def chat(user_message, conversation_id, settings, client_time=None):
|
|||||||
|
|
||||||
messages = history + [{"role": "user", "content": full_message}]
|
messages = history + [{"role": "user", "content": full_message}]
|
||||||
|
|
||||||
tools = [RETRIEVE_DOCUMENTS_TOOL]
|
tools = [RETRIEVE_DOCUMENTS_TOOL, SAVE_DOCUMENT_TOOL]
|
||||||
if settings.get("web_search", True):
|
if settings.get("web_search", True):
|
||||||
tools.append({"type": "web_search_20250305", "name": "web_search"})
|
tools.append({"type": "web_search_20250305", "name": "web_search"})
|
||||||
|
|
||||||
@@ -517,6 +653,13 @@ def chat(user_message, conversation_id, settings, client_time=None):
|
|||||||
"tool_use_id": block.id,
|
"tool_use_id": block.id,
|
||||||
"content": result_text,
|
"content": result_text,
|
||||||
})
|
})
|
||||||
|
elif block.name == "save_document":
|
||||||
|
result_text = _execute_save_document(block.input)
|
||||||
|
tool_results.append({
|
||||||
|
"type": "tool_result",
|
||||||
|
"tool_use_id": block.id,
|
||||||
|
"content": result_text,
|
||||||
|
})
|
||||||
else:
|
else:
|
||||||
tool_results.append({
|
tool_results.append({
|
||||||
"type": "tool_result",
|
"type": "tool_result",
|
||||||
|
|||||||
@@ -77,12 +77,22 @@ def _resolve_failure(source: str) -> None:
|
|||||||
print(f" Could not resolve ingest failure record (non-fatal): {e}")
|
print(f" Could not resolve ingest failure record (non-fatal): {e}")
|
||||||
|
|
||||||
|
|
||||||
|
IGNORED_TOP_FOLDERS = {"Drafts"}
|
||||||
|
|
||||||
|
|
||||||
def _ingest_one(filepath: Path, embedder, root: Path = None) -> int:
|
def _ingest_one(filepath: Path, embedder, root: Path = None) -> int:
|
||||||
"""Ingest a single file. Returns chunk count, 0 on skip/failure."""
|
"""Ingest a single file. Returns chunk count, 0 on skip/failure."""
|
||||||
if filepath.name.startswith(("~$", ".")):
|
if filepath.name.startswith(("~$", ".")):
|
||||||
return 0
|
return 0
|
||||||
if filepath.suffix.lower() not in SUPPORTED:
|
if filepath.suffix.lower() not in SUPPORTED:
|
||||||
return 0
|
return 0
|
||||||
|
if root is not None:
|
||||||
|
try:
|
||||||
|
rel = filepath.parent.relative_to(root)
|
||||||
|
if rel.parts and rel.parts[0] in IGNORED_TOP_FOLDERS:
|
||||||
|
return 0
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
blocks = extract_blocks(filepath)
|
blocks = extract_blocks(filepath)
|
||||||
if not blocks or not any(
|
if not blocks or not any(
|
||||||
(b.get("text") or "").strip() or (b.get("heading") or "").strip()
|
(b.get("text") or "").strip() or (b.get("heading") or "").strip()
|
||||||
|
|||||||
@@ -123,11 +123,20 @@ def resolve_ingest_failure(source: str):
|
|||||||
log.warning(f"Could not resolve ingest failure record (non-fatal): {e}")
|
log.warning(f"Could not resolve ingest failure record (non-fatal): {e}")
|
||||||
|
|
||||||
|
|
||||||
|
IGNORED_TOP_FOLDERS = {"Drafts"}
|
||||||
|
|
||||||
|
|
||||||
def ingest_file(filepath: Path, embedder) -> int:
|
def ingest_file(filepath: Path, embedder) -> int:
|
||||||
if filepath.name.startswith(("~$", "~", ".")):
|
if filepath.name.startswith(("~$", "~", ".")):
|
||||||
return 0
|
return 0
|
||||||
if filepath.suffix.lower() not in SUPPORTED:
|
if filepath.suffix.lower() not in SUPPORTED:
|
||||||
return 0
|
return 0
|
||||||
|
try:
|
||||||
|
rel = filepath.parent.relative_to(NEXTCLOUD_PATH)
|
||||||
|
if rel.parts and rel.parts[0] in IGNORED_TOP_FOLDERS:
|
||||||
|
return 0
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
blocks = extract_blocks(filepath)
|
blocks = extract_blocks(filepath)
|
||||||
if not blocks or not any(
|
if not blocks or not any(
|
||||||
(b.get("text") or "").strip() or (b.get("heading") or "").strip()
|
(b.get("text") or "").strip() or (b.get("heading") or "").strip()
|
||||||
|
|||||||
Reference in New Issue
Block a user