def transcribe_and_save(tmp_path, timestamp, nextcloud_url, nextcloud_user, nextcloud_password):
    """Transcribe a recorded audio file and upload the text to Nextcloud.

    Intended to be scheduled as a background task, so it never raises:
    every failure is reported with ``print`` and the function returns
    ``None``.

    Args:
        tmp_path: Path of the temporary audio file. It is always removed
            once transcription has been attempted, whether or not it
            succeeded.
        timestamp: Capture timestamp; used in the markdown heading and as
            the prefix of the uploaded file name (``{timestamp}-voice.md``).
        nextcloud_url: Base URL of the Nextcloud instance.
        nextcloud_user: Nextcloud user name (also part of the WebDAV path).
        nextcloud_password: Password used for HTTP basic auth.

    Returns:
        None. Nothing is uploaded when the transcript is empty.
    """
    nc_auth = (nextcloud_user, nextcloud_password)
    try:
        try:
            segments, _ = whisper_model.transcribe(
                tmp_path, language="en", vad_filter=True, initial_prompt=WHISPER_PROMPT
            )
            # Build the transcript before the audio file is removed, in the
            # same order as the original code (the segments may be produced
            # lazily from the file -- TODO confirm against the whisper
            # backend in use).
            transcript = " ".join(s.text.strip() for s in segments).strip()
        finally:
            # Single cleanup point: runs on success and on failure alike.
            try:
                os.unlink(tmp_path)
            except FileNotFoundError:
                pass
            except OSError as cleanup_err:
                print(f"Could not remove temp audio {tmp_path}: {cleanup_err}")

        if not transcript:
            print(f"Async transcription empty for {timestamp} — nothing saved")
            return

        # Imported only when there is something to upload; a failing import
        # is then logged by the handler below instead of escaping the task.
        import requests as req_lib

        filename = f"{timestamp}-voice.md"
        content_md = f"# Capture — {timestamp}\n\n**type:** voice\n**modality:** audio\n**status:** unprocessed\n\n---\n\n{transcript}\n"
        captures_dir = f"{nextcloud_url}/remote.php/dav/files/{nextcloud_user}/Journal/Captures"
        # Best-effort directory creation: the status is deliberately not
        # checked because MKCOL on an already existing collection is
        # expected to be rejected by the server.
        req_lib.request("MKCOL", captures_dir, auth=nc_auth, timeout=10)
        url = f"{captures_dir}/{filename}"
        response = req_lib.put(url, data=content_md.encode("utf-8"), auth=nc_auth, timeout=30)
        # Without this check an HTTP error (bad credentials, quota, missing
        # parent folder) would still be reported as a successful save.
        response.raise_for_status()
        print(f"Async transcription saved: {filename}")
    except Exception as e:
        # Top-level boundary of the background task: log and swallow so the
        # failure cannot propagate into the server's task runner.
        print(f"Async transcription failed for {timestamp}: {e}")
UploadFile = File(None), image: UploadFile = File(None), project: str = Form(None), @@ -836,7 +863,6 @@ Keep the full description to 150-250 words. Do not speculate beyond what is visi elif audio is not None: if not whisper_model: raise HTTPException(status_code=503, detail="Whisper not available") - tmp_path = None try: suffix = ".webm" if audio.content_type and "mp4" in audio.content_type: @@ -847,25 +873,17 @@ Keep the full description to 150-250 words. Do not speculate beyond what is visi content_bytes = await audio.read() tmp.write(content_bytes) tmp_path = tmp.name - segments, _ = whisper_model.transcribe( - tmp_path, language="en", vad_filter=True, initial_prompt=WHISPER_PROMPT + background_tasks.add_task( + transcribe_and_save, + tmp_path=tmp_path, + timestamp=timestamp, + nextcloud_url=nextcloud_url, + nextcloud_user=nextcloud_user, + nextcloud_password=nextcloud_password, ) - transcript = " ".join(s.text.strip() for s in segments).strip() - os.unlink(tmp_path) - tmp_path = None - if not transcript: - return JSONResponse({"ok": False, "error": "No speech detected"}) - filename = f"{timestamp}-voice.md" - content_md = f"# Capture — {timestamp}\n\n**type:** voice\n**modality:** audio\n**status:** unprocessed\n\n---\n\n{transcript}\n" - captures_dir = f"{nextcloud_url}/remote.php/dav/files/{nextcloud_user}/Journal/Captures" - req_lib.request("MKCOL", captures_dir, auth=nc_auth, timeout=10) - url = f"{captures_dir}/{filename}" - req_lib.put(url, data=content_md.encode("utf-8"), auth=nc_auth, timeout=30) - return JSONResponse({"ok": True, "filename": filename, "transcript": transcript}) + return JSONResponse({"ok": True, "filename": f"{timestamp}-voice.md", "async": True}) except Exception as e: - if tmp_path and os.path.exists(tmp_path): - os.unlink(tmp_path) - return JSONResponse({"ok": False, "error": str(e), "error_type": "transcription_failed"}) + return JSONResponse({"ok": False, "error": str(e), "error_type": "capture_failed"}) else: raise 
HTTPException(status_code=400, detail="No audio or image provided")