async voice transcription — return immediately, whisper runs in background

2026-04-29 17:48:22 +00:00
parent eb7cf3be10
commit a05fcec882
1 changed file with 37 additions and 19 deletions
@@ -8,7 +8,7 @@ from datetime import datetime
 from dotenv import load_dotenv
 from sentence_transformers import SentenceTransformer
 import anthropic
-from fastapi import FastAPI, Request, Response, Depends, HTTPException
+from fastapi import FastAPI, Request, Response, Depends, HTTPException, BackgroundTasks
 import psycopg2
 import psycopg2.extras
 from fastapi import UploadFile, File, Form
@@ -696,8 +696,35 @@ async def run_dreamer(request: Request, auth: str = Depends(require_auth)):
    except Exception as e:
        return JSONResponse({"started": False, "error": str(e)})
 def transcribe_and_save(tmp_path, timestamp, nextcloud_url, nextcloud_user, nextcloud_password):
    """Background task — transcribe an audio file and upload the transcript to Nextcloud.

    Intended to run after the endpoint has already returned, so nothing is
    raised to a caller: every outcome (saved, empty, failed) is reported via
    print() only.

    Args:
        tmp_path: Path of the temporary audio file. It is always removed
            before this function returns, whether or not transcription or
            upload succeeded.
        timestamp: String used in the capture heading and as the prefix of
            the uploaded file name (``{timestamp}-voice.md``).
        nextcloud_url: Base URL of the Nextcloud server (no trailing path).
        nextcloud_user: Nextcloud user; used both in the WebDAV path and as
            the basic-auth user name.
        nextcloud_password: Password (or app password) for basic auth.
    """
    import requests as req_lib
    nc_auth = (nextcloud_user, nextcloud_password)
    try:
        segments, _ = whisper_model.transcribe(
            tmp_path, language="en", vad_filter=True, initial_prompt=WHISPER_PROMPT
        )
        transcript = " ".join(s.text.strip() for s in segments).strip()
        if not transcript:
            print(f"Async transcription empty for {timestamp} — nothing saved")
            return
        filename = f"{timestamp}-voice.md"
        content_md = f"# Capture — {timestamp}\n\n**type:** voice\n**modality:** audio\n**status:** unprocessed\n\n---\n\n{transcript}\n"
        captures_dir = f"{nextcloud_url}/remote.php/dav/files/{nextcloud_user}/Journal/Captures"
        # Best effort: the MKCOL response is deliberately not checked
        # (presumably the collection usually exists already — confirm).
        req_lib.request("MKCOL", captures_dir, auth=nc_auth, timeout=10)
        url = f"{captures_dir}/{filename}"
        response = req_lib.put(url, data=content_md.encode("utf-8"), auth=nc_auth, timeout=30)
        # requests does not raise on 4xx/5xx by itself; without this check a
        # rejected upload would still be reported as "saved".
        response.raise_for_status()
        print(f"Async transcription saved: {filename}")
    except Exception as e:
        # Top-level boundary of a background task: report and swallow.
        print(f"Async transcription failed for {timestamp}: {e}")
    finally:
        # Single cleanup point for the temp file on every path.
        try:
            os.unlink(tmp_path)
        except FileNotFoundError:
            pass
        except OSError as cleanup_error:
            print(f"Could not remove temp file {tmp_path}: {cleanup_error}")
@app.post("/api/capture")
 async def capture_endpoint(
    background_tasks: BackgroundTasks,
    audio: UploadFile = File(None),
    image: UploadFile = File(None),
    project: str = Form(None),
@@ -836,7 +863,6 @@ Keep the full description to 150-250 words. Do not speculate beyond what is visi
    elif audio is not None:
        if not whisper_model:
            raise HTTPException(status_code=503, detail="Whisper not available")
        tmp_path = None
        try:
            suffix = ".webm"
            if audio.content_type and "mp4" in audio.content_type:
@@ -847,25 +873,17 @@ Keep the full description to 150-250 words. Do not speculate beyond what is visi
                content_bytes = await audio.read()
                tmp.write(content_bytes)
                tmp_path = tmp.name
-            segments, _ = whisper_model.transcribe(
+            background_tasks.add_task(
-                tmp_path, language="en", vad_filter=True, initial_prompt=WHISPER_PROMPT
+                transcribe_and_save,
                tmp_path=tmp_path,
                timestamp=timestamp,
                nextcloud_url=nextcloud_url,
                nextcloud_user=nextcloud_user,
                nextcloud_password=nextcloud_password,
            )
-            transcript = " ".join(s.text.strip() for s in segments).strip()
+            return JSONResponse({"ok": True, "filename": f"{timestamp}-voice.md", "async": True})
            os.unlink(tmp_path)
            tmp_path = None
            if not transcript:
                return JSONResponse({"ok": False, "error": "No speech detected"})
            filename = f"{timestamp}-voice.md"
            content_md = f"# Capture — {timestamp}\n\n**type:** voice\n**modality:** audio\n**status:** unprocessed\n\n---\n\n{transcript}\n"
            captures_dir = f"{nextcloud_url}/remote.php/dav/files/{nextcloud_user}/Journal/Captures"
            req_lib.request("MKCOL", captures_dir, auth=nc_auth, timeout=10)
            url = f"{captures_dir}/{filename}"
            req_lib.put(url, data=content_md.encode("utf-8"), auth=nc_auth, timeout=30)
            return JSONResponse({"ok": True, "filename": filename, "transcript": transcript})
        except Exception as e:
-            if tmp_path and os.path.exists(tmp_path):
+            return JSONResponse({"ok": False, "error": str(e), "error_type": "capture_failed"})
                os.unlink(tmp_path)
            return JSONResponse({"ok": False, "error": str(e), "error_type": "transcription_failed"})
    else:
        raise HTTPException(status_code=400, detail="No audio or image provided")