async voice transcription — return immediately, whisper runs in background

This commit is contained in:
2026-04-29 17:48:22 +00:00
parent eb7cf3be10
commit a05fcec882
+37 -19
View File
@@ -8,7 +8,7 @@ from datetime import datetime
from dotenv import load_dotenv
from sentence_transformers import SentenceTransformer
import anthropic
from fastapi import FastAPI, Request, Response, Depends, HTTPException
from fastapi import FastAPI, Request, Response, Depends, HTTPException, BackgroundTasks
import psycopg2
import psycopg2.extras
from fastapi import UploadFile, File, Form
@@ -696,8 +696,35 @@ async def run_dreamer(request: Request, auth: str = Depends(require_auth)):
except Exception as e:
return JSONResponse({"started": False, "error": str(e)})
def transcribe_and_save(tmp_path, timestamp, nextcloud_url, nextcloud_user, nextcloud_password):
    """Background task — transcribes audio and saves to Nextcloud after endpoint returns.

    Runs ``whisper_model.transcribe`` on the audio file at ``tmp_path``, joins
    the segment texts into one transcript, and uploads it as
    ``{timestamp}-voice.md`` into the ``Journal/Captures`` WebDAV folder of
    ``nextcloud_user``. The temporary audio file is always removed before the
    task finishes, whether transcription and upload succeeded or not.

    Nothing is returned and no exception propagates: the caller has already
    responded to the client, so outcomes are only reported via ``print``.

    Args:
        tmp_path: Path of the temporary audio file to transcribe and delete.
        timestamp: Capture timestamp string used in the filename and heading.
        nextcloud_url: Base URL of the Nextcloud instance (no trailing slash
            is added or stripped here).
        nextcloud_user: Nextcloud account name; also part of the WebDAV path.
        nextcloud_password: Password used for HTTP basic auth.
    """
    import requests as req_lib

    nc_auth = (nextcloud_user, nextcloud_password)
    try:
        segments, _ = whisper_model.transcribe(
            tmp_path, language="en", vad_filter=True, initial_prompt=WHISPER_PROMPT
        )
        transcript = " ".join(s.text.strip() for s in segments).strip()
        if not transcript:
            print(f"Async transcription empty for {timestamp} — nothing saved")
            return

        filename = f"{timestamp}-voice.md"
        content_md = f"# Capture — {timestamp}\n\n**type:** voice\n**modality:** audio\n**status:** unprocessed\n\n---\n\n{transcript}\n"
        captures_dir = f"{nextcloud_url}/remote.php/dav/files/{nextcloud_user}/Journal/Captures"

        # Best-effort folder creation. The status is deliberately not checked:
        # WebDAV answers MKCOL on an existing collection with an error status,
        # which is the normal case once the folder exists.
        req_lib.request("MKCOL", captures_dir, auth=nc_auth, timeout=10)

        url = f"{captures_dir}/{filename}"
        response = req_lib.put(url, data=content_md.encode("utf-8"), auth=nc_auth, timeout=30)
        # Without this check a rejected upload (bad credentials, missing
        # parent folder, quota exceeded) would be logged as "saved".
        response.raise_for_status()
        print(f"Async transcription saved: {filename}")
    except Exception as e:
        # Top-level boundary of a background task: report and swallow, since
        # there is no caller left to receive the error.
        print(f"Async transcription failed for {timestamp}: {e}")
    finally:
        # Single cleanup point so the temp file is removed on every path.
        try:
            os.unlink(tmp_path)
        except FileNotFoundError:
            pass
        except OSError as cleanup_err:
            print(f"Could not remove temp file {tmp_path}: {cleanup_err}")
@app.post("/api/capture")
async def capture_endpoint(
background_tasks: BackgroundTasks,
audio: UploadFile = File(None),
image: UploadFile = File(None),
project: str = Form(None),
@@ -836,7 +863,6 @@ Keep the full description to 150-250 words. Do not speculate beyond what is visi
elif audio is not None:
if not whisper_model:
raise HTTPException(status_code=503, detail="Whisper not available")
tmp_path = None
try:
suffix = ".webm"
if audio.content_type and "mp4" in audio.content_type:
@@ -847,25 +873,17 @@ Keep the full description to 150-250 words. Do not speculate beyond what is visi
content_bytes = await audio.read()
tmp.write(content_bytes)
tmp_path = tmp.name
segments, _ = whisper_model.transcribe(
tmp_path, language="en", vad_filter=True, initial_prompt=WHISPER_PROMPT
background_tasks.add_task(
transcribe_and_save,
tmp_path=tmp_path,
timestamp=timestamp,
nextcloud_url=nextcloud_url,
nextcloud_user=nextcloud_user,
nextcloud_password=nextcloud_password,
)
transcript = " ".join(s.text.strip() for s in segments).strip()
os.unlink(tmp_path)
tmp_path = None
if not transcript:
return JSONResponse({"ok": False, "error": "No speech detected"})
filename = f"{timestamp}-voice.md"
content_md = f"# Capture — {timestamp}\n\n**type:** voice\n**modality:** audio\n**status:** unprocessed\n\n---\n\n{transcript}\n"
captures_dir = f"{nextcloud_url}/remote.php/dav/files/{nextcloud_user}/Journal/Captures"
req_lib.request("MKCOL", captures_dir, auth=nc_auth, timeout=10)
url = f"{captures_dir}/{filename}"
req_lib.put(url, data=content_md.encode("utf-8"), auth=nc_auth, timeout=30)
return JSONResponse({"ok": True, "filename": filename, "transcript": transcript})
return JSONResponse({"ok": True, "filename": f"{timestamp}-voice.md", "async": True})
except Exception as e:
if tmp_path and os.path.exists(tmp_path):
os.unlink(tmp_path)
return JSONResponse({"ok": False, "error": str(e), "error_type": "transcription_failed"})
return JSONResponse({"ok": False, "error": str(e), "error_type": "capture_failed"})
else:
raise HTTPException(status_code=400, detail="No audio or image provided")