async voice transcription — return immediately, whisper runs in background
This commit is contained in:
+37
-19
@@ -8,7 +8,7 @@ from datetime import datetime
|
||||
from dotenv import load_dotenv
|
||||
from sentence_transformers import SentenceTransformer
|
||||
import anthropic
|
||||
from fastapi import FastAPI, Request, Response, Depends, HTTPException
|
||||
from fastapi import FastAPI, Request, Response, Depends, HTTPException, BackgroundTasks
|
||||
import psycopg2
|
||||
import psycopg2.extras
|
||||
from fastapi import UploadFile, File, Form
|
||||
@@ -696,8 +696,35 @@ async def run_dreamer(request: Request, auth: str = Depends(require_auth)):
|
||||
except Exception as e:
|
||||
return JSONResponse({"started": False, "error": str(e)})
|
||||
|
||||
def transcribe_and_save(tmp_path, timestamp, nextcloud_url, nextcloud_user, nextcloud_password):
    """Background task — transcribes audio and saves to Nextcloud after endpoint returns.

    Runs the module-level ``whisper_model`` over the temporary audio file,
    deletes that file, and uploads the transcript as
    ``<timestamp>-voice.md`` into ``Journal/Captures`` via WebDAV.

    Args:
        tmp_path: Path of the temporary audio file; removed by this function.
        timestamp: Capture timestamp string, used in the filename and heading.
        nextcloud_url: Base URL of the Nextcloud instance (no trailing slash
            is added or stripped here).
        nextcloud_user: Nextcloud account name; also part of the WebDAV path.
        nextcloud_password: Password paired with ``nextcloud_user`` for
            HTTP basic auth.

    Returns:
        None. The HTTP response has already been sent by the time this runs,
        so every outcome (saved / empty / failed) is reported via ``print``
        and no exception is propagated to the caller.
    """
    import requests as req_lib

    nc_auth = (nextcloud_user, nextcloud_password)
    try:
        try:
            segments, _ = whisper_model.transcribe(
                tmp_path, language="en", vad_filter=True, initial_prompt=WHISPER_PROMPT
            )
            # Consume the segments while the audio file still exists, in case
            # the model yields them lazily — TODO confirm against the whisper
            # backend in use.
            transcript = " ".join(s.text.strip() for s in segments).strip()
        finally:
            # The audio is no longer needed whether transcription succeeded or
            # failed; removing it here covers both paths exactly once.
            try:
                os.unlink(tmp_path)
            except FileNotFoundError:
                pass
            except OSError as cleanup_err:
                # A cleanup problem must not discard a finished transcript.
                print(f"Could not remove temp audio {tmp_path}: {cleanup_err}")

        if not transcript:
            print(f"Async transcription empty for {timestamp} — nothing saved")
            return

        filename = f"{timestamp}-voice.md"
        content_md = f"# Capture — {timestamp}\n\n**type:** voice\n**modality:** audio\n**status:** unprocessed\n\n---\n\n{transcript}\n"
        captures_dir = f"{nextcloud_url}/remote.php/dav/files/{nextcloud_user}/Journal/Captures"

        # MKCOL is best-effort: 201 means the folder was created and 405 means
        # it already exists. Anything else is only logged, because the PUT
        # below is the authoritative success check.
        mkcol_resp = req_lib.request("MKCOL", captures_dir, auth=nc_auth, timeout=10)
        if mkcol_resp.status_code not in (201, 405):
            print(f"MKCOL {captures_dir} returned HTTP {mkcol_resp.status_code}")

        url = f"{captures_dir}/{filename}"
        put_resp = req_lib.put(url, data=content_md.encode("utf-8"), auth=nc_auth, timeout=30)
        # Without this check a rejected upload (bad credentials, quota, missing
        # parent folder) would be reported as saved and the capture lost.
        put_resp.raise_for_status()
        print(f"Async transcription saved: {filename}")
    except Exception as e:
        # Top-level boundary of a fire-and-forget task: log and swallow so the
        # failure is visible but does not crash the task runner.
        print(f"Async transcription failed for {timestamp}: {e}")
|
||||
|
||||
|
||||
@app.post("/api/capture")
|
||||
async def capture_endpoint(
|
||||
background_tasks: BackgroundTasks,
|
||||
audio: UploadFile = File(None),
|
||||
image: UploadFile = File(None),
|
||||
project: str = Form(None),
|
||||
@@ -836,7 +863,6 @@ Keep the full description to 150-250 words. Do not speculate beyond what is visi
|
||||
elif audio is not None:
|
||||
if not whisper_model:
|
||||
raise HTTPException(status_code=503, detail="Whisper not available")
|
||||
tmp_path = None
|
||||
try:
|
||||
suffix = ".webm"
|
||||
if audio.content_type and "mp4" in audio.content_type:
|
||||
@@ -847,25 +873,17 @@ Keep the full description to 150-250 words. Do not speculate beyond what is visi
|
||||
content_bytes = await audio.read()
|
||||
tmp.write(content_bytes)
|
||||
tmp_path = tmp.name
|
||||
segments, _ = whisper_model.transcribe(
|
||||
tmp_path, language="en", vad_filter=True, initial_prompt=WHISPER_PROMPT
|
||||
background_tasks.add_task(
|
||||
transcribe_and_save,
|
||||
tmp_path=tmp_path,
|
||||
timestamp=timestamp,
|
||||
nextcloud_url=nextcloud_url,
|
||||
nextcloud_user=nextcloud_user,
|
||||
nextcloud_password=nextcloud_password,
|
||||
)
|
||||
transcript = " ".join(s.text.strip() for s in segments).strip()
|
||||
os.unlink(tmp_path)
|
||||
tmp_path = None
|
||||
if not transcript:
|
||||
return JSONResponse({"ok": False, "error": "No speech detected"})
|
||||
filename = f"{timestamp}-voice.md"
|
||||
content_md = f"# Capture — {timestamp}\n\n**type:** voice\n**modality:** audio\n**status:** unprocessed\n\n---\n\n{transcript}\n"
|
||||
captures_dir = f"{nextcloud_url}/remote.php/dav/files/{nextcloud_user}/Journal/Captures"
|
||||
req_lib.request("MKCOL", captures_dir, auth=nc_auth, timeout=10)
|
||||
url = f"{captures_dir}/{filename}"
|
||||
req_lib.put(url, data=content_md.encode("utf-8"), auth=nc_auth, timeout=30)
|
||||
return JSONResponse({"ok": True, "filename": filename, "transcript": transcript})
|
||||
return JSONResponse({"ok": True, "filename": f"{timestamp}-voice.md", "async": True})
|
||||
except Exception as e:
|
||||
if tmp_path and os.path.exists(tmp_path):
|
||||
os.unlink(tmp_path)
|
||||
return JSONResponse({"ok": False, "error": str(e), "error_type": "transcription_failed"})
|
||||
return JSONResponse({"ok": False, "error": str(e), "error_type": "capture_failed"})
|
||||
|
||||
else:
|
||||
raise HTTPException(status_code=400, detail="No audio or image provided")
|
||||
|
||||
Reference in New Issue
Block a user