async voice transcription — return immediately, whisper runs in background
This commit is contained in:
+37
-19
@@ -8,7 +8,7 @@ from datetime import datetime
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from sentence_transformers import SentenceTransformer
|
from sentence_transformers import SentenceTransformer
|
||||||
import anthropic
|
import anthropic
|
||||||
from fastapi import FastAPI, Request, Response, Depends, HTTPException
|
from fastapi import FastAPI, Request, Response, Depends, HTTPException, BackgroundTasks
|
||||||
import psycopg2
|
import psycopg2
|
||||||
import psycopg2.extras
|
import psycopg2.extras
|
||||||
from fastapi import UploadFile, File, Form
|
from fastapi import UploadFile, File, Form
|
||||||
@@ -696,8 +696,35 @@ async def run_dreamer(request: Request, auth: str = Depends(require_auth)):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
return JSONResponse({"started": False, "error": str(e)})
|
return JSONResponse({"started": False, "error": str(e)})
|
||||||
|
|
||||||
|
def transcribe_and_save(tmp_path, timestamp, nextcloud_url, nextcloud_user, nextcloud_password):
    """Transcribe a recorded audio file and upload the transcript to Nextcloud.

    Meant to run as a background task after the HTTP endpoint has already
    responded, so every failure is caught and reported with ``print``
    instead of being raised; nothing is returned.

    Args:
        tmp_path: Path of the temporary audio file to transcribe. The file is
            removed before this function returns, whatever the outcome.
        timestamp: Capture identifier used in the note heading and in the
            ``{timestamp}-voice.md`` filename.
        nextcloud_url: Base URL of the Nextcloud server; it is joined
            directly with ``/remote.php/dav/...``.
        nextcloud_user: Nextcloud account name, also used in the WebDAV path.
        nextcloud_password: Password sent together with ``nextcloud_user`` as
            the ``auth`` tuple of each request.
    """
    try:
        # Imported inside the ``try`` so that even an import failure is
        # reported and still reaches the temp-file cleanup below.
        import requests as req_lib

        nc_auth = (nextcloud_user, nextcloud_password)

        segments, _ = whisper_model.transcribe(
            tmp_path, language="en", vad_filter=True, initial_prompt=WHISPER_PROMPT
        )
        # The audio file is only deleted in ``finally``, so it is still on
        # disk here in case ``segments`` is produced lazily.
        transcript = " ".join(s.text.strip() for s in segments).strip()

        if not transcript:
            print(f"Async transcription empty for {timestamp} — nothing saved")
            return

        filename = f"{timestamp}-voice.md"
        content_md = f"# Capture — {timestamp}\n\n**type:** voice\n**modality:** audio\n**status:** unprocessed\n\n---\n\n{transcript}\n"
        captures_dir = f"{nextcloud_url}/remote.php/dav/files/{nextcloud_user}/Journal/Captures"

        # Best-effort folder creation: the response is deliberately ignored
        # because the server answers with an error status when the
        # collection already exists.
        req_lib.request("MKCOL", captures_dir, auth=nc_auth, timeout=10)

        url = f"{captures_dir}/{filename}"
        response = req_lib.put(url, data=content_md.encode("utf-8"), auth=nc_auth, timeout=30)
        # requests does not raise on 4xx/5xx by itself; without this check a
        # rejected upload would be logged as saved and the transcript lost.
        response.raise_for_status()
        print(f"Async transcription saved: {filename}")
    except Exception as e:
        # Boundary of the background task: report the failure, never raise.
        print(f"Async transcription failed for {timestamp}: {e}")
    finally:
        # Single cleanup point for success, empty transcript and failure.
        try:
            os.unlink(tmp_path)
        except FileNotFoundError:
            pass
|
||||||
|
|
||||||
|
|
||||||
@app.post("/api/capture")
|
@app.post("/api/capture")
|
||||||
async def capture_endpoint(
|
async def capture_endpoint(
|
||||||
|
background_tasks: BackgroundTasks,
|
||||||
audio: UploadFile = File(None),
|
audio: UploadFile = File(None),
|
||||||
image: UploadFile = File(None),
|
image: UploadFile = File(None),
|
||||||
project: str = Form(None),
|
project: str = Form(None),
|
||||||
@@ -836,7 +863,6 @@ Keep the full description to 150-250 words. Do not speculate beyond what is visi
|
|||||||
elif audio is not None:
|
elif audio is not None:
|
||||||
if not whisper_model:
|
if not whisper_model:
|
||||||
raise HTTPException(status_code=503, detail="Whisper not available")
|
raise HTTPException(status_code=503, detail="Whisper not available")
|
||||||
tmp_path = None
|
|
||||||
try:
|
try:
|
||||||
suffix = ".webm"
|
suffix = ".webm"
|
||||||
if audio.content_type and "mp4" in audio.content_type:
|
if audio.content_type and "mp4" in audio.content_type:
|
||||||
@@ -847,25 +873,17 @@ Keep the full description to 150-250 words. Do not speculate beyond what is visi
|
|||||||
content_bytes = await audio.read()
|
content_bytes = await audio.read()
|
||||||
tmp.write(content_bytes)
|
tmp.write(content_bytes)
|
||||||
tmp_path = tmp.name
|
tmp_path = tmp.name
|
||||||
segments, _ = whisper_model.transcribe(
|
background_tasks.add_task(
|
||||||
tmp_path, language="en", vad_filter=True, initial_prompt=WHISPER_PROMPT
|
transcribe_and_save,
|
||||||
|
tmp_path=tmp_path,
|
||||||
|
timestamp=timestamp,
|
||||||
|
nextcloud_url=nextcloud_url,
|
||||||
|
nextcloud_user=nextcloud_user,
|
||||||
|
nextcloud_password=nextcloud_password,
|
||||||
)
|
)
|
||||||
transcript = " ".join(s.text.strip() for s in segments).strip()
|
return JSONResponse({"ok": True, "filename": f"{timestamp}-voice.md", "async": True})
|
||||||
os.unlink(tmp_path)
|
|
||||||
tmp_path = None
|
|
||||||
if not transcript:
|
|
||||||
return JSONResponse({"ok": False, "error": "No speech detected"})
|
|
||||||
filename = f"{timestamp}-voice.md"
|
|
||||||
content_md = f"# Capture — {timestamp}\n\n**type:** voice\n**modality:** audio\n**status:** unprocessed\n\n---\n\n{transcript}\n"
|
|
||||||
captures_dir = f"{nextcloud_url}/remote.php/dav/files/{nextcloud_user}/Journal/Captures"
|
|
||||||
req_lib.request("MKCOL", captures_dir, auth=nc_auth, timeout=10)
|
|
||||||
url = f"{captures_dir}/{filename}"
|
|
||||||
req_lib.put(url, data=content_md.encode("utf-8"), auth=nc_auth, timeout=30)
|
|
||||||
return JSONResponse({"ok": True, "filename": filename, "transcript": transcript})
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if tmp_path and os.path.exists(tmp_path):
|
return JSONResponse({"ok": False, "error": str(e), "error_type": "capture_failed"})
|
||||||
os.unlink(tmp_path)
|
|
||||||
return JSONResponse({"ok": False, "error": str(e), "error_type": "transcription_failed"})
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
raise HTTPException(status_code=400, detail="No audio or image provided")
|
raise HTTPException(status_code=400, detail="No audio or image provided")
|
||||||
|
|||||||
Reference in New Issue
Block a user