Add Whisper small model — /api/transcribe endpoint, VAD filter, domain vocabulary prompt

This commit is contained in:
2026-04-26 15:25:22 +00:00
parent 17e06b1e70
commit 050fe4669b
+48
View File
@@ -10,6 +10,14 @@ import chromadb
from sentence_transformers import SentenceTransformer from sentence_transformers import SentenceTransformer
import anthropic import anthropic
from fastapi import FastAPI, Request, Response, Depends, HTTPException from fastapi import FastAPI, Request, Response, Depends, HTTPException
from fastapi import UploadFile, File
import tempfile
import os
# Optional dependency guard: voice transcription is a best-effort feature,
# so a missing faster-whisper package must not break app startup.
try:
    from faster_whisper import WhisperModel
except ImportError:
    HAS_WHISPER = False
else:
    HAS_WHISPER = True
from fastapi.responses import FileResponse, JSONResponse from fastapi.responses import FileResponse, JSONResponse
import secrets import secrets
import hashlib import hashlib
@@ -38,6 +46,18 @@ DEFAULT_SETTINGS = {
} }
print("Loading Aaron AI...") print("Loading Aaron AI...")
# Domain vocabulary fed to Whisper as an initial prompt so decoding is biased
# toward project-specific terms instead of phonetic near-misses.
WHISPER_PROMPT = (
    "Grasshopper, Rhino, PolyJet, SLA, FDM, DMLS, ChromaDB, "
    "HVAMC, FWN3D, Mossygear, Nextcloud, Gitea, computational design, "
    "additive manufacturing, parametric, fabrication"
)
# whisper_model stays None when faster-whisper is absent or the weights fail
# to load; the /api/transcribe endpoint checks this and answers 503.
whisper_model = None
if HAS_WHISPER:
    try:
        whisper_model = WhisperModel(
            "small", device="cpu", compute_type="int8", cpu_threads=4
        )
    except Exception as e:
        # Best-effort load: log and continue with transcription disabled.
        print(f"Whisper not available: {e}")
    else:
        print("Whisper model loaded")
embedder = SentenceTransformer("all-MiniLM-L6-v2") embedder = SentenceTransformer("all-MiniLM-L6-v2")
chroma_client = chromadb.PersistentClient(path=DB_PATH) chroma_client = chromadb.PersistentClient(path=DB_PATH)
collection = chroma_client.get_or_create_collection( collection = chroma_client.get_or_create_collection(
@@ -533,6 +553,34 @@ async def get_status(auth: str = Depends(require_auth)):
"nextcloud_path": NEXTCLOUD_PATH "nextcloud_path": NEXTCLOUD_PATH
}) })
@app.post("/api/transcribe")
async def transcribe_audio(request: Request, audio: UploadFile = File(...), auth: str = Depends(require_auth)):
    """Transcribe an uploaded audio clip with the local Whisper model.

    Returns JSON ``{"text": ..., "language": ...}``.
    Raises 503 when the Whisper model is not loaded, 500 on any
    transcription failure.
    """
    if not whisper_model:
        raise HTTPException(status_code=503, detail="Whisper not available")
    # Pick a file suffix matching the upload's container so the decoder
    # can sniff the format from the temp file name.
    suffix = ".webm"
    content_type = audio.content_type or ""
    if "mp4" in content_type:
        suffix = ".mp4"
    elif "ogg" in content_type:
        suffix = ".ogg"
    # Initialize before the try so the finally-cleanup never sees an unbound
    # name (the original code raised UnboundLocalError in its except path if
    # the failure happened before tmp_path was assigned).
    tmp_path = None
    try:
        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
            tmp.write(await audio.read())
            tmp_path = tmp.name
        segments, info = whisper_model.transcribe(
            tmp_path,
            language="en",
            vad_filter=True,  # drop non-speech stretches before decoding
            initial_prompt=WHISPER_PROMPT,  # bias toward domain vocabulary
        )
        transcript = " ".join(s.text.strip() for s in segments)
        return JSONResponse({"text": transcript, "language": info.language})
    except Exception as e:
        # Preserve the original cause for server-side tracebacks.
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # Remove the temp file on every path, success or failure.
        if tmp_path and os.path.exists(tmp_path):
            os.unlink(tmp_path)
@app.post("/api/reindex") @app.post("/api/reindex")
async def trigger_reindex(auth: str = Depends(require_auth)): async def trigger_reindex(auth: str = Depends(require_auth)):
try: try: