Add Whisper small model — /api/transcribe endpoint, VAD filter, domain vocabulary prompt
This commit is contained in:
@@ -10,6 +10,14 @@ import chromadb
|
|||||||
from sentence_transformers import SentenceTransformer
|
from sentence_transformers import SentenceTransformer
|
||||||
import anthropic
|
import anthropic
|
||||||
from fastapi import FastAPI, Request, Response, Depends, HTTPException
|
from fastapi import FastAPI, Request, Response, Depends, HTTPException
|
||||||
|
from fastapi import UploadFile, File
|
||||||
|
import tempfile
|
||||||
|
import os
|
||||||
|
# Optional dependency: faster-whisper powers the /api/transcribe endpoint.
# The server still boots without it; the endpoint then answers 503.
try:
    from faster_whisper import WhisperModel
except ImportError:
    HAS_WHISPER = False
else:
    HAS_WHISPER = True
from fastapi.responses import FileResponse, JSONResponse
|
from fastapi.responses import FileResponse, JSONResponse
|
||||||
import secrets
|
import secrets
|
||||||
import hashlib
|
import hashlib
|
||||||
@@ -38,6 +46,18 @@ DEFAULT_SETTINGS = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
print("Loading Aaron AI...")

# Domain vocabulary handed to Whisper as its initial prompt so that
# project/tool names are spelled correctly instead of guessed phonetically.
WHISPER_PROMPT = (
    "Grasshopper, Rhino, PolyJet, SLA, FDM, DMLS, ChromaDB, "
    "HVAMC, FWN3D, Mossygear, Nextcloud, Gitea, computational design, "
    "additive manufacturing, parametric, fabrication"
)

# Lazy-tolerant model load: stays None when faster-whisper is missing or
# the model cannot be fetched, and /api/transcribe degrades to a 503.
whisper_model = None
if HAS_WHISPER:
    try:
        # "small" on CPU with int8 quantization: reasonable accuracy/latency
        # trade-off for a self-hosted box without a GPU.
        whisper_model = WhisperModel("small", device="cpu", compute_type="int8", cpu_threads=4)
        print("Whisper model loaded")
    except Exception as e:
        # Model download/load can fail (no disk space, no network) — keep booting.
        print(f"Whisper not available: {e}")

# Embedding + vector-store setup for the retrieval side of the app.
embedder = SentenceTransformer("all-MiniLM-L6-v2")
chroma_client = chromadb.PersistentClient(path=DB_PATH)
collection = chroma_client.get_or_create_collection(
|
collection = chroma_client.get_or_create_collection(
|
||||||
@@ -533,6 +553,34 @@ async def get_status(auth: str = Depends(require_auth)):
|
|||||||
"nextcloud_path": NEXTCLOUD_PATH
|
"nextcloud_path": NEXTCLOUD_PATH
|
||||||
})
|
})
|
||||||
|
|
||||||
|
@app.post("/api/transcribe")
async def transcribe_audio(request: Request, audio: UploadFile = File(...), auth: str = Depends(require_auth)):
    """Transcribe an uploaded audio clip with the local Whisper model.

    Args:
        request: incoming request (kept for signature compatibility).
        audio: uploaded audio file (webm/mp4/ogg containers supported).
        auth: authenticated principal injected by ``require_auth``.

    Returns:
        JSONResponse with ``text`` (the joined transcript) and ``language``
        (the language Whisper detected/used).

    Raises:
        HTTPException: 503 when Whisper is not loaded, 500 on any
        transcription failure.
    """
    if not whisper_model:
        raise HTTPException(status_code=503, detail="Whisper not available")

    # Choose a temp-file suffix matching the uploaded container so the
    # decoder inside faster-whisper can sniff the format correctly.
    suffix = ".webm"
    if audio.content_type and "mp4" in audio.content_type:
        suffix = ".mp4"
    elif audio.content_type and "ogg" in audio.content_type:
        suffix = ".ogg"

    # BUG FIX: tmp_path was previously assigned only inside the try body, so a
    # failure before that point (e.g. audio.read()) made the except handler
    # raise NameError instead of reporting the real error.
    tmp_path = None
    try:
        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
            tmp.write(await audio.read())
            tmp_path = tmp.name
        segments, info = whisper_model.transcribe(
            tmp_path,
            language="en",
            vad_filter=True,                 # skip silent stretches before decoding
            initial_prompt=WHISPER_PROMPT,   # bias decoding toward domain vocabulary
        )
        transcript = " ".join(s.text.strip() for s in segments)
        return JSONResponse({"text": transcript, "language": info.language})
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    finally:
        # Single cleanup path: remove the temp file on success and failure alike.
        if tmp_path and os.path.exists(tmp_path):
            os.unlink(tmp_path)
|
||||||
|
|
||||||
@app.post("/api/reindex")
|
@app.post("/api/reindex")
|
||||||
async def trigger_reindex(auth: str = Depends(require_auth)):
|
async def trigger_reindex(auth: str = Depends(require_auth)):
|
||||||
try:
|
try:
|
||||||
|
|||||||
Reference in New Issue
Block a user