From 92565dce5ceb06d8390fe3a28f03e58eca354287 Mon Sep 17 00:00:00 2001 From: Aaron Nelson Date: Wed, 29 Apr 2026 17:04:01 +0000 Subject: [PATCH] capture: auto-retry transcription failures up to 2x, queue blob on final failure --- app/capture/page.tsx | 379 ++++++++++--------------------------------- 1 file changed, 87 insertions(+), 292 deletions(-) diff --git a/app/capture/page.tsx b/app/capture/page.tsx index 7a7cf5e..5a8ab25 100644 --- a/app/capture/page.tsx +++ b/app/capture/page.tsx @@ -1,13 +1,12 @@ 'use client'; -import { useState, useRef, useEffect, useCallback } from 'react'; +import { useState, useRef, useEffect } from 'react'; const API_URL = process.env.NEXT_PUBLIC_API_URL || ''; const DB_NAME = 'bird-capture-queue'; const STORE_NAME = 'pending'; -const MAX_IMAGE_PX = 1568; // Claude vision optimal max dimension +const MAX_IMAGE_PX = 1568; -// IndexedDB queue async function openDB(): Promise { return new Promise((resolve, reject) => { const req = indexedDB.open(DB_NAME, 1); @@ -17,19 +16,12 @@ async function openDB(): Promise { }); } -async function queueCapture(formData: FormData) { +async function queueCapture(audio: Blob | null, image: Blob | null) { const db = await openDB(); const id = Date.now().toString(); - const audio = formData.get('audio') as Blob | null; - const image = formData.get('image') as Blob | null; return new Promise((resolve, reject) => { const tx = db.transaction(STORE_NAME, 'readwrite'); - tx.objectStore(STORE_NAME).put({ - id, - audio: audio || null, - image: image || null, - timestamp: new Date().toISOString(), - }); + tx.objectStore(STORE_NAME).put({ id, audio: audio || null, image: image || null, timestamp: new Date().toISOString() }); tx.oncomplete = () => resolve(); tx.onerror = () => reject(tx.error); }); @@ -55,7 +47,6 @@ async function removeCapture(id: string) { }); } -// Resize image client-side before upload async function resizeImage(file: File): Promise { return new Promise((resolve) => { const img = new Image(); @@ -70,8 +61,7 @@ async function resizeImage(file: File): Promise { else { w = Math.round(w * maxDim / h); h = maxDim; } } const canvas = document.createElement('canvas'); - canvas.width = w; - canvas.height = h; + canvas.width = w; canvas.height = h; const ctx = canvas.getContext('2d')!; ctx.drawImage(img, 0, 0, w, h); canvas.toBlob((blob) => resolve(blob!), 'image/jpeg', 0.88); @@ -80,15 +70,17 @@ async function resizeImage(file: File): Promise { }); } -async function submitCapture(audio: Blob | null, image: Blob | null): Promise { +async function submitCapture(audio: Blob | null, image: Blob | null): Promise<{ ok: boolean; errorType?: string }> { try { const form = new FormData(); if (audio) form.append('audio', audio, 'capture.webm'); if (image) form.append('image', image, 'capture.jpg'); const res = await fetch(`${API_URL}/api/capture`, { method: 'POST', body: form }); - return res.ok; + if (res.ok) return { ok: true }; + const body = await res.json().catch(() => ({})); + return { ok: false, errorType: body.error_type }; } catch { - return false; + return { ok: false }; } } @@ -109,7 +101,10 @@ export default function CapturePage() { const countdownRef = useRef(null); const mimeTypeRef = useRef('audio/webm'); const fileInputRef = useRef(null); - const MAX_SECONDS = 120; // shorter for annotation — 2 min max + const imageBlobRef = useRef(null); + const MAX_SECONDS = 120; + + useEffect(() => { imageBlobRef.current = imageBlob; }, [imageBlob]); useEffect(() => { loadRecentCaptures(); @@ -125,26 +120,20 @@ export default function CapturePage() { async function loadRecentCaptures() { try { const res = await fetch(`${API_URL}/api/captures`); - if (res.ok) { - const data = await res.json(); - setRecentCaptures(data.captures || []); - } + if (res.ok) { const data = await res.json(); setRecentCaptures(data.captures || []); } } catch {} } async function checkPending() { - try { - const p = await getPendingCaptures(); - setPendingCount(p.length); - } catch {} + try { const p = await getPendingCaptures(); setPendingCount(p.length); } catch {} } async function retryQueue() { try { const pending = await getPendingCaptures(); for (const item of pending) { - const ok = await submitCapture(item.audio, item.image); - if (ok) await removeCapture(item.id); + const result = await submitCapture(item.audio, item.image); + if (result.ok) await removeCapture(item.id); } const remaining = await getPendingCaptures(); setPendingCount(remaining.length); @@ -152,21 +141,16 @@ export default function CapturePage() { } catch {} } - // ── Image selection ──────────────────────────────────────────────────────── - - function handleCameraClick() { - fileInputRef.current?.click(); - } + function handleCameraClick() { fileInputRef.current?.click(); } async function handleImageSelected(e: React.ChangeEvent) { const file = e.target.files?.[0]; if (!file) return; const resized = await resizeImage(file); setImageBlob(resized); - const preview = URL.createObjectURL(resized); - setImagePreview(preview); + imageBlobRef.current = resized; + setImagePreview(URL.createObjectURL(resized)); setMode('image-selected'); - // Reset input so same file can be reselected e.target.value = ''; } @@ -174,24 +158,16 @@ export default function CapturePage() { if (imagePreview) URL.revokeObjectURL(imagePreview); setImagePreview(null); setImageBlob(null); + imageBlobRef.current = null; setMode('idle'); stopRecording(); } - // ── Voice recording ──────────────────────────────────────────────────────── - async function acquireWakeLock() { - try { - if ('wakeLock' in navigator) { - wakeLockRef.current = await (navigator as any).wakeLock.request('screen'); - } - } catch {} + try { if ('wakeLock' in navigator) wakeLockRef.current = await (navigator as any).wakeLock.request('screen'); } catch {} } - function releaseWakeLock() { - wakeLockRef.current?.release(); - wakeLockRef.current = null; - } + function releaseWakeLock() { wakeLockRef.current?.release(); wakeLockRef.current = null; } async function startRecording() { try { @@ -207,12 +183,7 @@ export default function CapturePage() { mr.onstop = async () => { streamRef.current?.getTracks().forEach(t => t.stop()); streamRef.current = null; - if (chunksRef.current.length === 0) { - // No audio recorded — submit image only - await submitFinal(null); - return; - } - const audioBlob = new Blob(chunksRef.current, { type: mimeTypeRef.current }); + const audioBlob = chunksRef.current.length > 0 ? new Blob(chunksRef.current, { type: mimeTypeRef.current }) : null; await submitFinal(audioBlob); }; mr.start(1000); @@ -221,65 +192,61 @@ export default function CapturePage() { setCountdown(MAX_SECONDS); await acquireWakeLock(); countdownRef.current = setInterval(() => { - setCountdown(prev => { - if (prev <= 1) { stopRecording(); return 0; } - return prev - 1; - }); + setCountdown(prev => { if (prev <= 1) { stopRecording(); return 0; } return prev - 1; }); }, 1000); } catch { setMode('error'); - setTimeout(() => setMode(imageBlob ? 'image-selected' : 'idle'), 3000); + setTimeout(() => setMode(imageBlobRef.current ? 'image-selected' : 'idle'), 3000); } } function stopRecording() { if (countdownRef.current) { clearInterval(countdownRef.current); countdownRef.current = null; } releaseWakeLock(); - if (mediaRecorderRef.current?.state === 'recording') { - mediaRecorderRef.current.stop(); - } + if (mediaRecorderRef.current?.state === 'recording') mediaRecorderRef.current.stop(); } - // ── Submission ───────────────────────────────────────────────────────────── - - async function submitFinal(audioBlob: Blob | null) { + async function submitFinal(audioBlob: Blob | null, retryCount = 0) { setMode('submitting'); - const ok = await submitCapture(audioBlob, imageBlob); - if (ok) { + const currentImageBlob = imageBlobRef.current; + const result = await submitCapture(audioBlob, currentImageBlob); + + if (result.ok) { setMode('saved'); if (imagePreview) URL.revokeObjectURL(imagePreview); setImagePreview(null); setImageBlob(null); + imageBlobRef.current = null; await loadRecentCaptures(); setTimeout(() => setMode('idle'), 3000); - } else { - try { - const form = new FormData(); - if (audioBlob) form.append('audio', audioBlob, 'capture.webm'); - if (imageBlob) form.append('image', imageBlob, 'capture.jpg'); - await queueCapture(form); - await checkPending(); - setMode('queued'); - } catch { - setMode('error'); - } - if (imagePreview) URL.revokeObjectURL(imagePreview); - setImagePreview(null); - setImageBlob(null); - setTimeout(() => setMode('idle'), 4000); + return; } + + const isTranscriptionFailure = result.errorType === 'transcription_failed' || result.errorType === 'empty_transcript'; + if (isTranscriptionFailure && retryCount < 2) { + setTimeout(() => submitFinal(audioBlob, retryCount + 1), 3000 * (retryCount + 1)); + return; + } + + try { + await queueCapture(audioBlob, currentImageBlob); + await checkPending(); + setMode('queued'); + } catch { + setMode('error'); + } + if (imagePreview) URL.revokeObjectURL(imagePreview); + setImagePreview(null); + setImageBlob(null); + imageBlobRef.current = null; + setTimeout(() => setMode('idle'), 4000); } - async function submitImageOnly() { - await submitFinal(null); - } - - // ── Tap handler for voice button ─────────────────────────────────────────── + async function submitImageOnly() { await submitFinal(null); } function handleVoiceTap() { if (mode === 'recording') stopRecording(); - else if (mode === 'image-selected') startRecording(); - else if (mode === 'idle') startRecording(); // voice-only capture + else if (mode === 'image-selected' || mode === 'idle') startRecording(); } const isRecording = mode === 'recording'; @@ -287,266 +254,94 @@ export default function CapturePage() { const hasImage = imageBlob !== null; return ( -
+
- {/* Header */}
-

- Bird — field recorder -

- {pendingCount > 0 && ( -

- {pendingCount} capture{pendingCount > 1 ? 's' : ''} queued -

- )} +

Bird — field recorder

+ {pendingCount > 0 &&

{pendingCount} capture{pendingCount > 1 ? 's' : ''} queued

}
- {/* Main area */}
- {/* Image preview */} {imagePreview && (
- capture preview - + capture preview +
)} - {/* Waveform — shows during recording */} {isRecording && (
{[4,8,16,6,24,10,32,14,20,8,12,28,6,18,4].map((h, i) => ( -
+
))}
)} - {/* Capture buttons row */}
- - {/* Camera button — always visible, opens image picker */} - - {/* Mic / submit button — large center */} - - {/* Submit image-only button — visible only when image selected and not recording */} {hasImage && !isRecording && !isBusy ? ( - - ) : ( -
- )} + ) :
}
- {/* Recording progress */} {isRecording && (
-
+
-

- {(() => { - const rm = Math.floor(countdown / 60); - const rs = countdown % 60; - return `${rm}:${rs.toString().padStart(2,'0')} left`; - })()} -

+

{Math.floor(countdown/60)}:{(countdown%60).toString().padStart(2,'0')} left

)} - {/* Status */}
- {mode === 'idle' && ( -

- tap camera or mic -

- )} - {mode === 'image-selected' && ( -

- annotate with voice or save now -

- )} - {mode === 'recording' && ( -

- recording annotation -

- )} - {mode === 'submitting' && ( -

- {hasImage ? 'processing image...' : 'transcribing...'} -

- )} - {mode === 'saved' && ( -

- saved ✓ -

- )} - {mode === 'queued' && ( -

- queued — will sync when online -

- )} - {mode === 'error' && ( -

- error — try again -

- )} + {mode === 'idle' &&

tap camera or mic

} + {mode === 'image-selected' &&

annotate with voice or save now

} + {mode === 'recording' &&

recording annotation

} + {mode === 'submitting' &&

transcribing...

} + {mode === 'saved' &&

saved ✓

} + {mode === 'queued' &&

queued — will sync when online

} + {mode === 'error' &&

error — try again

}
- {/* Recent captures */}
-

- recent captures -

+

recent captures

{recentCaptures.length === 0 ? (

none yet

) : (
{recentCaptures.slice(0, 4).map((c, i) => ( -
-
- - {c.name} - +
+
+ {c.name}
))}
)}
- {/* Hidden file input */} - +
);