Voice input — toggle mode, iOS touch support, MIME type detection

This commit is contained in:
2026-04-26 13:00:16 -04:00
parent 1495c2baef
commit 29259e04e0
+87 -46
View File
@@ -9,9 +9,11 @@ export default function MessageInput() {
const { currentId, setCurrentId, addMessage, setIsLoading, isLoading, setConversations } = useStore(); const { currentId, setCurrentId, addMessage, setIsLoading, isLoading, setConversations } = useStore();
const [text, setText] = useState(''); const [text, setText] = useState('');
const [recording, setRecording] = useState(false); const [recording, setRecording] = useState(false);
const [transcribing, setTranscribing] = useState(false);
const textareaRef = useRef<HTMLTextAreaElement>(null); const textareaRef = useRef<HTMLTextAreaElement>(null);
const mediaRecorderRef = useRef<MediaRecorder | null>(null); const mediaRecorderRef = useRef<MediaRecorder | null>(null);
const chunksRef = useRef<Blob[]>([]); const chunksRef = useRef<Blob[]>([]);
const streamRef = useRef<MediaStream | null>(null);
useEffect(() => { useEffect(() => {
textareaRef.current?.focus(); textareaRef.current?.focus();
@@ -49,16 +51,15 @@ export default function MessageInput() {
try { try {
const data = await api.sendMessage(message, convId); const data = await api.sendMessage(message, convId);
setCurrentId(data.conversation_id); setCurrentId(data.conversation_id);
const assistantMsg: Message = { addMessage({
role: 'assistant', role: 'assistant',
content: data.response, content: data.response,
sources: data.sources || [], sources: data.sources || [],
timestamp: new Date().toISOString(), timestamp: new Date().toISOString(),
}; });
addMessage(assistantMsg);
const updated = await api.getConversations(); const updated = await api.getConversations();
setConversations(updated); setConversations(updated);
} catch (e) { } catch {
addMessage({ addMessage({
role: 'assistant', role: 'assistant',
content: 'Error — please try again.', content: 'Error — please try again.',
@@ -71,34 +72,57 @@ export default function MessageInput() {
} }
} }
async function startRecording() { async function toggleRecording() {
try { if (recording) {
const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); // Stop recording
const mr = new MediaRecorder(stream); mediaRecorderRef.current?.stop();
chunksRef.current = []; setRecording(false);
mr.ondataavailable = e => chunksRef.current.push(e.data); } else {
mr.onstop = async () => { // Start recording
stream.getTracks().forEach(t => t.stop()); try {
const blob = new Blob(chunksRef.current, { type: 'audio/webm' }); const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
try { streamRef.current = stream;
const { text: transcript } = await api.transcribe(blob);
setText(prev => prev ? prev + ' ' + transcript : transcript);
textareaRef.current?.focus();
} catch {
console.error('Transcription failed');
}
};
mr.start();
mediaRecorderRef.current = mr;
setRecording(true);
} catch {
alert('Microphone access denied');
}
}
function stopRecording() { // Pick supported mime type
mediaRecorderRef.current?.stop(); const mimeType = MediaRecorder.isTypeSupported('audio/webm')
setRecording(false); ? 'audio/webm'
: MediaRecorder.isTypeSupported('audio/mp4')
? 'audio/mp4'
: 'audio/ogg';
const mr = new MediaRecorder(stream, { mimeType });
chunksRef.current = [];
mr.ondataavailable = e => {
if (e.data.size > 0) chunksRef.current.push(e.data);
};
mr.onstop = async () => {
streamRef.current?.getTracks().forEach(t => t.stop());
if (chunksRef.current.length === 0) return;
setTranscribing(true);
try {
const blob = new Blob(chunksRef.current, { type: mimeType });
const { text: transcript } = await api.transcribe(blob);
if (transcript.trim()) {
setText(prev => prev ? prev + ' ' + transcript.trim() : transcript.trim());
setTimeout(() => autoResize(), 0);
}
} catch (e) {
console.error('Transcription failed', e);
} finally {
setTranscribing(false);
}
};
mr.start(1000); // collect data every second
mediaRecorderRef.current = mr;
setRecording(true);
} catch {
alert('Microphone access denied');
}
}
} }
function handleKeyDown(e: React.KeyboardEvent) { function handleKeyDown(e: React.KeyboardEvent) {
@@ -108,6 +132,9 @@ export default function MessageInput() {
} }
} }
const micColor = recording ? '#e8f5ed' : transcribing ? '#e8f5ed' : 'var(--text3)';
const micBg = recording ? '#a32d2d' : transcribing ? 'var(--accent)' : 'var(--bg3)';
return ( return (
<div <div
className="flex gap-2 items-end flex-shrink-0" className="flex gap-2 items-end flex-shrink-0"
@@ -119,27 +146,40 @@ export default function MessageInput() {
paddingRight: 'max(16px, env(safe-area-inset-right))', paddingRight: 'max(16px, env(safe-area-inset-right))',
}} }}
> >
{/* Voice button */} {/* Mic button — tap to start, tap to stop */}
<button <button
onMouseDown={startRecording} onPointerUp={toggleRecording}
onMouseUp={stopRecording}
onTouchStart={startRecording}
onTouchEnd={stopRecording}
className="flex-shrink-0 rounded-lg flex items-center justify-center transition-all" className="flex-shrink-0 rounded-lg flex items-center justify-center transition-all"
style={{ style={{
width: '44px', width: '44px',
height: '44px', height: '44px',
background: recording ? 'var(--accent)' : 'var(--bg3)', background: micBg,
border: 'none', border: 'none',
cursor: 'pointer', cursor: 'pointer',
color: recording ? '#e8f5ed' : 'var(--text3)', color: micColor,
touchAction: 'manipulation',
flexShrink: 0,
}} }}
aria-label={recording ? 'Recording...' : 'Hold to record'} aria-label={recording ? 'Stop recording' : transcribing ? 'Transcribing...' : 'Start recording'}
title="Hold to record voice" title={recording ? 'Tap to stop' : 'Tap to record'}
> >
<svg width="18" height="18" viewBox="0 0 24 24" fill="currentColor"> {transcribing ? (
<path d="M12 1a4 4 0 0 1 4 4v6a4 4 0 0 1-8 0V5a4 4 0 0 1 4-4zm0 2a2 2 0 0 0-2 2v6a2 2 0 0 0 4 0V5a2 2 0 0 0-2-2zm-7 8a7 7 0 0 0 14 0h2a9 9 0 0 1-8 8.94V22h-2v-2.06A9 9 0 0 1 3 11h2z"/> <svg width="18" height="18" viewBox="0 0 24 24" fill="currentColor">
</svg> <circle cx="12" cy="12" r="3" opacity="0.6">
<animate attributeName="opacity" values="0.6;1;0.6" dur="1s" repeatCount="indefinite"/>
</circle>
</svg>
) : recording ? (
// Square stop icon when recording
<svg width="14" height="14" viewBox="0 0 14 14" fill="currentColor">
<rect width="14" height="14" rx="2"/>
</svg>
) : (
// Mic icon when idle
<svg width="18" height="18" viewBox="0 0 24 24" fill="currentColor">
<path d="M12 1a4 4 0 0 1 4 4v6a4 4 0 0 1-8 0V5a4 4 0 0 1 4-4zm0 2a2 2 0 0 0-2 2v6a2 2 0 0 0 4 0V5a2 2 0 0 0-2-2zm-7 8a7 7 0 0 0 14 0h2a9 9 0 0 1-8 8.94V22h-2v-2.06A9 9 0 0 1 3 11h2z"/>
</svg>
)}
</button> </button>
{/* Text input */} {/* Text input */}
@@ -152,7 +192,7 @@ export default function MessageInput() {
value={text} value={text}
onChange={e => { setText(e.target.value); autoResize(); }} onChange={e => { setText(e.target.value); autoResize(); }}
onKeyDown={handleKeyDown} onKeyDown={handleKeyDown}
placeholder="Ask anything..." placeholder={recording ? 'Recording... tap mic to stop' : transcribing ? 'Transcribing...' : 'Ask anything...'}
rows={1} rows={1}
className="w-full block resize-none outline-none bg-transparent px-3 py-3 leading-relaxed min-w-0" className="w-full block resize-none outline-none bg-transparent px-3 py-3 leading-relaxed min-w-0"
style={{ style={{
@@ -167,9 +207,9 @@ export default function MessageInput() {
{/* Send button */} {/* Send button */}
<button <button
onClick={send} onPointerUp={send}
disabled={isLoading || !text.trim()} disabled={isLoading || !text.trim()}
className="flex-shrink-0 rounded-lg px-4 py-3 text-sm font-medium transition-opacity" className="flex-shrink-0 rounded-lg px-4 text-sm font-medium transition-opacity"
style={{ style={{
background: 'var(--accent)', background: 'var(--accent)',
color: '#e8f5ed', color: '#e8f5ed',
@@ -178,6 +218,7 @@ export default function MessageInput() {
opacity: isLoading || !text.trim() ? 0.4 : 1, opacity: isLoading || !text.trim() ? 0.4 : 1,
minHeight: '44px', minHeight: '44px',
fontFamily: 'var(--font-sans)', fontFamily: 'var(--font-sans)',
touchAction: 'manipulation',
}} }}
> >
Send Send