Voice input — toggle mode, iOS touch support, MIME type detection

2026-04-26 13:00:16 -04:00
parent 1495c2baef
commit 29259e04e0
1 changed file with 88 additions and 47 deletions
@@ -9,9 +9,11 @@ export default function MessageInput() {
  const { currentId, setCurrentId, addMessage, setIsLoading, isLoading, setConversations } = useStore();
  const [text, setText] = useState('');
  const [recording, setRecording] = useState(false);
  const [transcribing, setTranscribing] = useState(false);
  const textareaRef = useRef<HTMLTextAreaElement>(null);
  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
  const chunksRef = useRef<Blob[]>([]);
  const streamRef = useRef<MediaStream | null>(null);
  useEffect(() => {
    textareaRef.current?.focus();
@@ -49,16 +51,15 @@ export default function MessageInput() {
    try {
      const data = await api.sendMessage(message, convId);
      setCurrentId(data.conversation_id);
-      const assistantMsg: Message = {
+      addMessage({
        role: 'assistant',
        content: data.response,
        sources: data.sources || [],
        timestamp: new Date().toISOString(),
-      };
+      });
      addMessage(assistantMsg);
      const updated = await api.getConversations();
      setConversations(updated);
-    } catch (e) {
+    } catch {
      addMessage({
        role: 'assistant',
        content: 'Error — please try again.',
@@ -71,34 +72,57 @@ export default function MessageInput() {
    }
  }
-  async function startRecording() {
+  async function toggleRecording() {
-    try {
+    if (recording) {
-      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+      // Stop recording
-      const mr = new MediaRecorder(stream);
+      mediaRecorderRef.current?.stop();
-      chunksRef.current = [];
+      setRecording(false);
-      mr.ondataavailable = e => chunksRef.current.push(e.data);
+    } else {
-      mr.onstop = async () => {
+      // Start recording
-        stream.getTracks().forEach(t => t.stop());
+      try {
-        const blob = new Blob(chunksRef.current, { type: 'audio/webm' });
+        const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
-        try {
+        streamRef.current = stream;
          const { text: transcript } = await api.transcribe(blob);
          setText(prev => prev ? prev + ' ' + transcript : transcript);
          textareaRef.current?.focus();
        } catch {
          console.error('Transcription failed');
        }
      };
      mr.start();
      mediaRecorderRef.current = mr;
      setRecording(true);
    } catch {
      alert('Microphone access denied');
    }
  }
-  function stopRecording() {
+        // Pick supported mime type
-    mediaRecorderRef.current?.stop();
+        const mimeType = MediaRecorder.isTypeSupported('audio/webm')
-    setRecording(false);
+          ? 'audio/webm'
          : MediaRecorder.isTypeSupported('audio/mp4')
          ? 'audio/mp4'
          : 'audio/ogg';
        const mr = new MediaRecorder(stream, { mimeType });
        chunksRef.current = [];
        mr.ondataavailable = e => {
          if (e.data.size > 0) chunksRef.current.push(e.data);
        };
        mr.onstop = async () => {
          streamRef.current?.getTracks().forEach(t => t.stop());
          if (chunksRef.current.length === 0) return;
          setTranscribing(true);
          try {
            const blob = new Blob(chunksRef.current, { type: mimeType });
            const { text: transcript } = await api.transcribe(blob);
            if (transcript.trim()) {
              setText(prev => prev ? prev + ' ' + transcript.trim() : transcript.trim());
              setTimeout(() => autoResize(), 0);
            }
          } catch (e) {
            console.error('Transcription failed', e);
          } finally {
            setTranscribing(false);
          }
        };
        mr.start(1000); // collect data every second
        mediaRecorderRef.current = mr;
        setRecording(true);
      } catch {
        alert('Microphone access denied');
      }
    }
  }
  function handleKeyDown(e: React.KeyboardEvent) {
@@ -108,6 +132,9 @@ export default function MessageInput() {
    }
  }
  const micColor = recording ? '#e8f5ed' : transcribing ? '#e8f5ed' : 'var(--text3)';
  const micBg = recording ? '#a32d2d' : transcribing ? 'var(--accent)' : 'var(--bg3)';
  return (
    <div
      className="flex gap-2 items-end flex-shrink-0"
@@ -119,27 +146,40 @@ export default function MessageInput() {
        paddingRight: 'max(16px, env(safe-area-inset-right))',
      }}
    >
-      {/* Voice button */}
+      {/* Mic button — tap to start, tap to stop */}
      <button
-        onMouseDown={startRecording}
+        onPointerUp={toggleRecording}
        onMouseUp={stopRecording}
        onTouchStart={startRecording}
        onTouchEnd={stopRecording}
        className="flex-shrink-0 rounded-lg flex items-center justify-center transition-all"
        style={{
          width: '44px',
          height: '44px',
-          background: recording ? 'var(--accent)' : 'var(--bg3)',
+          background: micBg,
          border: 'none',
          cursor: 'pointer',
-          color: recording ? '#e8f5ed' : 'var(--text3)',
+          color: micColor,
          touchAction: 'manipulation',
          flexShrink: 0,
        }}
-        aria-label={recording ? 'Recording...' : 'Hold to record'}
+        aria-label={recording ? 'Stop recording' : transcribing ? 'Transcribing...' : 'Start recording'}
-        title="Hold to record voice"
+        title={recording ? 'Tap to stop' : 'Tap to record'}
      >
-        <svg width="18" height="18" viewBox="0 0 24 24" fill="currentColor">
+        {transcribing ? (
-          <path d="M12 1a4 4 0 0 1 4 4v6a4 4 0 0 1-8 0V5a4 4 0 0 1 4-4zm0 2a2 2 0 0 0-2 2v6a2 2 0 0 0 4 0V5a2 2 0 0 0-2-2zm-7 8a7 7 0 0 0 14 0h2a9 9 0 0 1-8 8.94V22h-2v-2.06A9 9 0 0 1 3 11h2z"/>
+          <svg width="18" height="18" viewBox="0 0 24 24" fill="currentColor">
-        </svg>
+            <circle cx="12" cy="12" r="3" opacity="0.6">
              <animate attributeName="opacity" values="0.6;1;0.6" dur="1s" repeatCount="indefinite"/>
            </circle>
          </svg>
        ) : recording ? (
          // Square stop icon when recording
          <svg width="14" height="14" viewBox="0 0 14 14" fill="currentColor">
            <rect width="14" height="14" rx="2"/>
          </svg>
        ) : (
          // Mic icon when idle
          <svg width="18" height="18" viewBox="0 0 24 24" fill="currentColor">
            <path d="M12 1a4 4 0 0 1 4 4v6a4 4 0 0 1-8 0V5a4 4 0 0 1 4-4zm0 2a2 2 0 0 0-2 2v6a2 2 0 0 0 4 0V5a2 2 0 0 0-2-2zm-7 8a7 7 0 0 0 14 0h2a9 9 0 0 1-8 8.94V22h-2v-2.06A9 9 0 0 1 3 11h2z"/>
          </svg>
        )}
      </button>
      {/* Text input */}
@@ -152,7 +192,7 @@ export default function MessageInput() {
          value={text}
          onChange={e => { setText(e.target.value); autoResize(); }}
          onKeyDown={handleKeyDown}
-          placeholder="Ask anything..."
+          placeholder={recording ? 'Recording... tap mic to stop' : transcribing ? 'Transcribing...' : 'Ask anything...'}
          rows={1}
          className="w-full block resize-none outline-none bg-transparent px-3 py-3 leading-relaxed min-w-0"
          style={{
@@ -167,9 +207,9 @@ export default function MessageInput() {
      {/* Send button */}
      <button
-        onClick={send}
+        onPointerUp={send}
        disabled={isLoading || !text.trim()}
-        className="flex-shrink-0 rounded-lg px-4 py-3 text-sm font-medium transition-opacity"
+        className="flex-shrink-0 rounded-lg px-4 text-sm font-medium transition-opacity"
        style={{
          background: 'var(--accent)',
          color: '#e8f5ed',
@@ -178,6 +218,7 @@ export default function MessageInput() {
          opacity: isLoading || !text.trim() ? 0.4 : 1,
          minHeight: '44px',
          fontFamily: 'var(--font-sans)',
          touchAction: 'manipulation',
        }}
      >
        Send