8 changes: 8 additions & 0 deletions package-lock.json

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions package.json
@@ -41,6 +41,7 @@
     "@eslint/js": "^9.17.0",
     "@tailwindcss/postcss": "^4.1.11",
     "@tailwindcss/vite": "^4.1.11",
+    "@types/dom-speech-recognition": "^0.0.6",
     "@types/node": "^22.13.1",
     "@types/react": "^18.3.18",
     "@types/react-dom": "^18.3.5",
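Review note: @types/dom-speech-recognition supplies the ambient Web Speech API declarations (window.SpeechRecognition, window.webkitSpeechRecognition, SpeechRecognitionEvent, and so on) that the new SpeechToText.tsx below compiles against; the standard DOM lib does not declare these, so the window accesses would otherwise fail type-checking.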
46 changes: 45 additions & 1 deletion src/components/ChatInput.tsx
@@ -1,7 +1,13 @@
 import { memo, useCallback, useEffect, useMemo } from 'react';
 import toast from 'react-hot-toast';
 import { useTranslation } from 'react-i18next';
-import { LuArrowUp, LuPaperclip, LuSquare } from 'react-icons/lu';
+import {
+  LuArrowUp,
+  LuCircleStop,
+  LuMic,
+  LuPaperclip,
+  LuSquare,
+} from 'react-icons/lu';
 import { TbAdjustmentsHorizontal } from 'react-icons/tb';
 import { useNavigate } from 'react-router';
 import { useChatContext } from '../context/chat';
@@ -10,6 +16,10 @@ import { useFileUpload } from '../hooks/useFileUpload';
 import { MessageExtra } from '../types';
 import { classNames, cleanCurrentUrl } from '../utils';
 import { DropzoneArea } from './DropzoneArea';
+import SpeechToText, {
+  IS_SPEECH_RECOGNITION_SUPPORTED,
+  SpeechRecordCallback,
+} from './SpeechToText';
 
 /**
  * If the current URL contains "?m=...", prefill the message input with the value.
@@ -50,6 +60,11 @@ export const ChatInput = memo(
       stopGenerating(convId);
     }, [convId, stopGenerating]);
 
+    const handleRecord: SpeechRecordCallback = useCallback(
+      (text: string) => textarea.setValue(text),
+      [textarea]
+    );
+
     const sendNewMessage = async () => {
       const lastInpMsg = textarea.value();
       if (lastInpMsg.trim().length === 0) {
@@ -145,6 +160,35 @@
           </div>
 
           <div className="flex items-center">
+            {IS_SPEECH_RECOGNITION_SUPPORTED && !isPending && (
+              <SpeechToText onRecord={handleRecord}>
+                {({ isRecording, startRecording, stopRecording }) => (
+                  <>
+                    {!isRecording && (
+                      <button
+                        className="btn btn-ghost w-8 h-8 p-0 rounded-full mr-2"
+                        onClick={startRecording}
+                        title="Record"
+                        aria-label="Start Recording"
+                      >
+                        <LuMic className="h-5 w-5" />
+                      </button>
+                    )}
+                    {isRecording && (
+                      <button
+                        className="btn btn-ghost w-8 h-8 p-0 rounded-full mr-2"
+                        onClick={stopRecording}
+                        title="Stop"
+                        aria-label="Stop Recording"
+                      >
+                        <LuCircleStop className="h-5 w-5" />
+                      </button>
+                    )}
+                  </>
+                )}
+              </SpeechToText>
+            )}
+
             {isPending && (
               <button
                 className="btn btn-neutral w-8 h-8 p-0 rounded-full"
167 changes: 167 additions & 0 deletions src/components/SpeechToText.tsx
@@ -0,0 +1,167 @@
+import {
+  forwardRef,
+  Fragment,
+  ReactNode,
+  useCallback,
+  useEffect,
+  useImperativeHandle,
+  useRef,
+  useState,
+} from 'react';
+
+export type SpeechRecordCallback = (text: string) => void;
+
+const SpeechRecognition =
+  typeof window === 'undefined'
+    ? undefined
+    : window.SpeechRecognition || window.webkitSpeechRecognition;
+
+export const IS_SPEECH_RECOGNITION_SUPPORTED = !!SpeechRecognition;
+
+interface SpeechToTextProps {
+  lang?: string;
+  continuous?: boolean;
+  interimResults?: boolean;
+  onRecord?: SpeechRecordCallback;
+}
+
+interface SpeechToTextState {
+  isRecording: boolean;
+  transcript: string;
+  startRecording: () => void;
+  stopRecording: () => void;
+}
+
+const useSpeechToText = ({
+  lang,
+  continuous = true,
+  interimResults = true,
+  onRecord,
+}: SpeechToTextProps): SpeechToTextState => {
+  const [isRecording, setIsRecording] = useState<boolean>(false);
+  const [transcript, setTranscript] = useState<string>('');
+  const recognitionRef = useRef<SpeechRecognition | null>(null);
+  const stoppedManuallyRef = useRef<boolean>(false);
+  const onRecordRef = useRef<SpeechRecordCallback | undefined>(onRecord);
+  const finalTranscriptRef = useRef<string>('');
+
+  useEffect(() => {
+    onRecordRef.current = onRecord;
+  }, [onRecord]);
+
+  useEffect(() => {
+    if (!IS_SPEECH_RECOGNITION_SUPPORTED) {
+      console.error('Speech Recognition is not supported in this browser.');
+      return;
+    }
+
+    const recognition = new SpeechRecognition!();
+    recognition.continuous = continuous;
+    recognition.interimResults = interimResults;
+    recognition.lang =
+      lang || navigator.languages?.[0] || navigator.language || 'en-US';
+
+    recognition.onstart = () => {
+      setIsRecording(true);
+    };
+    recognition.onresult = (event: SpeechRecognitionEvent) => {
+      if (!event?.results) return;
+
+      for (let i = event.resultIndex; i < event.results.length; i++) {
+        const result = event.results[i];
+        const { isFinal, length } = result;
+        if (length <= 0) continue;
+        const { transcript, confidence } = result[0];
+        const fullTranscript = [finalTranscriptRef.current, transcript].join(
+          ' '
+        );
+        setTranscript(fullTranscript);
+        onRecordRef.current?.(fullTranscript);
+        if (isFinal && confidence > 0) {
+          finalTranscriptRef.current += transcript;
+        }
+      }
+    };
+    recognition.onerror = (event: SpeechRecognitionErrorEvent) => {
+      console.warn('Speech recognition error:', event);
+      setIsRecording(false);
+    };
+    recognition.onend = () => {
+      setIsRecording(false);
+      // Automatically restart if not stopped manually
+      if (continuous && !stoppedManuallyRef.current) {
+        try {
+          recognition.start();
+        } catch (error) {
+          console.error('Error restarting speech recognition:', error);
+        }
+      }
+    };
+
+    recognitionRef.current = recognition;
+
+    return () => {
+      if (!recognitionRef.current) return;
+
+      recognitionRef.current.onresult = null;
+      recognitionRef.current.onend = null;
+      recognitionRef.current.onerror = null;
+      recognitionRef.current.onstart = null;
+      recognitionRef.current.stop();
+      recognitionRef.current = null;
+    };
+  }, [lang, continuous, interimResults]);
+
+  const startRecording = useCallback(() => {
+    const recognition = recognitionRef.current;
+    if (recognition && !isRecording) {
+      setTranscript('');
+      finalTranscriptRef.current = '';
+      stoppedManuallyRef.current = false;
+      try {
+        recognition.start();
+      } catch (error) {
+        console.error('Failed to start recording:', error);
+        setIsRecording(false);
+      }
+    }
+  }, [isRecording]);
+
+  const stopRecording = useCallback(() => {
+    const recognition = recognitionRef.current;
+    if (recognition && isRecording) {
+      stoppedManuallyRef.current = true;
+      try {
+        recognition.stop();
+      } catch (error) {
+        console.error('Failed to stop recording:', error);
+        setIsRecording(false);
+      }
+    }
+  }, [isRecording]);
+
+  return {
+    isRecording,
+    transcript,
+    startRecording,
+    stopRecording,
+  };
+};
+
+const SpeechToText = forwardRef<
+  SpeechToTextState,
+  SpeechToTextProps & { children: (props: SpeechToTextState) => ReactNode }
+>(({ children, lang, continuous, interimResults, onRecord }, ref) => {
+  const speechToText = useSpeechToText({
+    lang,
+    continuous,
+    interimResults,
+    onRecord,
+  });
+
+  useImperativeHandle(ref, () => speechToText, [speechToText]);
+
+  return <Fragment>{children(speechToText)}</Fragment>;
+});
+
+export default SpeechToText;
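
Review note: besides the render-prop API that ChatInput.tsx uses, the component forwards its full state (isRecording, transcript, startRecording, stopRecording) through a ref via useImperativeHandle, although nothing in this PR consumes it. A minimal sketch of driving recording through that ref; the VoiceNote component and its wiring are hypothetical, not part of this diff:

// Hypothetical consumer: toggles recording via the forwarded ref instead of
// the startRecording/stopRecording render-prop callbacks. Not part of this PR.
import { useRef, type ElementRef } from 'react';
import SpeechToText, {
  IS_SPEECH_RECOGNITION_SUPPORTED,
} from './SpeechToText';

function VoiceNote() {
  // ElementRef extracts the SpeechToTextState handle exposed by useImperativeHandle.
  const sttRef = useRef<ElementRef<typeof SpeechToText>>(null);

  if (!IS_SPEECH_RECOGNITION_SUPPORTED) return null;

  return (
    <SpeechToText ref={sttRef} onRecord={(text) => console.log(text)}>
      {({ isRecording }) => (
        <button
          onClick={() =>
            isRecording
              ? sttRef.current?.stopRecording()
              : sttRef.current?.startRecording()
          }
        >
          {isRecording ? 'Stop' : 'Record'}
        </button>
      )}
    </SpeechToText>
  );
}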
7 changes: 4 additions & 3 deletions src/components/TextToSpeech.tsx
@@ -25,7 +25,7 @@ const popularLanguages = [
   'ar',
 ];
 
-export const IS_SPEECH_SYNTHESIS_SUPPORTED = !!speechSynthesis || false;
+export const IS_SPEECH_SYNTHESIS_SUPPORTED = !!window.speechSynthesis;
 export const getSpeechSynthesisVoices = () =>
   speechSynthesis
     ?.getVoices()
@@ -93,7 +93,7 @@ const useTextToSpeech = ({
       utteranceRef.current.onerror = null;
     }
 
-    const utterance = new SpeechSynthesisUtterance(text);
+    const utterance = new window.SpeechSynthesisUtterance(text);
 
     utterance.voice = voice;
     utterance.pitch = pitch;
@@ -109,7 +109,8 @@
       setIsPlaying(false);
     };
 
-    utterance.onerror = () => {
+    utterance.onerror = (event) => {
+      console.error('Speech synthesis error: ', event.error);
       setIsPlaying(false);
     };
 
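
Review note: the rewritten guard drops the redundant "|| false" and reads the API off window instead of referencing the bare speechSynthesis global (which throws a ReferenceError in environments where the global is missing entirely). It still assumes window exists, unlike the "typeof window === 'undefined'" check that SpeechToText.tsx uses. A fully environment-safe sketch of the same constant, mirroring that pattern; illustrative only, not what this PR ships:

// Illustrative SSR-safe variant, following the guard style in SpeechToText.tsx.
export const IS_SPEECH_SYNTHESIS_SUPPORTED =
  typeof window !== 'undefined' && !!window.speechSynthesis;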