@@ -31,8 +31,6 @@ It is recommended to use models provided by us, which are available at our [Hugg
## Reference
- ### File transcription
-
You can obtain the waveform from audio in whatever way suits you best; in the snippet below we use the `react-native-audio-api` library to process an MP3 file.
```typescript
@@ -65,72 +63,6 @@ if (error) {
}
```
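
For reference, here is one way that waveform might be produced. This is a minimal sketch, assuming `react-native-audio-api`'s Web Audio-style `AudioContext` and its `decodeAudioDataSource` method; `loadWaveform` and `filePath` are illustrative names rather than part of either library.

```typescript
// Minimal sketch: decode an audio file into a 16 kHz mono waveform.
// Assumes react-native-audio-api's Web Audio-style API; verify the exact
// decoding calls against the library's documentation.
import { AudioContext } from 'react-native-audio-api';

const loadWaveform = async (filePath: string): Promise<number[]> => {
  // The STT models in this library operate on 16 kHz audio.
  const audioContext = new AudioContext({ sampleRate: 16000 });
  const decoded = await audioContext.decodeAudioDataSource(filePath);
  // Use the first channel; the models expect mono input.
  return Array.from(decoded.getChannelData(0));
};
```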
- ### Live data (microphone) transcription
-
- ```typescript
- import { STREAMING_ACTION, useSpeechToText } from 'react-native-executorch';
- import LiveAudioStream from 'react-native-live-audio-stream';
- import { useState } from 'react';
- import { Buffer } from 'buffer';
-
- const audioStreamOptions = {
-   sampleRate: 16000,
-   channels: 1,
-   bitsPerSample: 16,
-   audioSource: 1,
-   bufferSize: 16000,
- };
-
- const startStreamingAudio = (options: any, onChunk: (data: string) => void) => {
-   LiveAudioStream.init(options);
-   LiveAudioStream.on('data', onChunk);
-   LiveAudioStream.start();
- };
-
- const float32ArrayFromPCMBinaryBuffer = (b64EncodedBuffer: string) => {
-   const b64DecodedChunk = Buffer.from(b64EncodedBuffer, 'base64');
-   const int16Array = new Int16Array(b64DecodedChunk.buffer);
-
-   const float32Array = new Float32Array(int16Array.length);
-   for (let i = 0; i < int16Array.length; i++) {
-     float32Array[i] = Math.max(
-       -1,
-       Math.min(1, (int16Array[i] / audioStreamOptions.bufferSize) * 8)
-     );
-   }
-   return float32Array;
- };
-
- const [isRecording, setIsRecording] = useState(false);
-
- const speechToText = useSpeechToText({
-   modelName: 'moonshine',
-   windowSize: 3,
-   overlapSeconds: 1.2,
- });
-
- const onChunk = (data: string) => {
-   const float32Chunk = float32ArrayFromPCMBinaryBuffer(data);
-   speechToText.streamingTranscribe(
-     STREAMING_ACTION.DATA,
-     Array.from(float32Chunk)
-   );
- };
-
- const handleRecordPress = async () => {
-   if (isRecording) {
-     setIsRecording(false);
-     LiveAudioStream.stop();
-     messageRecorded.current = true;
-     await speechToText.streamingTranscribe(STREAMING_ACTION.STOP);
-   } else {
-     setIsRecording(true);
-     startStreamingAudio(audioStreamOptions, onChunk);
-     await speechToText.streamingTranscribe(STREAMING_ACTION.START);
-   }
- };
- ```
-
### Streaming
Since STT models can process audio no longer than 30 seconds, longer inputs have to be chunked. Naive chunking can cut speech mid-sentence, which the model may find hard to transcribe. To handle this, we employ an algorithm (adapted for mobile devices from [whisper-streaming](https://aclanthology.org/2023.ijcnlp-demo.3.pdf)) that uses overlapping audio chunks. The overlap introduces some overhead, but allows for processing audio inputs of arbitrary length. A sketch of the idea is shown below.
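
The windowing can be sketched as follows; `overlappingChunks` and `SAMPLE_RATE` are illustrative names, and this is not the library's actual implementation:

```typescript
// Illustrative sketch of overlapping chunking (not the library's code).
const SAMPLE_RATE = 16000;

function* overlappingChunks(
  waveform: Float32Array,
  windowSeconds: number,
  overlapSeconds: number
): Generator<Float32Array> {
  const windowSamples = Math.round(windowSeconds * SAMPLE_RATE);
  const stepSamples = Math.round((windowSeconds - overlapSeconds) * SAMPLE_RATE);
  for (let start = 0; start < waveform.length; start += stepSamples) {
    // Each window repeats `overlapSeconds` of audio from the previous one,
    // so speech cut at a boundary appears whole in the next window.
    yield waveform.subarray(start, Math.min(start + windowSamples, waveform.length));
  }
}
```

With `windowSize: 3` and `overlapSeconds: 1.2` (the values used in the live transcription example below), consecutive windows share 1.2 seconds of audio, at the cost of re-processing that overlap.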
@@ -302,11 +234,97 @@ function App() {
title = " Transcribe"
303
235
/>
304
236
<Text>{error ? error : sequence }</Text>
305
- </View>****
237
+ </View>
306
238
);
307
239
}
308
240
` ` `
309
241
242
+ ### Live data (microphone) transcription
+
+ ```typescript
+ import { STREAMING_ACTION, useSpeechToText } from 'react-native-executorch';
+ import LiveAudioStream from 'react-native-live-audio-stream';
+ import { useState } from 'react';
+ import { Buffer } from 'buffer';
+ import { StyleSheet, Text, TouchableOpacity, View } from 'react-native';
+
+ // Raw microphone stream: 16 kHz, mono, 16-bit PCM.
+ const audioStreamOptions = {
+   sampleRate: 16000,
+   channels: 1,
+   bitsPerSample: 16,
+   audioSource: 1,
+   bufferSize: 16000,
+ };
+
+ const startStreamingAudio = (options: any, onChunk: (data: string) => void) => {
+   LiveAudioStream.init(options);
+   LiveAudioStream.on('data', onChunk);
+   LiveAudioStream.start();
+ };
+
+ // Convert a base64-encoded chunk of 16-bit PCM into floats clipped to [-1, 1].
+ const float32ArrayFromPCMBinaryBuffer = (b64EncodedBuffer: string) => {
+   const b64DecodedChunk = Buffer.from(b64EncodedBuffer, 'base64');
+   const int16Array = new Int16Array(b64DecodedChunk.buffer);
+
+   const float32Array = new Float32Array(int16Array.length);
+   for (let i = 0; i < int16Array.length; i++) {
+     float32Array[i] = Math.max(
+       -1,
+       Math.min(1, (int16Array[i] / audioStreamOptions.bufferSize) * 8)
+     );
+   }
+   return float32Array;
+ };
+
+ function App() {
+   const [isRecording, setIsRecording] = useState(false);
+   const speechToText = useSpeechToText({
+     modelName: 'moonshine',
+     windowSize: 3,
+     overlapSeconds: 1.2,
+   });
+
+   const onChunk = (data: string) => {
+     const float32Chunk = float32ArrayFromPCMBinaryBuffer(data);
+     speechToText.streamingTranscribe(
+       STREAMING_ACTION.DATA,
+       Array.from(float32Chunk)
+     );
+   };
+
+   const handleRecordPress = async () => {
+     if (isRecording) {
+       setIsRecording(false);
+       LiveAudioStream.stop();
+       await speechToText.streamingTranscribe(STREAMING_ACTION.STOP);
+     } else {
+       setIsRecording(true);
+       startStreamingAudio(audioStreamOptions, onChunk);
+       await speechToText.streamingTranscribe(STREAMING_ACTION.START);
+     }
+   };
+
+   return (
+     <View>
+       <Text>{speechToText.sequence}</Text>
+       <TouchableOpacity
+         style={!isRecording ? styles.recordTouchable : styles.recordingInfo}
+         onPress={handleRecordPress}
+       >
+         {isRecording ? <Text>Stop</Text> : <Text>Record</Text>}
+       </TouchableOpacity>
+     </View>
+   );
+ }
+
+ // Placeholder styles so the snippet compiles; style to taste.
+ const styles = StyleSheet.create({
+   recordTouchable: { padding: 16 },
+   recordingInfo: { padding: 16, opacity: 0.5 },
+ });
+ ```
+
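+ Note that with `bitsPerSample: 16` each chunk arrives as base64-encoded 16-bit PCM, which is why `float32ArrayFromPCMBinaryBuffer` decodes and rescales the samples to the `[-1, 1]` float range before they are handed to `streamingTranscribe`.
+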
## Supported models
| Model | Language |