chore: fix fetching files in speech to text controller (#274)

jakmro · web-flow · commit ba8304695a98 · 2025-05-16T13:44:30.000+02:00
## Description

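1. Change how files are fetched in the speech-to-text controller: the
encoder and decoder are now fetched together via
`ResourceFetcher.fetchMultipleResources`.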
2. Use `TokenizerModule` instead of `TokenizerNativeModule` in the
speech-to-text controller.
3. Fix the speech-to-text demo app: add microphone permissions and rebuild
the iOS folder.
4. Rebuild the Android folder.

### Type of change

- [x] Bug fix (non-breaking change which fixes an issue)
- [ ] New feature (non-breaking change which adds functionality)
- [ ] Breaking change (fix or feature that would cause existing
functionality to not work as expected)
- [ ] Documentation update (improves or adds clarity to existing
documentation)

### Tested on

- [x] iOS
- [x] Android

### Checklist

- [x] I have performed a self-review of my code
- [x] I have commented my code, particularly in hard-to-understand areas
- [x] I have updated the documentation accordingly
- [x] My changes generate no new warnings
diff --git a/examples/speech-to-text/android/app/src/main/AndroidManifest.xml b/examples/speech-to-text/android/app/src/main/AndroidManifest.xml
@@ -1,6 +1,7 @@
 <manifest xmlns:android="http://schemas.android.com/apk/res/android">
   <uses-permission android:name="android.permission.INTERNET"/>
   <uses-permission android:name="android.permission.READ_EXTERNAL_STORAGE"/>
+  <uses-permission android:name="android.permission.RECORD_AUDIO"/>
   <uses-permission android:name="android.permission.SYSTEM_ALERT_WINDOW"/>
   <uses-permission android:name="android.permission.VIBRATE"/>
   <uses-permission android:name="android.permission.WRITE_EXTERNAL_STORAGE"/>
diff --git a/examples/speech-to-text/android/app/src/main/java/com/anonymous/speechtotext/MainActivity.kt b/examples/speech-to-text/android/app/src/main/java/com/anonymous/speechtotext/MainActivity.kt
@@ -27,8 +27,8 @@ class MainActivity : ReactActivity() {
    * Returns the instance of the [ReactActivityDelegate]. We use [DefaultReactActivityDelegate]
    * which allows you to enable New Architecture with a single boolean flags [fabricEnabled]
    */
-  override fun createReactActivityDelegate(): ReactActivityDelegate {
-    return ReactActivityDelegateWrapper(
+  override fun createReactActivityDelegate(): ReactActivityDelegate =
+    ReactActivityDelegateWrapper(
       this,
       BuildConfig.IS_NEW_ARCHITECTURE_ENABLED,
       object : DefaultReactActivityDelegate(
@@ -37,7 +37,6 @@ class MainActivity : ReactActivity() {
         fabricEnabled,
       ) {},
     )
-  }
 
   /**
    * Align the back button behavior with Android S
diff --git a/examples/speech-to-text/android/app/src/main/java/com/anonymous/speechtotext/MainApplication.kt b/examples/speech-to-text/android/app/src/main/java/com/anonymous/speechtotext/MainApplication.kt
@@ -14,15 +14,17 @@ import com.facebook.soloader.SoLoader
 import expo.modules.ApplicationLifecycleDispatcher
 import expo.modules.ReactNativeHostWrapper
 
-class MainApplication : Application(), ReactApplication {
+class MainApplication :
+  Application(),
+  ReactApplication {
   override val reactNativeHost: ReactNativeHost =
     ReactNativeHostWrapper(
       this,
       object : DefaultReactNativeHost(this) {
         override fun getPackages(): List<ReactPackage> {
           val packages = PackageList(this).packages
           // Packages that cannot be autolinked yet can be added manually here, for example:
-          // packages.add(new MyReactNativePackage());
+          // packages.add(MyReactNativePackage())
           return packages
         }
 
diff --git a/examples/speech-to-text/app.json b/examples/speech-to-text/app.json
@@ -14,7 +14,10 @@
     },
     "ios": {
       "supportsTablet": true,
-      "bundleIdentifier": "com.anonymous.speechtotext"
+      "bundleIdentifier": "com.anonymous.speechtotext",
+      "infoPlist": {
+        "NSMicrophoneUsageDescription": "This app needs access to your microphone to record audio."
+      }
     },
     "android": {
       "adaptiveIcon": {
diff --git a/examples/speech-to-text/ios/speechtotext/Info.plist b/examples/speech-to-text/ios/speechtotext/Info.plist
@@ -44,6 +44,8 @@
       <key>NSAllowsLocalNetworking</key>
       <true/>
     </dict>
+    <key>NSMicrophoneUsageDescription</key>
+    <string>This app needs access to your microphone to record audio.</string>
     <key>UILaunchStoryboardName</key>
     <string>SplashScreen</string>
     <key>UIRequiredDeviceCapabilities</key>
diff --git a/examples/speech-to-text/screens/SpeechToTextScreen.tsx b/examples/speech-to-text/screens/SpeechToTextScreen.tsx
@@ -5,6 +5,8 @@ import {
   StyleSheet,
   SafeAreaView,
   TouchableOpacity,
+  PermissionsAndroid,
+  Platform,
 } from 'react-native';
 import LiveAudioStream from 'react-native-live-audio-stream';
 import SWMIcon from '../assets/swm_icon.svg';
@@ -75,6 +77,21 @@ export const SpeechToTextScreen = () => {
   };
 
   const handleRecordPress = async () => {
+    if (Platform.OS === 'android') {
+      const permission = await PermissionsAndroid.check(
+        PermissionsAndroid.PERMISSIONS.RECORD_AUDIO
+      );
+      if (!permission) {
+        const granted = await PermissionsAndroid.request(
+          PermissionsAndroid.PERMISSIONS.RECORD_AUDIO
+        );
+        if (granted !== PermissionsAndroid.RESULTS.GRANTED) {
+          console.log('Microphone permission denied');
+          return;
+        }
+      }
+    }
+
     if (isRecording) {
       LiveAudioStream.stop();
       setIsRecording(false);
@@ -162,7 +179,7 @@ export const SpeechToTextScreen = () => {
               }}
             >
               <Text style={[styles.recordingButtonText, styles.font13]}>
-                {'TRANSCRIBE FROM URL'}
+                TRANSCRIBE FROM URL
               </Text>
             </TouchableOpacity>
           </View>
@@ -226,6 +243,7 @@ const styles = StyleSheet.create({
     justifyContent: 'center',
     alignItems: 'center',
     marginBottom: 20,
+    backgroundColor: 'white',
   },
   recordingButtonWrapper: {
     flex: 1,
diff --git a/src/controllers/SpeechToTextController.ts b/src/controllers/SpeechToTextController.ts
@@ -6,10 +6,8 @@ import {
   NUM_TOKENS_TO_SLICE,
 } from '../constants/sttDefaults';
 import { AvailableModels, ModelConfig } from '../types/stt';
-import {
-  SpeechToTextNativeModule,
-  TokenizerNativeModule,
-} from '../native/RnExecutorchModules';
+import { SpeechToTextNativeModule } from '../native/RnExecutorchModules';
+import { TokenizerModule } from '../modules/natural_language_processing/TokenizerModule';
 import { ResourceSource } from '../types/common';
 import { ResourceFetcher } from '../utils/ResourceFetcher';
 import { longCommonInfPref } from '../utils/stt';
@@ -24,7 +22,7 @@ export class SpeechToTextController {
   public sequence: number[] = [];
   public isReady = false;
   public isGenerating = false;
-  private nativeTokenizer = TokenizerNativeModule;
+  private nativeTokenizer = TokenizerModule;
 
   // User callbacks
   private decodedTranscribeCallback: (sequence: number[]) => void;
@@ -85,24 +83,16 @@ export class SpeechToTextController {
     this.config = MODEL_CONFIGS[modelName];
 
     try {
-      encoderSource = await ResourceFetcher.fetch(
-        encoderSource || this.config.sources.encoder,
-        (progress) => this.modelDownloadProgressCallback?.(progress / 2)
-      );
-
-      decoderSource = await ResourceFetcher.fetch(
-        decoderSource || this.config.sources.decoder,
-        (progress) => this.modelDownloadProgressCallback?.(0.5 + progress / 2)
-      );
-
-      let tokenizerUri = await ResourceFetcher.fetch(
+      await this.nativeTokenizer.load(
         tokenizerSource || this.config.tokenizer.source
       );
 
-      // The tokenizer native module does not accept the file:// prefix
-      await this.nativeTokenizer.loadModule(
-        tokenizerUri.replace('file://', '')
-      );
+      [encoderSource, decoderSource] =
+        await ResourceFetcher.fetchMultipleResources(
+          this.modelDownloadProgressCallback,
+          encoderSource || this.config.sources.encoder,
+          decoderSource || this.config.sources.decoder
+        );
     } catch (e) {
       this.onErrorCallback?.(e);
       return;