Skip to content

Commit ba83046

Browse files
authored
chore: fix fetching files in speech to text controller (#274)
## Description

1. Change how files are fetched in the speech-to-text controller.
2. Use `TokenizerModule` in the speech-to-text controller.
3. Fix the speechToText demo app: add microphone permissions and rebuild the `ios` folder.
4. Rebuild the `android` folder.

### Type of change

- [x] Bug fix (non-breaking change which fixes an issue)
- [ ] New feature (non-breaking change which adds functionality)
- [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected)
- [ ] Documentation update (improves or adds clarity to existing documentation)

### Tested on

- [x] iOS
- [x] Android

### Checklist

- [x] I have performed a self-review of my code
- [x] I have commented my code, particularly in hard-to-understand areas
- [x] I have updated the documentation accordingly
- [x] My changes generate no new warnings
1 parent c33c0a9 commit ba83046

File tree

7 files changed

+42
-27
lines changed

7 files changed

+42
-27
lines changed

examples/speech-to-text/android/app/src/main/AndroidManifest.xml

+1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
<manifest xmlns:android="http://schemas.android.com/apk/res/android">
22
<uses-permission android:name="android.permission.INTERNET"/>
33
<uses-permission android:name="android.permission.READ_EXTERNAL_STORAGE"/>
4+
<uses-permission android:name="android.permission.RECORD_AUDIO"/>
45
<uses-permission android:name="android.permission.SYSTEM_ALERT_WINDOW"/>
56
<uses-permission android:name="android.permission.VIBRATE"/>
67
<uses-permission android:name="android.permission.WRITE_EXTERNAL_STORAGE"/>

examples/speech-to-text/android/app/src/main/java/com/anonymous/speechtotext/MainActivity.kt

+2-3
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,8 @@ class MainActivity : ReactActivity() {
2727
* Returns the instance of the [ReactActivityDelegate]. We use [DefaultReactActivityDelegate]
2828
* which allows you to enable New Architecture with a single boolean flag [fabricEnabled]
2929
*/
30-
override fun createReactActivityDelegate(): ReactActivityDelegate {
31-
return ReactActivityDelegateWrapper(
30+
override fun createReactActivityDelegate(): ReactActivityDelegate =
31+
ReactActivityDelegateWrapper(
3232
this,
3333
BuildConfig.IS_NEW_ARCHITECTURE_ENABLED,
3434
object : DefaultReactActivityDelegate(
@@ -37,7 +37,6 @@ class MainActivity : ReactActivity() {
3737
fabricEnabled,
3838
) {},
3939
)
40-
}
4140

4241
/**
4342
* Align the back button behavior with Android S

examples/speech-to-text/android/app/src/main/java/com/anonymous/speechtotext/MainApplication.kt

+4-2
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,17 @@ import com.facebook.soloader.SoLoader
1414
import expo.modules.ApplicationLifecycleDispatcher
1515
import expo.modules.ReactNativeHostWrapper
1616

17-
class MainApplication : Application(), ReactApplication {
17+
class MainApplication :
18+
Application(),
19+
ReactApplication {
1820
override val reactNativeHost: ReactNativeHost =
1921
ReactNativeHostWrapper(
2022
this,
2123
object : DefaultReactNativeHost(this) {
2224
override fun getPackages(): List<ReactPackage> {
2325
val packages = PackageList(this).packages
2426
// Packages that cannot be autolinked yet can be added manually here, for example:
25-
// packages.add(new MyReactNativePackage());
27+
// packages.add(MyReactNativePackage())
2628
return packages
2729
}
2830

examples/speech-to-text/app.json

+4-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,10 @@
1414
},
1515
"ios": {
1616
"supportsTablet": true,
17-
"bundleIdentifier": "com.anonymous.speechtotext"
17+
"bundleIdentifier": "com.anonymous.speechtotext",
18+
"infoPlist": {
19+
"NSMicrophoneUsageDescription": "This app needs access to your microphone to record audio."
20+
}
1821
},
1922
"android": {
2023
"adaptiveIcon": {

examples/speech-to-text/ios/speechtotext/Info.plist

+2
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@
4444
<key>NSAllowsLocalNetworking</key>
4545
<true/>
4646
</dict>
47+
<key>NSMicrophoneUsageDescription</key>
48+
<string>This app needs access to your microphone to record audio.</string>
4749
<key>UILaunchStoryboardName</key>
4850
<string>SplashScreen</string>
4951
<key>UIRequiredDeviceCapabilities</key>

examples/speech-to-text/screens/SpeechToTextScreen.tsx

+19-1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ import {
55
StyleSheet,
66
SafeAreaView,
77
TouchableOpacity,
8+
PermissionsAndroid,
9+
Platform,
810
} from 'react-native';
911
import LiveAudioStream from 'react-native-live-audio-stream';
1012
import SWMIcon from '../assets/swm_icon.svg';
@@ -75,6 +77,21 @@ export const SpeechToTextScreen = () => {
7577
};
7678

7779
const handleRecordPress = async () => {
80+
if (Platform.OS === 'android') {
81+
const permission = await PermissionsAndroid.check(
82+
PermissionsAndroid.PERMISSIONS.RECORD_AUDIO
83+
);
84+
if (!permission) {
85+
const granted = await PermissionsAndroid.request(
86+
PermissionsAndroid.PERMISSIONS.RECORD_AUDIO
87+
);
88+
if (granted !== PermissionsAndroid.RESULTS.GRANTED) {
89+
console.log('Microphone permission denied');
90+
return;
91+
}
92+
}
93+
}
94+
7895
if (isRecording) {
7996
LiveAudioStream.stop();
8097
setIsRecording(false);
@@ -162,7 +179,7 @@ export const SpeechToTextScreen = () => {
162179
}}
163180
>
164181
<Text style={[styles.recordingButtonText, styles.font13]}>
165-
{'TRANSCRIBE FROM URL'}
182+
TRANSCRIBE FROM URL
166183
</Text>
167184
</TouchableOpacity>
168185
</View>
@@ -226,6 +243,7 @@ const styles = StyleSheet.create({
226243
justifyContent: 'center',
227244
alignItems: 'center',
228245
marginBottom: 20,
246+
backgroundColor: 'white',
229247
},
230248
recordingButtonWrapper: {
231249
flex: 1,

src/controllers/SpeechToTextController.ts

+10-20
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,8 @@ import {
66
NUM_TOKENS_TO_SLICE,
77
} from '../constants/sttDefaults';
88
import { AvailableModels, ModelConfig } from '../types/stt';
9-
import {
10-
SpeechToTextNativeModule,
11-
TokenizerNativeModule,
12-
} from '../native/RnExecutorchModules';
9+
import { SpeechToTextNativeModule } from '../native/RnExecutorchModules';
10+
import { TokenizerModule } from '../modules/natural_language_processing/TokenizerModule';
1311
import { ResourceSource } from '../types/common';
1412
import { ResourceFetcher } from '../utils/ResourceFetcher';
1513
import { longCommonInfPref } from '../utils/stt';
@@ -24,7 +22,7 @@ export class SpeechToTextController {
2422
public sequence: number[] = [];
2523
public isReady = false;
2624
public isGenerating = false;
27-
private nativeTokenizer = TokenizerNativeModule;
25+
private nativeTokenizer = TokenizerModule;
2826

2927
// User callbacks
3028
private decodedTranscribeCallback: (sequence: number[]) => void;
@@ -85,24 +83,16 @@ export class SpeechToTextController {
8583
this.config = MODEL_CONFIGS[modelName];
8684

8785
try {
88-
encoderSource = await ResourceFetcher.fetch(
89-
encoderSource || this.config.sources.encoder,
90-
(progress) => this.modelDownloadProgressCallback?.(progress / 2)
91-
);
92-
93-
decoderSource = await ResourceFetcher.fetch(
94-
decoderSource || this.config.sources.decoder,
95-
(progress) => this.modelDownloadProgressCallback?.(0.5 + progress / 2)
96-
);
97-
98-
let tokenizerUri = await ResourceFetcher.fetch(
86+
await this.nativeTokenizer.load(
9987
tokenizerSource || this.config.tokenizer.source
10088
);
10189

102-
// The tokenizer native module does not accept the file:// prefix
103-
await this.nativeTokenizer.loadModule(
104-
tokenizerUri.replace('file://', '')
105-
);
90+
[encoderSource, decoderSource] =
91+
await ResourceFetcher.fetchMultipleResources(
92+
this.modelDownloadProgressCallback,
93+
encoderSource || this.config.sources.encoder,
94+
decoderSource || this.config.sources.decoder
95+
);
10696
} catch (e) {
10797
this.onErrorCallback?.(e);
10898
return;

0 commit comments

Comments
 (0)