Skip to content

Commit 86cde69

Browse files
author
Mateusz Kopciński
committed
fixed S2T streaming
improved error handling, minor refactoring
1 parent a3394df commit 86cde69

File tree

13 files changed

+429
-472
lines changed

13 files changed

+429
-472
lines changed

examples/llm/ios/Podfile.lock

Lines changed: 86 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1401,7 +1401,7 @@ PODS:
14011401
- React-jsiexecutor
14021402
- React-RCTFBReactNativeSpec
14031403
- ReactCommon/turbomodule/core
1404-
- react-native-executorch (0.3.1-stt-7):
1404+
- react-native-executorch (0.3.3):
14051405
- DoubleConversion
14061406
- glog
14071407
- hermes-engine
@@ -1823,6 +1823,82 @@ PODS:
18231823
- React-logger (= 0.79.2)
18241824
- React-perflogger (= 0.79.2)
18251825
- React-utils (= 0.79.2)
1826+
- RNAudioAPI (0.5.7):
1827+
- DoubleConversion
1828+
- glog
1829+
- hermes-engine
1830+
- RCT-Folly (= 2024.11.18.00)
1831+
- RCTRequired
1832+
- RCTTypeSafety
1833+
- React-Core
1834+
- React-debug
1835+
- React-Fabric
1836+
- React-featureflags
1837+
- React-graphics
1838+
- React-hermes
1839+
- React-ImageManager
1840+
- React-jsi
1841+
- React-NativeModulesApple
1842+
- React-RCTFabric
1843+
- React-renderercss
1844+
- React-rendererdebug
1845+
- React-utils
1846+
- ReactCodegen
1847+
- ReactCommon/turbomodule/bridging
1848+
- ReactCommon/turbomodule/core
1849+
- RNAudioAPI/audioapi (= 0.5.7)
1850+
- Yoga
1851+
- RNAudioAPI/audioapi (0.5.7):
1852+
- DoubleConversion
1853+
- glog
1854+
- hermes-engine
1855+
- RCT-Folly (= 2024.11.18.00)
1856+
- RCTRequired
1857+
- RCTTypeSafety
1858+
- React-Core
1859+
- React-debug
1860+
- React-Fabric
1861+
- React-featureflags
1862+
- React-graphics
1863+
- React-hermes
1864+
- React-ImageManager
1865+
- React-jsi
1866+
- React-NativeModulesApple
1867+
- React-RCTFabric
1868+
- React-renderercss
1869+
- React-rendererdebug
1870+
- React-utils
1871+
- ReactCodegen
1872+
- ReactCommon/turbomodule/bridging
1873+
- ReactCommon/turbomodule/core
1874+
- RNAudioAPI/audioapi/ios (= 0.5.7)
1875+
- Yoga
1876+
- RNAudioAPI/audioapi/ios (0.5.7):
1877+
- DoubleConversion
1878+
- glog
1879+
- hermes-engine
1880+
- RCT-Folly (= 2024.11.18.00)
1881+
- RCTRequired
1882+
- RCTTypeSafety
1883+
- React-Core
1884+
- React-debug
1885+
- React-Fabric
1886+
- React-featureflags
1887+
- React-graphics
1888+
- React-hermes
1889+
- React-ImageManager
1890+
- React-jsi
1891+
- React-NativeModulesApple
1892+
- React-RCTFabric
1893+
- React-renderercss
1894+
- React-rendererdebug
1895+
- React-utils
1896+
- ReactCodegen
1897+
- ReactCommon/turbomodule/bridging
1898+
- ReactCommon/turbomodule/core
1899+
- Yoga
1900+
- RNLiveAudioStream (1.1.1):
1901+
- React
18261902
- RNReanimated (3.17.5):
18271903
- DoubleConversion
18281904
- glog
@@ -2083,6 +2159,8 @@ DEPENDENCIES:
20832159
- ReactAppDependencyProvider (from `build/generated/ios`)
20842160
- ReactCodegen (from `build/generated/ios`)
20852161
- ReactCommon/turbomodule/core (from `../node_modules/react-native/ReactCommon`)
2162+
- RNAudioAPI (from `../node_modules/react-native-audio-api`)
2163+
- RNLiveAudioStream (from `../node_modules/react-native-live-audio-stream`)
20862164
- RNReanimated (from `../node_modules/react-native-reanimated`)
20872165
- RNSVG (from `../node_modules/react-native-svg`)
20882166
- Yoga (from `../node_modules/react-native/ReactCommon/yoga`)
@@ -2256,6 +2334,10 @@ EXTERNAL SOURCES:
22562334
:path: build/generated/ios
22572335
ReactCommon:
22582336
:path: "../node_modules/react-native/ReactCommon"
2337+
RNAudioAPI:
2338+
:path: "../node_modules/react-native-audio-api"
2339+
RNLiveAudioStream:
2340+
:path: "../node_modules/react-native-live-audio-stream"
22592341
RNReanimated:
22602342
:path: "../node_modules/react-native-reanimated"
22612343
RNSVG:
@@ -2313,7 +2395,7 @@ SPEC CHECKSUMS:
23132395
React-logger: 8edfcedc100544791cd82692ca5a574240a16219
23142396
React-Mapbuffer: c3f4b608e4a59dd2f6a416ef4d47a14400194468
23152397
React-microtasksnativemodule: 054f34e9b82f02bd40f09cebd4083828b5b2beb6
2316-
react-native-executorch: 8bca350ccbb66246dd7366fb6514fed57083930e
2398+
react-native-executorch: d0c3dffa0a4a4111ea9c7b97f3fbf088a48d3b2a
23172399
react-native-safe-area-context: 562163222d999b79a51577eda2ea8ad2c32b4d06
23182400
React-NativeModulesApple: 2c4377e139522c3d73f5df582e4f051a838ff25e
23192401
React-oscompat: ef5df1c734f19b8003e149317d041b8ce1f7d29c
@@ -2346,6 +2428,8 @@ SPEC CHECKSUMS:
23462428
ReactAppDependencyProvider: 04d5eb15eb46be6720e17a4a7fa92940a776e584
23472429
ReactCodegen: c63eda03ba1d94353fb97b031fc84f75a0d125ba
23482430
ReactCommon: 76d2dc87136d0a667678668b86f0fca0c16fdeb0
2431+
RNAudioAPI: 2e3fd4bf75aa5717791babb30126707504996f09
2432+
RNLiveAudioStream: 93ac2bb6065be9018d0b00157b220f11cebc1513
23492433
RNReanimated: 2313402fe27fecb7237619e9c6fcee3177f08a65
23502434
RNSVG: 794f269526df9ddc1f79b3d1a202b619df0368e3
23512435
SocketRocket: d4aabe649be1e368d1318fdf28a022d714d65748

examples/llm/ios/llm.xcodeproj/project.pbxproj

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@
2323
13B07F961A680F5B00A75B9A /* llm.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = llm.app; sourceTree = BUILT_PRODUCTS_DIR; };
2424
13B07FB51A68108700A75B9A /* Images.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; name = Images.xcassets; path = llm/Images.xcassets; sourceTree = "<group>"; };
2525
13B07FB61A68108700A75B9A /* Info.plist */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.xml; name = Info.plist; path = llm/Info.plist; sourceTree = "<group>"; };
26-
52F93D50E948F855530BE970 /* PrivacyInfo.xcprivacy */ = {isa = PBXFileReference; includeInIndex = 1; name = PrivacyInfo.xcprivacy; path = llm/PrivacyInfo.xcprivacy; sourceTree = "<group>"; };
27-
66FD0DA88DE041D4B2294CB4 /* Aeonik-Medium.otf */ = {isa = PBXFileReference; explicitFileType = undefined; fileEncoding = undefined; includeInIndex = 0; lastKnownFileType = unknown; name = "Aeonik-Medium.otf"; path = "../assets/fonts/Aeonik-Medium.otf"; sourceTree = "<group>"; };
26+
52F93D50E948F855530BE970 /* PrivacyInfo.xcprivacy */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xml; name = PrivacyInfo.xcprivacy; path = llm/PrivacyInfo.xcprivacy; sourceTree = "<group>"; };
27+
66FD0DA88DE041D4B2294CB4 /* Aeonik-Medium.otf */ = {isa = PBXFileReference; explicitFileType = undefined; fileEncoding = 9; includeInIndex = 0; lastKnownFileType = unknown; name = "Aeonik-Medium.otf"; path = "../assets/fonts/Aeonik-Medium.otf"; sourceTree = "<group>"; };
2828
99F00DD47E42AF45816C58B8 /* Pods-llm.release.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-llm.release.xcconfig"; path = "Target Support Files/Pods-llm/Pods-llm.release.xcconfig"; sourceTree = "<group>"; };
2929
A8F7447548358E44DADA5029 /* Pods-llm.debug.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-llm.debug.xcconfig"; path = "Target Support Files/Pods-llm/Pods-llm.debug.xcconfig"; sourceTree = "<group>"; };
3030
AA286B85B6C04FC6940260E9 /* SplashScreen.storyboard */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = file.storyboard; name = SplashScreen.storyboard; path = llm/SplashScreen.storyboard; sourceTree = "<group>"; };
@@ -33,7 +33,7 @@
3333
ED297162215061F000B7C4FE /* JavaScriptCore.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = JavaScriptCore.framework; path = System/Library/Frameworks/JavaScriptCore.framework; sourceTree = SDKROOT; };
3434
F11748412D0307B40044C1D9 /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; name = AppDelegate.swift; path = llm/AppDelegate.swift; sourceTree = "<group>"; };
3535
F11748442D0722820044C1D9 /* llm-Bridging-Header.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "llm-Bridging-Header.h"; path = "llm/llm-Bridging-Header.h"; sourceTree = "<group>"; };
36-
F73A1A19BDF34985A5AAA585 /* Aeonik-Regular.otf */ = {isa = PBXFileReference; explicitFileType = undefined; fileEncoding = undefined; includeInIndex = 0; lastKnownFileType = unknown; name = "Aeonik-Regular.otf"; path = "../assets/fonts/Aeonik-Regular.otf"; sourceTree = "<group>"; };
36+
F73A1A19BDF34985A5AAA585 /* Aeonik-Regular.otf */ = {isa = PBXFileReference; explicitFileType = undefined; fileEncoding = 9; includeInIndex = 0; lastKnownFileType = unknown; name = "Aeonik-Regular.otf"; path = "../assets/fonts/Aeonik-Regular.otf"; sourceTree = "<group>"; };
3737
/* End PBXFileReference section */
3838

3939
/* Begin PBXFrameworksBuildPhase section */
@@ -117,7 +117,6 @@
117117
66FD0DA88DE041D4B2294CB4 /* Aeonik-Medium.otf */,
118118
);
119119
name = Resources;
120-
path = "";
121120
sourceTree = "<group>";
122121
};
123122
B83BB53798F9A57DA3D5DBEC /* Pods */ = {
@@ -126,7 +125,6 @@
126125
A8F7447548358E44DADA5029 /* Pods-llm.debug.xcconfig */,
127126
99F00DD47E42AF45816C58B8 /* Pods-llm.release.xcconfig */,
128127
);
129-
name = Pods;
130128
path = Pods;
131129
sourceTree = "<group>";
132130
};
@@ -353,6 +351,7 @@
353351
CLANG_ENABLE_MODULES = YES;
354352
CODE_SIGN_ENTITLEMENTS = llm/llm.entitlements;
355353
CURRENT_PROJECT_VERSION = 1;
354+
DEVELOPMENT_TEAM = J5FM626PE2;
356355
ENABLE_BITCODE = NO;
357356
GCC_PREPROCESSOR_DEFINITIONS = (
358357
"$(inherited)",
@@ -389,6 +388,7 @@
389388
CLANG_ENABLE_MODULES = YES;
390389
CODE_SIGN_ENTITLEMENTS = llm/llm.entitlements;
391390
CURRENT_PROJECT_VERSION = 1;
391+
DEVELOPMENT_TEAM = J5FM626PE2;
392392
INFOPLIST_FILE = llm/Info.plist;
393393
IPHONEOS_DEPLOYMENT_TARGET = 15.1;
394394
LD_RUNPATH_SEARCH_PATHS = (
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
3+
<plist version="1.0">
4+
<dict>
5+
<key>IDEWorkspaceSharedSettings_AutocreateContextsIfNeeded</key>
6+
<false/>
7+
</dict>
8+
</plist>

examples/llm/ios/llm/Info.plist

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@
5252
<string>Allow $(PRODUCT_NAME) to access your reminders</string>
5353
<key>NSRemindersUsageDescription</key>
5454
<string>Allow $(PRODUCT_NAME) to access your reminders</string>
55+
<key>NSMicrophoneUsageDescription</key>
56+
<string>We need your permission to use the microphone.</string>
5557
<key>UIAppFonts</key>
5658
<array>
5759
<string>Aeonik-Regular.otf</string>

examples/llm/package.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,9 @@
1919
"metro-config": "^0.81.0",
2020
"react": "19.0.0",
2121
"react-native": "^0.79.2",
22-
"react-native-executorch": "/Users/kopcion/swm-ai/react-native-executorch/react-native-executorch-0.3.1-stt-7.tgz",
22+
"react-native-audio-api": "0.5.7",
23+
"react-native-executorch": "^0.3.1",
24+
"react-native-live-audio-stream": "^1.1.1",
2325
"react-native-loading-spinner-overlay": "^3.0.1",
2426
"react-native-markdown-display": "^7.0.2",
2527
"react-native-reanimated": "^3.17.5",

examples/llm/screens/LLMScreen.tsx

Lines changed: 27 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { useEffect, useRef, useState } from 'react';
1+
import { useRef, useState } from 'react';
22
import {
33
Keyboard,
44
KeyboardAvoidingView,
@@ -14,12 +14,12 @@ import { SafeAreaView } from 'react-native-safe-area-context';
1414
import SWMIcon from '../assets/icons/swm_icon.svg';
1515
import Spinner from 'react-native-loading-spinner-overlay';
1616
import {
17-
// STREAMING_ACTION,
17+
STREAMING_ACTION,
1818
useSpeechToText,
19-
LLAMA3_2_1B_QLORA,
20-
LLAMA3_2_1B_TOKENIZER,
2119
useLLM,
22-
LLAMA3_2_TOKENIZER_CONFIG,
20+
QWEN3_0_6B_QUANTIZED,
21+
QWEN3_TOKENIZER,
22+
QWEN3_TOKENIZER_CONFIG,
2323
} from 'react-native-executorch';
2424
import PauseIcon from '../assets/icons/pause_icon.svg';
2525
import MicIcon from '../assets/icons/mic_icon.svg';
@@ -29,7 +29,6 @@ import ColorPalette from '../colors';
2929
import Messages from '../components/Messages';
3030
import LiveAudioStream from 'react-native-live-audio-stream';
3131
import { Buffer } from 'buffer';
32-
// import * as Speech from 'expo-speech';
3332

3433
const audioStreamOptions = {
3534
sampleRate: 16000,
@@ -60,59 +59,48 @@ const float32ArrayFromPCMBinaryBuffer = (b64EncodedBuffer: string) => {
6059
};
6160

6261
export default function ChatScreen() {
63-
// const [currentMessage, setCurrentMessage] = useState('');
6462
const [isRecording, setIsRecording] = useState(false);
6563
const [isTextInputFocused, setIsTextInputFocused] = useState(false);
6664
const [userInput, setUserInput] = useState('');
6765
const textInputRef = useRef<TextInput>(null);
6866
const messageRecorded = useRef<boolean>(false);
6967
const llm = useLLM({
70-
modelSource: LLAMA3_2_1B_QLORA,
71-
tokenizerSource: LLAMA3_2_1B_TOKENIZER,
72-
tokenizerConfigSource: LLAMA3_2_TOKENIZER_CONFIG,
68+
modelSource: QWEN3_0_6B_QUANTIZED,
69+
tokenizerSource: QWEN3_TOKENIZER,
70+
tokenizerConfigSource: QWEN3_TOKENIZER_CONFIG,
7371
chatConfig: {
7472
contextWindowLength: 6,
7573
},
7674
});
7775
const speechToText = useSpeechToText({
78-
modelName: 'whisper',
79-
windowSize: 5,
76+
modelName: 'moonshine',
77+
windowSize: 3,
8078
overlapSeconds: 1.2,
8179
});
8280

8381
const onChunk = (data: string) => {
8482
const float32Chunk = float32ArrayFromPCMBinaryBuffer(data);
85-
speechToText.transcribe(Array.from(float32Chunk));
86-
// speechToText.streamingTranscribe(
87-
// STREAMING_ACTION.DATA,
88-
// Array.from(float32Chunk)
89-
// );
83+
speechToText.streamingTranscribe(
84+
STREAMING_ACTION.DATA,
85+
Array.from(float32Chunk)
86+
);
9087
};
9188

9289
const handleRecordPress = async () => {
9390
if (isRecording) {
9491
setIsRecording(false);
9592
LiveAudioStream.stop();
9693
messageRecorded.current = true;
97-
// await llm.generate(
98-
// await speechToText.streamingTranscribe(STREAMING_ACTION.STOP)
99-
// );
94+
await llm.sendMessage(
95+
await speechToText.streamingTranscribe(STREAMING_ACTION.STOP)
96+
);
10097
} else {
10198
setIsRecording(true);
10299
startStreamingAudio(audioStreamOptions, onChunk);
103-
// await speechToText.streamingTranscribe(STREAMING_ACTION.START);
100+
await speechToText.streamingTranscribe(STREAMING_ACTION.START);
104101
}
105102
};
106103

107-
// const sendMessage = async () => {
108-
// setUserInput('');
109-
// textInputRef.current?.clear();
110-
// try {
111-
// await llm.sendMessage(userInput);
112-
// } catch (e) {
113-
// console.error(e);
114-
// }
115-
// };
116104
const sendMessage = async () => {
117105
if (userInput) {
118106
llm.sendMessage(userInput);
@@ -122,28 +110,6 @@ export default function ChatScreen() {
122110
}
123111
};
124112

125-
// useEffect(() => {
126-
// if (llm.response && !llm.isGenerating) {
127-
// appendToMessageHistory(llm.response, 'assistant');
128-
// }
129-
// }, [llm.response, llm.isGenerating]);
130-
131-
// const modifyLastMessage = (content: string) => {
132-
// setCurrentMessage((prevMessage) => prevMessage + content);
133-
// };
134-
135-
useEffect(() => {
136-
if (speechToText.sequence.length && !speechToText.isGenerating) {
137-
llm.sendMessage(speechToText.sequence);
138-
}
139-
}, [speechToText.sequence, speechToText.isGenerating, llm.sendMessage]); //eslint-disable-line react-hooks/exhaustive-deps
140-
141-
// const appendToMessageHistory = (content: string, role: SenderType) => {
142-
// setChatHistory((prevHistory) => [...prevHistory, { role, content }]);
143-
// if (role == 'assistant' && messageRecorded.current)
144-
// Speech.speak(content, { language: 'en-US' });
145-
// };
146-
147113
return !llm.isReady || !speechToText.isReady ? (
148114
<Spinner
149115
visible={!llm.isReady || !speechToText.isReady}
@@ -161,13 +127,17 @@ export default function ChatScreen() {
161127
<SWMIcon width={45} height={45} />
162128
<Text style={styles.textModelName}>llm 3.2 1B QLoRA x Whisper</Text>
163129
</View>
164-
{llm.messageHistory.length ? (
130+
{llm.messageHistory.length || speechToText.sequence ? (
165131
<View style={styles.chatContainer}>
166132
<Messages
167-
chatHistory={[
168-
...llm.messageHistory,
169-
{ role: 'user', content: speechToText.sequence },
170-
]}
133+
chatHistory={
134+
speechToText.isGenerating
135+
? [
136+
...llm.messageHistory,
137+
{ role: 'user', content: speechToText.sequence },
138+
]
139+
: llm.messageHistory
140+
}
171141
llmResponse={llm.response}
172142
isGenerating={llm.isGenerating}
173143
deleteMessage={llm.deleteMessage}

0 commit comments

Comments
 (0)