Skip to content

Commit 0f270a7

Browse files
committed
feat: streaming in s2t with deleting processed chunks
1 parent 41f64f3 commit 0f270a7

File tree

2 files changed

+136
-30
lines changed

2 files changed

+136
-30
lines changed
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
import React from 'react';
2+
import {
3+
View,
4+
Text,
5+
Modal,
6+
TextInput,
7+
StyleSheet,
8+
TouchableOpacity,
9+
} from 'react-native';
10+
11+
/**
 * Modal prompt with a single text field (placeholder "Enter audio url")
 * and a "Confirm" button.
 *
 * The component is fully controlled: the text lives in `value` /
 * `onChangeText`, and visibility lives in `modalVisible` / `setModalVisible`.
 *
 * The modal is dismissed by:
 *  - the platform close request (`onRequestClose`),
 *  - a tap on the backdrop outside the card,
 *  - the "Confirm" button.
 *
 * @param value            Current contents of the text field.
 * @param onChangeText     Called with the new text on every edit.
 * @param modalVisible     Whether the modal is currently shown.
 * @param setModalVisible  Setter used to hide the modal.
 */
const InputPrompt = ({
  value,
  onChangeText,
  modalVisible,
  setModalVisible,
}: {
  value: string;
  onChangeText: (_: string) => void;
  modalVisible: boolean;
  setModalVisible: (_: boolean) => void;
}) => {
  // Every dismissal path closes explicitly instead of toggling the current
  // value, so a stale `modalVisible` can never re-open the modal by accident.
  const closeModal = () => setModalVisible(false);

  return (
    <View style={styles.centeredView}>
      <Modal
        animationType="slide"
        transparent={true}
        visible={modalVisible}
        onRequestClose={closeModal}
      >
        {/* Full-screen backdrop: releasing a touch on it dismisses the modal. */}
        <TouchableOpacity
          style={styles.centeredView}
          activeOpacity={1}
          onPressOut={closeModal}
        >
          <View style={styles.centeredView}>
            {/*
              The card claims the touch responder so that taps on its padding
              or between its children do not bubble up to the backdrop and
              close the modal while the user is interacting with it.
            */}
            <View
              style={styles.modalView}
              onStartShouldSetResponder={() => true}
            >
              <TextInput
                placeholder="Enter audio url"
                style={styles.textInputStyle}
                onChangeText={onChangeText}
                value={value}
              />
              <TouchableOpacity
                style={styles.confirmButton}
                onPress={closeModal}
              >
                <Text style={styles.confirmText}>Confirm</Text>
              </TouchableOpacity>
            </View>
          </View>
        </TouchableOpacity>
      </Modal>
    </View>
  );
};
60+
61+
// Styles for the InputPrompt modal, ordered from the outermost container
// inwards: centering wrapper -> card -> text field -> confirm button -> label.
const styles = StyleSheet.create({
  // Fills the available space and centers its child on both axes.
  centeredView: {
    flex: 1,
    alignItems: 'center',
    justifyContent: 'center',
  },
  // White rounded card with a drop shadow (shadow* props and `elevation`).
  modalView: {
    alignItems: 'center',
    backgroundColor: 'white',
    borderRadius: 20,
    margin: 20,
    padding: 35,
    elevation: 5,
    shadowColor: '#000',
    shadowOffset: { width: 0, height: 2 },
    shadowOpacity: 0.25,
    shadowRadius: 4,
  },
  // Fixed-size rounded text field with centered text.
  textInputStyle: {
    width: 200,
    height: 40,
    padding: 10,
    marginBottom: 20,
    borderWidth: 1,
    borderColor: '#ccc',
    borderRadius: 20,
    textAlign: 'center',
  },
  // Full-width pill-shaped button with its content centered.
  confirmButton: {
    width: '100%',
    height: 40,
    paddingHorizontal: 15,
    borderRadius: 40,
    backgroundColor: '#001A72',
    alignItems: 'center',
    justifyContent: 'center',
  },
  // Label rendered inside the confirm button.
  confirmText: {
    color: 'white',
    fontSize: 20,
    fontWeight: '400',
  },
});
108+
109+
export default InputPrompt;

src/controllers/SpeechToTextController.ts

Lines changed: 27 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ export class SpeechToTextController {
3232
private streamWaveform: number[] = [];
3333
private isDecodingChunk = false;
3434
private numberOfDecodedChunks = 0;
35-
private numberOfDeletedChunks = 0;
35+
private isChunkDeleted = false;
3636
private numOfChunks = 0;
3737

3838
// User callbacks
@@ -162,12 +162,12 @@ export class SpeechToTextController {
162162
}
163163
}
164164

165-
private chunkWaveform(waveform: number[]) {
165+
private chunkWaveform(waveform: number[], streamingSlice?: boolean) {
166166
this.chunks = [];
167167
this.numOfChunks = Math.ceil(waveform.length / this.windowSize);
168168
for (let i = 0; i < this.numOfChunks; i++) {
169169
let chunk;
170-
if (i == 0 && this.numberOfDeletedChunks > 0) {
170+
if (i == 0 && streamingSlice) {
171171
chunk = waveform.slice(
172172
0,
173173
Math.min(
@@ -381,24 +381,21 @@ export class SpeechToTextController {
381381
this.streamWaveform = [];
382382
this.prevSeq = [];
383383
this.numberOfDecodedChunks = 0;
384+
this.isChunkDeleted = false;
384385
this.decodedTranscribeCallback([]);
385386
this.isGeneratingCallback(true);
386387
}
387388
this.streamWaveform = [...this.streamWaveform, ...waveform];
388-
this.chunkWaveform(this.streamWaveform);
389+
this.chunkWaveform(this.streamWaveform, this.isChunkDeleted);
389390
if (!this.isDecodingChunk && streamAction != 2) {
390391
this.isDecodingChunk = true;
391392
while (
392-
this.chunks.at(-this.numOfChunks)?.length ==
393+
this.chunks.at(0)?.length ==
393394
2 * this.overlapSeconds + this.windowSize ||
394395
(this.numberOfDecodedChunks == 0 &&
395-
this.chunks.at(-this.numOfChunks)?.length ==
396-
this.windowSize + this.overlapSeconds)
396+
this.chunks.at(0)?.length == this.windowSize + this.overlapSeconds)
397397
) {
398-
let seq = await this.decodeChunk(
399-
this.chunks.at(-this.numOfChunks)!,
400-
audioLanguage
401-
);
398+
let seq = await this.decodeChunk(this.chunks.at(0)!, audioLanguage);
402399
const numSpecialTokens = (await this.getStartingTokenIds(audioLanguage))
403400
.length;
404401
// remove sos/eos token and 3 additional ones
@@ -418,28 +415,28 @@ export class SpeechToTextController {
418415
if (this.seqs.length < 2) {
419416
continue;
420417
}
418+
// remove data, which was processed and saved to this.seqs
419+
if (this.numOfChunks > 2) {
420+
if (!this.isChunkDeleted) {
421+
this.streamWaveform = this.streamWaveform.slice(
422+
-(
423+
this.streamWaveform.length -
424+
(this.windowSize + this.overlapSeconds)
425+
)
426+
);
427+
} else {
428+
this.streamWaveform = this.streamWaveform.slice(
429+
-(this.streamWaveform.length - this.windowSize)
430+
);
431+
}
432+
this.isChunkDeleted = true;
433+
this.numOfChunks--;
434+
}
421435
}
422436
this.isDecodingChunk = false;
423437
}
424-
// remove data from waveform, which was processed and saved to this.seqs
425-
while (this.numOfChunks > 2) {
426-
if (this.numberOfDeletedChunks == 0) {
427-
this.streamWaveform = this.streamWaveform.slice(
428-
-(
429-
this.streamWaveform.length -
430-
(this.windowSize + this.overlapSeconds)
431-
)
432-
);
433-
} else {
434-
this.streamWaveform = this.streamWaveform.slice(
435-
-(this.streamWaveform.length - this.windowSize)
436-
);
437-
}
438-
this.numberOfDeletedChunks++;
439-
this.numOfChunks--;
440-
}
441438
while (this.numOfChunks > 0 && streamAction == STREAMING_ACTION.STOP) {
442-
let seq = await this.decodeChunk(this.chunks.at(-this.numOfChunks)!);
439+
let seq = await this.decodeChunk(this.chunks.at(0)!);
443440
if (this.numberOfDecodedChunks == 0) {
444441
this.sequence = seq;
445442
this.decodedTranscribeCallback(seq);
@@ -465,7 +462,7 @@ export class SpeechToTextController {
465462

466463
private async tokenIdsToText(tokenIds: number[]): Promise<string> {
467464
try {
468-
return this.nativeTokenizer.decode(tokenIds);
465+
return this.nativeTokenizer.decode(tokenIds, true);
469466
} catch (e) {
470467
this.onErrorCallback?.(
471468
new Error(`An error has ocurred when decoding the token ids: ${e}`)

0 commit comments

Comments
 (0)