@@ -32,7 +32,7 @@ export class SpeechToTextController {
32
32
private streamWaveform : number [ ] = [ ] ;
33
33
private isDecodingChunk = false ;
34
34
private numberOfDecodedChunks = 0 ;
35
- private numberOfDeletedChunks = 0 ;
35
+ private isChunkDeleted = false ;
36
36
private numOfChunks = 0 ;
37
37
38
38
// User callbacks
@@ -162,12 +162,12 @@ export class SpeechToTextController {
162
162
}
163
163
}
164
164
165
- private chunkWaveform ( waveform : number [ ] ) {
165
+ private chunkWaveform ( waveform : number [ ] , streamingSlice ?: boolean ) {
166
166
this . chunks = [ ] ;
167
167
this . numOfChunks = Math . ceil ( waveform . length / this . windowSize ) ;
168
168
for ( let i = 0 ; i < this . numOfChunks ; i ++ ) {
169
169
let chunk ;
170
- if ( i == 0 && this . numberOfDeletedChunks > 0 ) {
170
+ if ( i == 0 && streamingSlice ) {
171
171
chunk = waveform . slice (
172
172
0 ,
173
173
Math . min (
@@ -381,24 +381,21 @@ export class SpeechToTextController {
381
381
this . streamWaveform = [ ] ;
382
382
this . prevSeq = [ ] ;
383
383
this . numberOfDecodedChunks = 0 ;
384
+ this . isChunkDeleted = false ;
384
385
this . decodedTranscribeCallback ( [ ] ) ;
385
386
this . isGeneratingCallback ( true ) ;
386
387
}
387
388
this . streamWaveform = [ ...this . streamWaveform , ...waveform ] ;
388
- this . chunkWaveform ( this . streamWaveform ) ;
389
+ this . chunkWaveform ( this . streamWaveform , this . isChunkDeleted ) ;
389
390
if ( ! this . isDecodingChunk && streamAction != 2 ) {
390
391
this . isDecodingChunk = true ;
391
392
while (
392
- this . chunks . at ( - this . numOfChunks ) ?. length ==
393
+ this . chunks . at ( 0 ) ?. length ==
393
394
2 * this . overlapSeconds + this . windowSize ||
394
395
( this . numberOfDecodedChunks == 0 &&
395
- this . chunks . at ( - this . numOfChunks ) ?. length ==
396
- this . windowSize + this . overlapSeconds )
396
+ this . chunks . at ( 0 ) ?. length == this . windowSize + this . overlapSeconds )
397
397
) {
398
- let seq = await this . decodeChunk (
399
- this . chunks . at ( - this . numOfChunks ) ! ,
400
- audioLanguage
401
- ) ;
398
+ let seq = await this . decodeChunk ( this . chunks . at ( 0 ) ! , audioLanguage ) ;
402
399
const numSpecialTokens = ( await this . getStartingTokenIds ( audioLanguage ) )
403
400
. length ;
404
401
// remove sos/eos token and 3 additional ones
@@ -418,28 +415,28 @@ export class SpeechToTextController {
418
415
if ( this . seqs . length < 2 ) {
419
416
continue ;
420
417
}
418
+ // drop the leading part of this.streamWaveform that has already been processed and saved to this.seqs
419
+ if ( this . numOfChunks > 2 ) {
420
+ if ( ! this . isChunkDeleted ) {
421
+ this . streamWaveform = this . streamWaveform . slice (
422
+ - (
423
+ this . streamWaveform . length -
424
+ ( this . windowSize + this . overlapSeconds )
425
+ )
426
+ ) ;
427
+ } else {
428
+ this . streamWaveform = this . streamWaveform . slice (
429
+ - ( this . streamWaveform . length - this . windowSize )
430
+ ) ;
431
+ }
432
+ this . isChunkDeleted = true ;
433
+ this . numOfChunks -- ;
434
+ }
421
435
}
422
436
this . isDecodingChunk = false ;
423
437
}
424
- // remove data from waveform, which was processed and saved to this.seqs
425
- while ( this . numOfChunks > 2 ) {
426
- if ( this . numberOfDeletedChunks == 0 ) {
427
- this . streamWaveform = this . streamWaveform . slice (
428
- - (
429
- this . streamWaveform . length -
430
- ( this . windowSize + this . overlapSeconds )
431
- )
432
- ) ;
433
- } else {
434
- this . streamWaveform = this . streamWaveform . slice (
435
- - ( this . streamWaveform . length - this . windowSize )
436
- ) ;
437
- }
438
- this . numberOfDeletedChunks ++ ;
439
- this . numOfChunks -- ;
440
- }
441
438
while ( this . numOfChunks > 0 && streamAction == STREAMING_ACTION . STOP ) {
442
- let seq = await this . decodeChunk ( this . chunks . at ( - this . numOfChunks ) ! ) ;
439
+ let seq = await this . decodeChunk ( this . chunks . at ( 0 ) ! ) ;
443
440
if ( this . numberOfDecodedChunks == 0 ) {
444
441
this . sequence = seq ;
445
442
this . decodedTranscribeCallback ( seq ) ;
@@ -465,7 +462,7 @@ export class SpeechToTextController {
465
462
466
463
private async tokenIdsToText ( tokenIds : number [ ] ) : Promise < string > {
467
464
try {
468
- return this . nativeTokenizer . decode ( tokenIds ) ;
465
+ return this . nativeTokenizer . decode ( tokenIds , true ) ;
469
466
} catch ( e ) {
470
467
this . onErrorCallback ?.(
471
468
new Error ( `An error has ocurred when decoding the token ids: ${ e } ` )
0 commit comments