Skip to content

Commit 5241390

Browse files
authored
feat: text embeddings dynamic size; new text embedding models consts (#273)
## Description Text embeddings now support dynamically sized input. Adds new text embedding model constants and a default value for skipSpecialTokens. ### Type of change - [ ] Bug fix (non-breaking change which fixes an issue) - [x] New feature (non-breaking change which adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected) - [ ] Documentation update (improves or adds clarity to existing documentation) ### Tested on - [x] iOS - [x] Android ### Checklist - [x] I have performed a self-review of my code - [x] I have commented my code, particularly in hard-to-understand areas - [ ] I have updated the documentation accordingly - [x] My changes generate no new warnings
1 parent c3124a3 commit 5241390

File tree

4 files changed

+34
-8
lines changed

4 files changed

+34
-8
lines changed

android/src/main/java/com/swmansion/rnexecutorch/models/TextEmbeddings/TextEmbeddingsModel.kt

+3-3
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,12 @@ class TextEmbeddingsModel(
1818
fun preprocess(input: String): Array<LongArray> {
1919
val inputIds = tokenizer.encode(input).map { it.toLong() }.toLongArray()
2020
val attentionMask = inputIds.map { if (it != 0L) 1L else 0L }.toLongArray()
21-
return arrayOf(inputIds, attentionMask) // Shape: [2, max_length]
21+
return arrayOf(inputIds, attentionMask) // Shape: [2, tokens]
2222
}
2323

2424
fun postprocess(
25-
modelOutput: FloatArray, // [max_length * embedding_dim]
26-
attentionMask: LongArray, // [max_length]
25+
modelOutput: FloatArray, // [tokens * embedding_dim]
26+
attentionMask: LongArray, // [tokens]
2727
): DoubleArray {
2828
val modelOutputDouble = modelOutput.map { it.toDouble() }.toDoubleArray()
2929
val embeddings = TextEmbeddingsUtils.meanPooling(modelOutputDouble, attentionMask)

ios/RnExecutorch/models/text_embeddings/TextEmbeddingsModel.mm

+15-4
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,11 @@ - (NSArray *)preprocess:(NSString *)input {
99
for (int i = 0; i < [input_ids count]; i++) {
1010
[attention_mask addObject:@((int)([input_ids[i] intValue] != 0))];
1111
}
12-
return @[ input_ids, attention_mask ]; // [2, max_length]
12+
return @[ input_ids, attention_mask ]; // [2, tokens]
1313
}
1414

15-
- (NSArray *)postprocess:(NSArray *)modelOutput // [max_length * embedding_dim]
16-
attentionMask:(NSArray *)attentionMask // [max_length]
15+
- (NSArray *)postprocess:(NSArray *)modelOutput // [tokens * embedding_dim]
16+
attentionMask:(NSArray *)attentionMask // [tokens]
1717
{
1818
NSArray *embeddings = [TextEmbeddingsUtils meanPooling:modelOutput
1919
attentionMask:attentionMask];
@@ -22,7 +22,18 @@ - (NSArray *)postprocess:(NSArray *)modelOutput // [max_length * embedding_dim]
2222

2323
- (NSArray *)runModel:(NSString *)input {
2424
NSArray *modelInput = [self preprocess:input];
25-
NSArray *modelOutput = [self forward:modelInput];
25+
26+
NSMutableArray *inputTypes = [NSMutableArray arrayWithObjects:@4, @4, nil];
27+
NSMutableArray *shapes = [NSMutableArray new];
28+
29+
NSNumber *tokenCount = @([modelInput[0] count]);
30+
for (__unused id _ in modelInput) {
31+
[shapes addObject:[NSMutableArray arrayWithObjects:@1, tokenCount, nil]];
32+
}
33+
34+
NSArray *modelOutput = [self forward:modelInput
35+
shapes:shapes
36+
inputTypes:inputTypes];
2637
return [self postprocess:modelOutput[0] attentionMask:modelInput[1]];
2738
}
2839

src/constants/modelUrls.ts

+15
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,21 @@ export const ALL_MINILM_L6_V2 =
168168
export const ALL_MINILM_L6_V2_TOKENIZER =
169169
'https://huggingface.co/software-mansion/react-native-executorch-all-MiniLM-L6-v2/resolve/v0.4.0/tokenizer.json';
170170

171+
export const ALL_MPNET_BASE_V2 =
172+
'https://huggingface.co/software-mansion/react-native-executorch-all-mpnet-base-v2/resolve/v0.4.0/all-mpnet-base-v2_xnnpack.pte';
173+
export const ALL_MPNET_BASE_V2_TOKENIZER =
174+
'https://huggingface.co/software-mansion/react-native-executorch-all-mpnet-base-v2/resolve/v0.4.0/tokenizer.json';
175+
176+
export const MULTI_QA_MINILM_L6_COS_V1 =
177+
'https://huggingface.co/software-mansion/react-native-executorch-multi-qa-MiniLM-L6-cos-v1/resolve/v0.4.0/multi-qa-MiniLM-L6-cos-v1_xnnpack.pte';
178+
export const MULTI_QA_MINILM_L6_COS_V1_TOKENIZER =
179+
'https://huggingface.co/software-mansion/react-native-executorch-multi-qa-MiniLM-L6-cos-v1/resolve/v0.4.0/tokenizer.json';
180+
181+
export const MULTI_QA_MPNET_BASE_DOT_V1 =
182+
'https://huggingface.co/software-mansion/react-native-executorch-multi-qa-mpnet-base-dot-v1/resolve/v0.4.0/multi-qa-mpnet-base-dot-v1_xnnpack.pte';
183+
export const MULTI_QA_MPNET_BASE_DOT_V1_TOKENIZER =
184+
'https://huggingface.co/software-mansion/react-native-executorch-multi-qa-mpnet-base-dot-v1/resolve/v0.4.0/tokenizer.json';
185+
171186
// Backward compatibility
172187
export const LLAMA3_2_3B_URL = LLAMA3_2_3B;
173188
export const LLAMA3_2_3B_QLORA_URL = LLAMA3_2_3B_QLORA;

src/modules/natural_language_processing/TokenizerModule.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ export class TokenizerModule extends BaseModule {
1111

1212
static async decode(
1313
input: number[],
14-
skipSpecialTokens: boolean
14+
skipSpecialTokens = false
1515
): Promise<string> {
1616
return await this.nativeModule.decode(input, skipSpecialTokens);
1717
}

0 commit comments

Comments
 (0)