Skip to content

Commit 2145796

Browse files
feat: add option to pass skip special tokens flag to decode (#162)
## Description This PR adds a skipSpecialTokens flag to the argument list of the decode method. ### Type of change - [ ] Bug fix (non-breaking change which fixes an issue) - [x] New feature (non-breaking change which adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected) - [ ] Documentation update (improves or adds clarity to existing documentation) ### Tested on - [x] iOS - [x] Android ### Testing instructions <!-- Provide step-by-step instructions on how to test your changes. Include setup details if necessary. --> ### Screenshots <!-- Add screenshots here, if applicable --> ### Related issues <!-- Link related issues here using #issue-number --> ### Checklist - [x] I have performed a self-review of my code - [ ] I have commented my code, particularly in hard-to-understand areas - [ ] I have updated the documentation accordingly - [x] My changes generate no new warnings ### Additional notes <!-- Include any additional information, assumptions, or context that reviewers might need to understand this PR. -->
1 parent 346f74d commit 2145796

File tree

12 files changed

+24
-10
lines changed

12 files changed

+24
-10
lines changed

android/libs/executorch.aar

-46.8 MB
Binary file not shown.

android/src/main/java/com/swmansion/rnexecutorch/Tokenizer.kt

+2-1
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,11 @@ class Tokenizer(
3232

3333
override fun decode(
3434
input: ReadableArray,
35+
skipSpecialTokens: Boolean,
3536
promise: Promise,
3637
) {
3738
try {
38-
promise.resolve(tokenizer.decode(createIntArray(input)))
39+
promise.resolve(tokenizer.decode(createIntArray(input), skipSpecialTokens))
3940
} catch (e: Exception) {
4041
promise.reject(e.message!!, ETError.UndefinedError.toString())
4142
}

ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Headers/HuggingFaceTokenizer.h

+2
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
- (instancetype)initWithTokenizerPath:(NSString *)tokenizerPath;
66
- (NSArray<NSNumber *> *)encode:(NSString *)text;
77
- (NSString *)decode:(NSArray<NSNumber *> *)tokenIds;
8+
- (NSString *)decode:(NSArray<NSNumber *> *)tokenIds
9+
skipSpecialTokens:(BOOL)skipSpecialTokens;
810
- (NSUInteger)getVocabSize;
911
- (NSString *)idToToken:(NSInteger)tokenId;
1012
- (NSInteger)tokenToId:(NSString *)token;

ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/_CodeSignature/CodeResources

+2-2
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
</data>
1111
<key>Headers/HuggingFaceTokenizer.h</key>
1212
<data>
13-
cVZsliuTmV2umgK601d6PamTRSw=
13+
OJmjnUO2q4YZvXqqz4vCymvZFP0=
1414
</data>
1515
<key>Headers/LLaMARunner.h</key>
1616
<data>
@@ -34,7 +34,7 @@
3434
<dict>
3535
<key>hash2</key>
3636
<data>
37-
0ETM5qw12+W8ULx2zP2UkVomFrBRnwAr8I7po2ACk/k=
37+
KIuu4Sb2dgJ9swgF7bGppf5T9KjRhVQ6ERGHAvs4D/Q=
3838
</data>
3939
</dict>
4040
<key>Headers/LLaMARunner.h</key>
Binary file not shown.

ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Headers/HuggingFaceTokenizer.h

+2
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
- (instancetype)initWithTokenizerPath:(NSString *)tokenizerPath;
66
- (NSArray<NSNumber *> *)encode:(NSString *)text;
77
- (NSString *)decode:(NSArray<NSNumber *> *)tokenIds;
8+
- (NSString *)decode:(NSArray<NSNumber *> *)tokenIds
9+
skipSpecialTokens:(BOOL)skipSpecialTokens;
810
- (NSUInteger)getVocabSize;
911
- (NSString *)idToToken:(NSInteger)tokenId;
1012
- (NSInteger)tokenToId:(NSString *)token;

ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/_CodeSignature/CodeResources

+2-2
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
</data>
1111
<key>Headers/HuggingFaceTokenizer.h</key>
1212
<data>
13-
cVZsliuTmV2umgK601d6PamTRSw=
13+
OJmjnUO2q4YZvXqqz4vCymvZFP0=
1414
</data>
1515
<key>Headers/LLaMARunner.h</key>
1616
<data>
@@ -34,7 +34,7 @@
3434
<dict>
3535
<key>hash2</key>
3636
<data>
37-
0ETM5qw12+W8ULx2zP2UkVomFrBRnwAr8I7po2ACk/k=
37+
KIuu4Sb2dgJ9swgF7bGppf5T9KjRhVQ6ERGHAvs4D/Q=
3838
</data>
3939
</dict>
4040
<key>Headers/LLaMARunner.h</key>

ios/RnExecutorch/Tokenizer.mm

+4-3
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,11 @@ - (void)encode:(NSString *)input
3232
}
3333

3434
- (void)decode:(NSArray *)input
35-
resolve:(RCTPromiseResolveBlock)resolve
36-
reject:(RCTPromiseRejectBlock)reject {
35+
skipSpecialTokens:(BOOL)skipSpecialTokens
36+
resolve:(RCTPromiseResolveBlock)resolve
37+
reject:(RCTPromiseRejectBlock)reject {
3738
@try {
38-
resolve([tokenizer decode:input]);
39+
resolve([tokenizer decode:input skipSpecialTokens:skipSpecialTokens]);
3940
} @catch (NSException *exception) {
4041
reject(@"tokenizer_error",
4142
[NSString stringWithFormat:@"%@", exception.reason], nil);

src/native/NativeTokenizer.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ import { TurboModuleRegistry } from 'react-native';
33

44
export interface Spec extends TurboModule {
55
load(tokenizerSource: string): Promise<number>;
6-
decode(input: number[]): Promise<string>;
6+
decode(input: number[], skipSpecialTokens: boolean): Promise<string>;
77
encode(input: string): Promise<number[]>;
88
getVocabSize(): Promise<number>;
99
idToToken(tokenId: number): Promise<string>;

third-party/ios/ExecutorchLib/ExecutorchLib/Exported/HuggingFaceTokenizer.h

+2
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
- (instancetype)initWithTokenizerPath:(NSString *)tokenizerPath;
66
- (NSArray<NSNumber *> *)encode:(NSString *)text;
77
- (NSString *)decode:(NSArray<NSNumber *> *)tokenIds;
8+
- (NSString *)decode:(NSArray<NSNumber *> *)tokenIds
9+
skipSpecialTokens:(BOOL)skipSpecialTokens;
810
- (NSUInteger)getVocabSize;
911
- (NSString *)idToToken:(NSInteger)tokenId;
1012
- (NSInteger)tokenToId:(NSString *)token;

third-party/ios/ExecutorchLib/ExecutorchLib/Exported/HuggingFaceTokenizer.mm

+7-1
Original file line numberDiff line numberDiff line change
@@ -44,12 +44,18 @@ - (instancetype)initWithTokenizerPath:(NSString *)tokenizerPath {
4444
}
4545

4646
- (NSString *)decode:(NSArray<NSNumber *> *)tokenIds {
47+
return [self decode:tokenIds skipSpecialTokens:NO];
48+
}
49+
50+
- (NSString *)decode:(NSArray<NSNumber *> *)tokenIds
51+
skipSpecialTokens:(BOOL)skipSpecialTokens {
4752
std::vector<int32_t> stdTokenIds;
4853
stdTokenIds.reserve([tokenIds count]);
4954
for (NSNumber *tokenId in tokenIds) {
5055
stdTokenIds.push_back([tokenId intValue]);
5156
}
52-
std::string decodedString = _tokenizer->Decode(stdTokenIds);
57+
std::string decodedString =
58+
_tokenizer->Decode(stdTokenIds, skipSpecialTokens);
5359
return [NSString stringWithUTF8String:decodedString.c_str()];
5460
}
5561

0 commit comments

Comments
 (0)