You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
it('should tokenize/detokenize a sentence with non-breaking spaces',function(){
148
158
vartextWith2S='I met Mr.\u00a0Gandhi. Mr.\u00a0Gandhi is a nice person.';
149
159
varsentences=nlp.readDoc(textWith2S).sentences();
@@ -154,6 +164,16 @@ describe( 'wink-nlp test-coverage and basic behavior', function () {
154
164
});
155
165
});
156
166
167
+
/**
 * Sentence round-trip with non-regular spaces.
 *
 * Reads a two-sentence text in which "Mr." is separated from "Gandhi" by a
 * run of Unicode space characters rather than an ASCII space, then checks
 * that every sentence's `out()` equals the corresponding expected string,
 * i.e. the exact space characters are reproduced on reconstruction.
 */
it( 'should tokenize/detokenize a sentence with non-regular spaces', function () {
  // The separator run is: U+2002 en space, U+2003 em space, U+2004 three-per-em
  // space, U+2005 four-per-em space, U+2009 thin space, U+200A hair space,
  // U+202F narrow no-break space and U+205F medium mathematical space.
  var textWith2S = 'I met Mr.\u2002\u2003\u2004\u2005\u2009\u200a\u202f\u205fGandhi. Mr.\u2002\u2003\u2004\u2005\u2009\u200a\u202f\u205fGandhi is a nice person.';
  var sentences = nlp.readDoc( textWith2S ).sentences();
  // Expected sentences, in order; the space run after "Mr." stays inside each one.
  var sentencesText = [
    'I met Mr.\u2002\u2003\u2004\u2005\u2009\u200a\u202f\u205fGandhi.',
    'Mr.\u2002\u2003\u2004\u2005\u2009\u200a\u202f\u205fGandhi is a nice person.'
  ];
  // Reconstruction.
  sentences.each( ( s, k ) => {
    expect( s.out() ).to.equal( sentencesText[ k ] );
  } );
} );
176
+
157
177
it('should tokenize/detokenize the entities\' value as text with non-breaking spaces',function(){
158
178
vartextWith2S='I purchased 10 mangoes on March\u00a010th for US$\u00a099.00.';
159
179
varentities=nlp.readDoc(textWith2S).entities();
@@ -165,6 +185,18 @@ describe( 'wink-nlp test-coverage and basic behavior', function () {
165
185
});
166
186
});
167
187
188
+
it('should tokenize/detokenize the entities\' value as text with non-regular spaces',function(){
189
+
vartextWith2S='I purchased 10 mangoes on March\u2002\u2003\u2004\u2005\u2009\u200a\u202f\u205f10th for US$\u2002\u2003\u2004\u2005\u2009\u200a\u202f\u205f99.00.';
it('should preserve non-regular spaces with mark up',function(){
210
+
vartextWith2S='I purchased mangoes on March\u2002\u2003\u2004\u2005\u2009\u200a\u202f\u205f10th for US$\u2002\u2003\u2004\u2005\u2009\u200a\u202f\u205f99.00.';
211
+
vardoc4mark=nlp.readDoc(textWith2S);
212
+
doc4mark.entities().each((e)=>e.markup());
213
+
varmarkedText='I purchased mangoes on <mark>March\u2002\u2003\u2004\u2005\u2009\u200a\u202f\u205f10th</mark> for <mark>US$\u2002\u2003\u2004\u2005\u2009\u200a\u202f\u205f99.00</mark>.';
it('should correctly reconstruct non-breaking spaces with its.precedingSpaces',function(){
178
219
vartext='U.S.A is my birth place. \u00a0 I was born\u00a0on 06.12.1924.';
179
220
varreconstructed=[];
@@ -182,6 +223,14 @@ describe( 'wink-nlp test-coverage and basic behavior', function () {
182
223
expect(reconstructed.join('')).to.equal(' \u00a0 I was born\u00a0on 06.12.1924.');
183
224
});
184
225
226
+
it('should correctly reconstruct non-regular spaces with its.precedingSpaces',function(){
227
+
vartext='U.S.A is my birth place. \u2002\u2003\u2004\u2005\u2009\u200a\u202f\u205f I was born\u2002\u2003\u2004\u2005\u2009\u200a\u202f\u205fon 06.12.1924.';
expect(reconstructed.join('')).to.equal(' \u2002\u2003\u2004\u2005\u2009\u200a\u202f\u205f I was born\u2002\u2003\u2004\u2005\u2009\u200a\u202f\u205fon 06.12.1924.');
0 commit comments