@@ -224,11 +224,18 @@ impl<'o, 'tb> SegmentedTokenIter<'o, 'tb> {
     }
 }
 
-impl Token<'_> {
+pub trait Normalize {
+    type Item;
+    fn normalize(self, options: &NormalizerOption) -> Self::Item;
+}
+
+impl Normalize for Token<'_> {
+    type Item = Self;
+
     /// Normalize [`Token`] using all the compatible Normalizers.
     ///
     /// A Latin `Token` would not be normalized the same as a Chinese `Token`.
-    pub fn normalize(mut self, options: &NormalizerOption) -> Self {
+    fn normalize(mut self, options: &NormalizerOption) -> Self::Item {
         for normalizer in NORMALIZERS.iter() {
             if normalizer.should_normalize(&self) {
                 self = normalizer.normalize(self, options);
@@ -247,12 +254,32 @@ impl Token<'_> {
     }
 }
 
+impl<'o> Normalize for &'o str {
+    type Item = Cow<'o, str>;
+
+    /// Normalize an str.
+    fn normalize(self, options: &NormalizerOption) -> Self::Item {
+        let mut normalized = Token { lemma: Cow::Borrowed(self), ..Default::default() };
+        for normalizer in NORMALIZERS.iter() {
+            normalized = normalizer.normalize(normalized, options);
+        }
+
+        if options.lossy {
+            for normalizer in LOSSY_NORMALIZERS.iter() {
+                normalized = normalizer.normalize(normalized, options);
+            }
+        }
+
+        normalized.lemma
+    }
+}
+
 #[cfg(test)]
 mod test {
     macro_rules! test_normalizer {
         ($normalizer:expr, $tokens:expr, $normalizer_result:expr, $global_result:expr) => {
             use super::*;
-            use crate::Token;
+            use crate::{Token, Normalize};
 
             const TEST_NORMALIZER_OPTIONS: NormalizerOption = NormalizerOption {
                 create_char_map: true,
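Taken together, these hunks replace the inherent `Token::normalize` with a `Normalize` trait implemented for both `Token<'_>` and `&'o str`, so a plain string can be normalized without building a `Token` by hand. Below is a minimal caller-side sketch based only on the signatures visible in this diff; the crate name (`charabia`) is inferred from context, the crate-root re-exports of `Normalize` and `NormalizerOption` are an assumption suggested by the updated test import `crate::{Token, Normalize}`, and a `Default` impl for `NormalizerOption` is also assumed:

```rust
use std::borrow::Cow;

// Assumption: crate name and root re-exports; adjust to the actual crate.
use charabia::{Normalize, NormalizerOption};

fn main() {
    // Assumption: `NormalizerOption` implements `Default`; the test macro
    // in this diff instead builds the struct field by field.
    let options = NormalizerOption::default();

    // The new `&str` impl wraps the input in a default `Token`, runs all
    // NORMALIZERS (plus LOSSY_NORMALIZERS when `options.lossy` is set),
    // and returns only the normalized lemma as a `Cow<str>`.
    let lemma: Cow<'static, str> = "Été".normalize(&options);
    println!("{lemma}");
}
```

One consequence for downstream code: since `normalize` is now a trait method rather than an inherent one, existing `token.normalize(&options)` call sites keep compiling only if the `Normalize` trait is in scope, which is why the test macro's import changes in the second hunk.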