Skip to content

Commit a885520

Browse files
committed
emoji: Recognize word-aligned matches in ranking
Fixes #1068.
1 parent bb8935a commit a885520

File tree

2 files changed

+70
-42
lines changed

2 files changed

+70
-42
lines changed

lib/model/emoji.dart

+24 -26
Original file line number | Diff line number | Diff line change
@@ -379,7 +379,15 @@ enum EmojiMatchQuality {
379379
/// The query matches a prefix of the emoji name, but not the whole name.
380380
prefix,
381381

382-
/// The query matches somewhere in the emoji name, but not at the start.
382+
/// The query matches starting at the start of a word in the emoji name,
383+
/// but not the start of the whole name.
384+
///
385+
/// For example, a name "ab_cd_ef" would match queries "c" or "cd_e"
386+
/// at this level, but not a query "b_cd_ef".
387+
wordAligned,
388+
389+
/// The query matches somewhere in the emoji name,
390+
/// but not at the start of any word.
383391
other;
384392

385393
/// The best possible quality of match.
@@ -490,25 +498,17 @@ class EmojiAutocompleteQuery extends ComposeAutocompleteQuery {
490498
// See also commentary in [_rankResult].
491499

492500
// TODO(#1067) this assumes emojiName is already lower-case (and no diacritics)
493-
if (emojiName == _adjusted) return EmojiMatchQuality.exact;
494-
if (emojiName.startsWith(_adjusted)) return EmojiMatchQuality.prefix;
495-
if (_nameMatches(emojiName)) return EmojiMatchQuality.other;
496-
return null;
497-
}
498-
499-
bool _nameMatches(String emojiName) {
501+
if (emojiName == _adjusted) return EmojiMatchQuality.exact;
502+
if (emojiName.startsWith(_adjusted)) return EmojiMatchQuality.prefix;
503+
if (emojiName.contains(_sepAdjusted)) return EmojiMatchQuality.wordAligned;
500504
if (!_adjusted.contains(_separator)) {
501505
// If the query is a single token (doesn't contain a separator),
502-
// the match can be anywhere in the string.
503-
return emojiName.contains(_adjusted);
506+
// allow a match anywhere in the string, too.
507+
if (emojiName.contains(_adjusted)) return EmojiMatchQuality.other;
508+
} else {
509+
// Otherwise, require at least a word-aligned match.
504510
}
505-
506-
// If there is a separator in the query, then we
507-
// require the match to start at the start of a token.
508-
// (E.g. for 'ab_cd_ef', query could be 'ab_c' or 'cd_ef',
509-
// but not 'b_cd_ef'.)
510-
assert(!emojiName.startsWith(_adjusted)); // checked before calling this method
511-
return emojiName.contains(_sepAdjusted);
511+
return null;
512512
}
513513

514514
/// A measure of the result's quality in the context of the query,
@@ -521,11 +521,9 @@ class EmojiAutocompleteQuery extends ComposeAutocompleteQuery {
521521
// Compare sort_emojis in Zulip web:
522522
// https://github.com/zulip/zulip/blob/83a121c7e/web/shared/src/typeahead.ts#L322-L382
523523
//
524-
// Behavior differences we should or might copy, TODO(#1068):
525-
// * Web ranks matches starting at a word boundary ahead of
526-
// other non-prefix matches; we don't yet.
527-
// * Relatedly, web favors popular emoji only upon a word-aligned match.
524+
// Behavior differences we might copy, TODO:
528525
// * Web ranks each name of a Unicode emoji separately.
526+
// * Web recognizes a word-aligned match starting after [ /-] as well as [_].
529527
//
530528
// Behavior differences that web should probably fix, TODO(web):
531529
// * Among popular emoji with non-exact matches,
@@ -554,15 +552,15 @@ class EmojiAutocompleteQuery extends ComposeAutocompleteQuery {
554552
ReactionType.unicodeEmoji => false,
555553
};
556554
return switch (matchQuality) {
557-
EmojiMatchQuality.exact => 0,
558-
EmojiMatchQuality.prefix => isPopular ? 1 : isCustomEmoji ? 3 : 4,
559-
// TODO word-boundary vs. not
560-
EmojiMatchQuality.other => isPopular ? 2 : isCustomEmoji ? 5 : 6,
555+
EmojiMatchQuality.exact => 0,
556+
EmojiMatchQuality.prefix => isPopular ? 1 : isCustomEmoji ? 3 : 5,
557+
EmojiMatchQuality.wordAligned => isPopular ? 2 : isCustomEmoji ? 4 : 6,
558+
EmojiMatchQuality.other => isCustomEmoji ? 7 : 8,
561559
};
562560
}
563561

564562
/// The number of possible values returned by [_rankResult].
565-
static const _numResultRanks = 7;
563+
static const _numResultRanks = 9;
566564

567565
@override
568566
String toString() {

test/model/emoji_test.dart

+46 -16
Original file line number | Diff line number | Diff line change
@@ -384,7 +384,7 @@ void main() {
384384
check(matchOfName('o', 'open_book')).prefix;
385385
check(matchOfName('open', 'open_book')).prefix;
386386
check(matchOfName('pe', 'open_book')).other;
387-
check(matchOfName('boo', 'open_book')).other;
387+
check(matchOfName('boo', 'open_book')).wordAligned;
388388
check(matchOfName('ok', 'open_book')).other;
389389
});
390390

@@ -396,7 +396,7 @@ void main() {
396396
check(matchOfName('pen_', 'open_book')).none;
397397
check(matchOfName('n_b', 'open_book')).none;
398398

399-
check(matchOfName('blue_dia', 'large_blue_diamond')).other;
399+
check(matchOfName('blue_dia', 'large_blue_diamond')).wordAligned;
400400
});
401401

402402
test('spaces in query behave as underscores', () {
@@ -407,7 +407,7 @@ void main() {
407407
check(matchOfName('pen ', 'open_book')).none;
408408
check(matchOfName('n b', 'open_book')).none;
409409

410-
check(matchOfName('blue dia', 'large_blue_diamond')).other;
410+
check(matchOfName('blue dia', 'large_blue_diamond')).wordAligned;
411411
});
412412

413413
test('query is lower-cased', () {
@@ -426,13 +426,17 @@ void main() {
426426
check(matchOfNames('open b', ['x', 'open_book'])).prefix;
427427
check(matchOfNames('pen_', ['x', 'open_book'])).none;
428428

429+
check(matchOfNames('blue_dia', ['x', 'large_blue_diamond'])).wordAligned;
430+
429431
check(matchOfNames('Smi', ['x', 'smile'])).prefix;
430432
});
431433

432434
test('best match among name and aliases prevails', () {
433-
check(matchOfNames('a', ['ab', 'a', 'ba', 'x'])).exact;
434-
check(matchOfNames('a', ['ba', 'ab', 'x'])).prefix;
435-
check(matchOfNames('a', ['ba', 'ab'])).prefix;
435+
check(matchOfNames('a', ['ab', 'a', 'b_a', 'ba', 'x'])).exact;
436+
check(matchOfNames('a', ['ba', 'ab', 'b_a', 'x'])).prefix;
437+
check(matchOfNames('a', ['ba', 'ab', 'b_a'])).prefix;
438+
check(matchOfNames('a', ['ba', 'b_a', 'x'])).wordAligned;
439+
check(matchOfNames('a', ['b_a', 'ba'])).wordAligned;
436440
check(matchOfNames('a', ['ba', 'x'])).other;
437441
check(matchOfNames('a', ['x', 'y', 'z'])).none;
438442
});
@@ -478,7 +482,7 @@ void main() {
478482
check(matchOf('eqeq', realmCandidate('eqeq'))).exact;
479483
check(matchOf('open_', realmCandidate('open_book'))).prefix;
480484
check(matchOf('n_b', realmCandidate('open_book'))).none;
481-
check(matchOf('blue dia', realmCandidate('large_blue_diamond'))).other;
485+
check(matchOf('blue dia', realmCandidate('large_blue_diamond'))).wordAligned;
482486
check(matchOf('Smi', realmCandidate('smile'))).prefix;
483487
});
484488

@@ -513,10 +517,12 @@ void main() {
513517

514518
final octopus = unicode(['octopus'], emojiCode: '1f419');
515519
final workingOnIt = unicode(['working_on_it'], emojiCode: '1f6e0');
520+
final love = unicode(['love'], emojiCode: '2764'); // aka :heart:
516521

517-
test('ranks exact before prefix before other match', () {
522+
test('ranks match quality exact/prefix/word-aligned/other', () {
518523
checkPrecedes('o', unicode(['o']), unicode(['onion']));
519-
checkPrecedes('o', unicode(['onion']), unicode(['book']));
524+
checkPrecedes('o', unicode(['onion']), unicode(['squared_ok']));
525+
checkPrecedes('o', unicode(['squared_ok']), unicode(['book']));
520526
});
521527

522528
test('ranks popular before realm before other Unicode', () {
@@ -535,28 +541,51 @@ void main() {
535541
checkPrecedes('o', octopus, realmCandidate('open_book'));
536542
});
537543

538-
test('ranks popular-vs-not more significant than prefix/other', () {
539-
// Popular other beats realm prefix.
544+
test('ranks popular-vs-not more significant than prefix/word-aligned', () {
545+
// Popular word-aligned beats realm prefix.
540546
checkPrecedes('o', workingOnIt, realmCandidate('open_book'));
541547
});
542548

543-
test('ranks prefix/other more significant than custom/other', () {
544-
// Generic Unicode prefix beats realm other.
545-
checkPrecedes('o', unicode(['ok']), realmCandidate('yo'));
549+
test('ranks popular as if generic when non-word-aligned', () {
550+
// Generic word-aligned beats popular other.
551+
checkPrecedes('o', unicode(['squared_ok']), love);
552+
// Popular other ranks below even custom other…
553+
checkPrecedes('o', realmCandidate('yo'), love);
554+
// … and same as generic Unicode other.
555+
checkSameRank('o', love, unicode(['book']));
556+
557+
// And that emoji really does count as popular,
558+
// beating custom emoji when both have a prefix match.
559+
checkPrecedes('l', love, realmCandidate('logs'));
560+
});
561+
562+
test('ranks custom/other more significant than prefix/word-aligned', () {
563+
// Custom word-aligned beats generic prefix.
564+
checkPrecedes('o', realmCandidate('laughing_blue_octopus'),
565+
unicode(['ok']));
566+
});
567+
568+
test('ranks word-aligned/other more significant than custom/other', () {
569+
// Generic Unicode word-aligned beats realm other.
570+
checkPrecedes('o', unicode(['squared_ok']), realmCandidate('yo'));
546571
});
547572

548573
test('full list of ranks', () {
549574
check([
550575
rankOf('o', unicode(['o'])), // exact (generic)
551576
rankOf('o', octopus), // prefix popular
552-
rankOf('o', workingOnIt), // other popular
577+
rankOf('o', workingOnIt), // word-aligned popular
553578
rankOf('o', realmCandidate('open_book')), // prefix realm
554579
rankOf('z', zulipCandidate()), // == prefix :zulip:
580+
rankOf('y', realmCandidate('thank_you')), // word-aligned realm
581+
// (word-aligned :zulip: is impossible because the name is one word)
555582
rankOf('o', unicode(['ok'])), // prefix generic
583+
rankOf('o', unicode(['squared_ok'])), // word-aligned generic
556584
rankOf('o', realmCandidate('yo')), // other realm
557585
rankOf('p', zulipCandidate()), // == other :zulip:
558586
rankOf('o', unicode(['book'])), // other generic
559-
]).deepEquals([0, 1, 2, 3, 3, 4, 5, 5, 6]);
587+
rankOf('o', love), // == other popular
588+
]).deepEquals([0, 1, 2, 3, 3, 4, 5, 6, 7, 7, 8, 8]);
560589
});
561590
});
562591
}
@@ -585,6 +614,7 @@ extension EmojiCandidateChecks on Subject<EmojiCandidate> {
585614
extension EmojiMatchQualityChecks on Subject<EmojiMatchQuality?> {
586615
void get exact => equals(EmojiMatchQuality.exact);
587616
void get prefix => equals(EmojiMatchQuality.prefix);
617+
void get wordAligned => equals(EmojiMatchQuality.wordAligned);
588618
void get other => equals(EmojiMatchQuality.other);
589619
void get none => isNull();
590620
}

0 commit comments

Comments
 (0)