Skip to content

Commit aa75693

Browse files
committed
Ignore punctuation characters when trying to match mentions
1 parent 0a589c3 commit aa75693

File tree

5 files changed

+226
-157
lines changed

5 files changed

+226
-157
lines changed

src/sidebar/components/MarkdownEditor.tsx

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,17 +29,17 @@ import {
2929
} from 'preact/hooks';
3030

3131
import { isMacOS } from '../../shared/user-agent';
32+
import {
33+
getContainingMentionOffsets,
34+
termBeforePosition,
35+
} from '../helpers/mentions';
3236
import {
3337
LinkType,
3438
convertSelectionToLink,
3539
toggleBlockStyle,
3640
toggleSpanStyle,
3741
} from '../markdown-commands';
3842
import type { EditorState } from '../markdown-commands';
39-
import {
40-
getContainingWordOffsets,
41-
termBeforePosition,
42-
} from '../util/term-before-position';
4343
import { getCaretCoordinates } from '../util/textarea-caret-position';
4444
import MarkdownView from './MarkdownView';
4545
import MentionPopover from './MentionPopover';
@@ -264,7 +264,7 @@ function TextArea({
264264
(suggestion: UserItem) => {
265265
const textarea = textareaRef.current!;
266266
const { value } = textarea;
267-
const { start, end } = getContainingWordOffsets(
267+
const { start, end } = getContainingMentionOffsets(
268268
value,
269269
textarea.selectionStart,
270270
);

src/sidebar/helpers/mentions.ts

Lines changed: 69 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,19 @@
11
import type { Mention } from '../../types/api';
22
import { buildAccountID } from './account-id';
33

4+
// Pattern that matches characters treated as the boundary of a mention.
5+
const BOUNDARY_CHARS = String.raw`[\s,.;:|?!'"\-()[\]{}]`;
6+
7+
// Pattern that matches Hypothesis usernames.
8+
// See https://github.com/hypothesis/h/blob/797d9a4/h/models/user.py#L25
9+
const USERNAME_PAT = '[A-Za-z0-9_][A-Za-z0-9._]+[A-Za-z0-9_]';
10+
11+
// Pattern that finds user mentions in text.
12+
const MENTIONS_PAT = new RegExp(
13+
`(^|${BOUNDARY_CHARS})@(${USERNAME_PAT})(?=${BOUNDARY_CHARS}|$)`,
14+
'g',
15+
);
16+
417
/**
518
* Wrap all occurrences of @mentions in provided text into the corresponding
619
* special tag, as long as they are surrounded by "empty" space (space, tab, new
@@ -10,22 +23,15 @@ import { buildAccountID } from './account-id';
1023
* `<a data-hyp-mention data-userid="acct:[email protected]">@someuser</a>`
1124
*/
1225
export function wrapMentions(text: string, authority: string): string {
13-
return text.replace(
14-
// Capture both the potential empty character before the mention (space, tab
15-
// or new line), and the term following the `@` character.
16-
// When we build the mention tag, we need to prepend that empty character to
17-
// avoid altering the spacing and structure of the text.
18-
/(^|\s)@(\w+)(?=\s|$)/g,
19-
(match, precedingWhitespace, username) => {
20-
const tag = document.createElement('a');
21-
22-
tag.setAttribute('data-hyp-mention', '');
23-
tag.setAttribute('data-userid', buildAccountID(username, authority));
24-
tag.textContent = `@${username}`;
25-
26-
return `${precedingWhitespace}${tag.outerHTML}`;
27-
},
28-
);
26+
return text.replace(MENTIONS_PAT, (match, precedingChar, username) => {
27+
const tag = document.createElement('a');
28+
29+
tag.setAttribute('data-hyp-mention', '');
30+
tag.setAttribute('data-userid', buildAccountID(username, authority));
31+
tag.textContent = `@${username}`;
32+
33+
return `${precedingChar}${tag.outerHTML}`;
34+
});
2935
}
3036

3137
/**
@@ -115,3 +121,50 @@ export function renderMentionTags(
115121

116122
return foundMentions;
117123
}
124+
125+
/**
126+
* Returns the part of the word or mention that ends at `position`: the text
127+
* between the closest preceding whitespace/punctuation character and `position`.
128+
*/
129+
export function termBeforePosition(text: string, position: number): string {
130+
const { start } = getContainingMentionOffsets(text, position);
131+
return text.slice(start, position);
132+
}
133+
134+
export type WordOffsets = {
135+
start: number;
136+
end: number;
137+
};
138+
139+
/**
 * Returns the `start` and `end` positions for the word or mention that overlaps
 * with the provided reference position.
 *
 * A word is delimited by the characters matched by `BOUNDARY_CHARS`, or by the
 * beginning/end of the text. `start` is the index right after the closest
 * boundary character found before the reference position, and `end` is the
 * index of the closest boundary character found at or after it.
 *
 * For example, given the text "hello @hypothesis" and the reference position 9
 * (a caret placed inside `@hypothesis`), it will return the start and end of
 * the `@hypothesis` mention, hence { start: 6, end: 17 }.
 *
 * Useful to get the offsets of the mention matching the caret position in text
 * inputs and textareas.
 *
 * @param text - Full text in which to look for the word
 * @param referencePosition - Caret position. Values outside `[0, text.length]`
 *   are clamped to that range, and NaN is treated as 0.
 * @returns Offsets such that `text.slice(start, end)` is the containing word,
 *   which is empty when the position is surrounded by boundary characters.
 */
export function getContainingMentionOffsets(
  text: string,
  referencePosition: number,
): WordOffsets {
  // Non-global regex on purpose: `test` keeps no `lastIndex` state between
  // calls, so it can be safely reused for every character checked below.
  const boundaryChar = new RegExp(BOUNDARY_CHARS);

  // `|| 0` maps NaN to 0; the min/max pair clamps to a valid caret position.
  const position = Math.min(
    Math.max(Math.trunc(referencePosition) || 0, 0),
    text.length,
  );

  // Scan outwards from the caret instead of collecting every boundary match in
  // the preceding text. This avoids allocating an array proportional to the
  // text length on each call, and avoids spreading that array into `Math.max`,
  // which throws a RangeError for very long inputs.
  let start = position;
  while (start > 0 && !boundaryChar.test(text.charAt(start - 1))) {
    start--;
  }

  let end = position;
  while (end < text.length && !boundaryChar.test(text.charAt(end))) {
    end++;
  }

  return { start, end };
}

src/sidebar/helpers/test/mentions-test.js

Lines changed: 152 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,10 @@
1-
import { renderMentionTags, unwrapMentions, wrapMentions } from '../mentions';
1+
import {
2+
renderMentionTags,
3+
unwrapMentions,
4+
wrapMentions,
5+
getContainingMentionOffsets,
6+
termBeforePosition,
7+
} from '../mentions';
28

39
const mentionTag = (username, authority) =>
410
`<a data-hyp-mention="" data-userid="acct:${username}@${authority}">@${username}</a>`;
@@ -40,16 +46,35 @@ look at ${mentionTag('foo', 'example.com')} comment`,
4046
},
4147
// Multiple mentions
4248
{
43-
text: 'Hey @jane look at this quote from @rob',
49+
text: 'Hey @jane, look at this quote from @rob',
4450
authority: 'example.com',
45-
textWithTags: `Hey ${mentionTag('jane', 'example.com')} look at this quote from ${mentionTag('rob', 'example.com')}`,
51+
textWithTags: `Hey ${mentionTag('jane', 'example.com')}, look at this quote from ${mentionTag('rob', 'example.com')}`,
52+
},
53+
// Mentions wrapped in punctuation chars
54+
{
55+
text: '(@jane) {@rob} and @john?',
56+
authority: 'example.com',
57+
textWithTags: `(${mentionTag('jane', 'example.com')}) {${mentionTag('rob', 'example.com')}} and ${mentionTag('john', 'example.com')}?`,
58+
},
59+
// username-like patterns with invalid chars should be ignored
60+
{
61+
text: 'Hello @not+a/user=name',
62+
authority: 'example.com',
63+
textWithTags: `Hello @not+a/user=name`,
4664
},
4765
// Email addresses should be ignored
4866
{
4967
text: 'Ignore email: [email protected]',
5068
authority: 'example.com',
5169
textWithTags: 'Ignore email: [email protected]',
5270
},
71+
// Trailing dots should not be considered part of the mention, but dots
72+
// in-between should
73+
{
74+
text: 'Hello @jane.doe.',
75+
authority: 'example.com',
76+
textWithTags: `Hello ${mentionTag('jane.doe', 'example.com')}.`,
77+
},
5378
].forEach(({ text, authority, textWithTags }) => {
5479
describe('wrapMentions', () => {
5580
it('wraps every mention in a mention tag', () => {
@@ -119,3 +144,127 @@ describe('renderMentionTags', () => {
119144
assert.equal(fourthMention, '@user_id_missing');
120145
});
121146
});
147+
148+
// To make these tests more predictable, we place the `$` sign in the position
149+
// to be checked. That way it's easier to see what is the "word" preceding it.
150+
// The test will then get the `$` sign index and remove it from the text
151+
// before passing it to `termBeforePosition` and `getContainingMentionOffsets`.
152+
[
153+
// First and last positions
154+
{
155+
text: '$Hello world',
156+
expectedTerm: '',
157+
expectedOffsets: { start: 0, end: 5 },
158+
},
159+
{
160+
text: 'Hello world$',
161+
expectedTerm: 'world',
162+
expectedOffsets: { start: 6, end: 11 },
163+
},
164+
165+
// Position in the middle of words
166+
{
167+
text: 'Hell$o world',
168+
expectedTerm: 'Hell',
169+
expectedOffsets: { start: 0, end: 5 },
170+
},
171+
{
172+
text: 'Hello wor$ld',
173+
expectedTerm: 'wor',
174+
expectedOffsets: { start: 6, end: 11 },
175+
},
176+
177+
// Position preceded by "empty space"
178+
{
179+
text: 'Hello $world',
180+
expectedTerm: '',
181+
expectedOffsets: { start: 6, end: 11 },
182+
},
183+
{
184+
text: `Text with
185+
multiple
186+
$
187+
lines
188+
`,
189+
expectedTerm: '',
190+
expectedOffsets: { start: 31, end: 31 },
191+
},
192+
193+
// Position preceded by/in the middle of a word for multi-line text
194+
{
195+
text: `Text with$
196+
multiple
197+
198+
lines
199+
`,
200+
expectedTerm: 'with',
201+
expectedOffsets: { start: 5, end: 9 },
202+
},
203+
{
204+
text: `Text with
205+
multiple
206+
207+
li$nes
208+
`,
209+
expectedTerm: 'li',
210+
expectedOffsets: { start: 32, end: 37 },
211+
},
212+
213+
// Including punctuation characters
214+
...[
215+
',',
216+
'.',
217+
';',
218+
':',
219+
'|',
220+
'?',
221+
'!',
222+
"'",
223+
'"',
224+
'-',
225+
'(',
226+
')',
227+
'[',
228+
']',
229+
'{',
230+
'}',
231+
].flatMap(char => [
232+
{
233+
text: `Foo${char}$ bar`,
234+
expectedTerm: '',
235+
expectedOffsets: { start: 4, end: 4 },
236+
},
237+
{
238+
text: `${char}Foo$ bar`,
239+
expectedTerm: 'Foo',
240+
expectedOffsets: { start: 1, end: 4 },
241+
},
242+
{
243+
text: `hello ${char}fo$o${char} bar`,
244+
expectedTerm: 'fo',
245+
expectedOffsets: { start: 7, end: 10 },
246+
},
247+
]),
248+
].forEach(({ text, expectedTerm, expectedOffsets }) => {
249+
// Get the position of the `$` sign in the text, then remove it
250+
const position = text.indexOf('$');
251+
const textWithoutDollarSign = text.replace('$', '');
252+
253+
describe('termBeforePosition', () => {
254+
it('returns the term right before provided position', () => {
255+
assert.equal(
256+
termBeforePosition(textWithoutDollarSign, position),
257+
expectedTerm,
258+
);
259+
});
260+
});
261+
262+
describe('getContainingMentionOffsets', () => {
263+
it('returns expected offsets', () => {
264+
assert.deepEqual(
265+
getContainingMentionOffsets(textWithoutDollarSign, position),
266+
expectedOffsets,
267+
);
268+
});
269+
});
270+
});

src/sidebar/util/term-before-position.ts

Lines changed: 0 additions & 43 deletions
This file was deleted.

0 commit comments

Comments
 (0)