Skip to content

Commit 30075d2

Browse files
authored
Stricter URL recognition for autolinking (#11871)
Documentation editor: Use only regex-based URL recognition for pasted text; increase strictness of regex. Fixes #11697.
1 parent: cf82c8c; commit: 30075d2

File tree

5 files changed

+8
-38
lines changed

5 files changed

+8
-38
lines changed

app/gui/src/project-view/components/DocumentationEditor/__tests__/textPaste.test.ts

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,6 @@ test.each([
1010
},
1111
{
1212
clipboard: 'example.com',
13-
inserted: '<https://example.com>',
1413
},
1514
{
1615
clipboard: 'http://example.com',
@@ -22,15 +21,12 @@ test.each([
2221
},
2322
{
2423
clipboard: 'example.com/Address containing spaces and a < character',
25-
inserted: '<https://example.com/Address containing spaces and a %3C character>',
2624
},
2725
{
2826
clipboard: 'example.com/Address resembling *bold syntax*',
29-
inserted: '<https://example.com/Address resembling %2Abold syntax%2A>',
3027
},
3128
{
3229
clipboard: 'Url: www.a.example.com, another: www.b.example.com',
33-
inserted: 'Url: <https://www.a.example.com>, another: <https://www.b.example.com>',
3430
},
3531
{
3632
clipboard: 'gopher:///no/autolinking/unusual/protocols',
@@ -53,6 +49,9 @@ test.each([
5349
{
5450
clipboard: 'example.com with trailing text',
5551
},
52+
{
53+
clipboard: 'Standard.Base.Math',
54+
},
5655
])('Auto-linking pasted text: $clipboard', ({ clipboard, inserted }) => {
5756
expect(transformPastedText(clipboard)).toBe(inserted ?? clipboard)
5857
})

app/gui/src/project-view/components/DocumentationEditor/textPaste.ts

Lines changed: 2 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -5,30 +5,10 @@ function uriEscapeChar(char: string) {
55
}
66

77
function toAutoLink(text: string) {
8-
return `<${addProtocolIfMissing(text).replaceAll(/[\][<>*`]/g, uriEscapeChar)}>`
9-
}
10-
11-
function addProtocolIfMissing(url: string) {
12-
return (URL.canParse(url) ? '' : 'https://') + url
13-
}
14-
15-
/**
16-
* Return whether the input is likely to be a URL, possibly with the protocol omitted. This matches more aggressively
17-
* than {@link LINKABLE_URL_REGEX}, but rejects some inputs that would technically make valid URLs but are more likely
18-
* to be other text.
19-
*/
20-
function isReasonableUrl(text: string) {
21-
const textWithProto = addProtocolIfMissing(text)
22-
let textAsUrl: URL | undefined
23-
try {
24-
textAsUrl = new URL(textWithProto)
25-
} catch {
26-
return false
27-
}
28-
return textAsUrl.protocol.match(/https?:/) && textAsUrl.hostname.match(/\.[a-z]/)
8+
return `<${text.replaceAll(/[\][<>*`]/g, uriEscapeChar)}>`
299
}
3010

3111
/** Convert the input to Markdown. This includes converting any likely URLs to <autolink>s. */
3212
export function transformPastedText(text: string): string {
33-
return isReasonableUrl(text) ? toAutoLink(text) : text.replaceAll(LINKABLE_URL_REGEX, toAutoLink)
13+
return text.replaceAll(LINKABLE_URL_REGEX, toAutoLink)
3414
}

app/gui/src/project-view/components/PlainTextEditor/___tests__/urlLinks.test.ts

Lines changed: 0 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -50,15 +50,6 @@ test.each([
5050
},
5151
],
5252
},
53-
{
54-
text: 'Url: www.example.com',
55-
expectedLinks: [
56-
{
57-
text: 'www.example.com',
58-
href: 'https://www.example.com',
59-
},
60-
],
61-
},
6253
{
6354
text: 'Email: [email protected]',
6455
expectedLinks: [

app/gui/src/project-view/util/__tests__/link.test.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,6 @@ import { LINKABLE_EMAIL_REGEX, LINKABLE_URL_REGEX } from '../link'
33

44
const cases = {
55
urls: [
6-
'www.a.b',
76
'http://example.com',
87
'https://a.b',
98
'https://some.local',
@@ -19,6 +18,7 @@ const cases = {
1918
2019
],
2120
neither: [
21+
'www.a.b',
2222
'https://💩.la/',
2323
'a.b',
2424
'http://AsDf',

app/gui/src/project-view/util/link.ts

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,9 @@
11
/**
22
* Heuristic that matches strings suitable to be automatically interpreted as links. Recognizes absolute URLs with
3-
* `http` and `https` protocols, and some protocol-less strings that are likely to be URLs.
3+
* `http` and `https` protocols.
44
*/
55
export const LINKABLE_URL_REGEX =
6-
/(?:https?:\/\/(?:www\.)?|www\.)[-a-zA-Z0-9@:%._+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b[-a-zA-Z0-9()@:%_+.~#?&/=]*/g
6+
/https?:\/\/[-a-zA-Z0-9@:%._+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b[-a-zA-Z0-9()@:%_+.~#?&/=]*/g
77

88
/** Heuristic that matches strings suitable to be automatically interpreted as email addresses. */
99
export const LINKABLE_EMAIL_REGEX =

0 commit comments

Comments
 (0)