Skip to content

Commit 0bb9f0b

Browse files
Improves branch name autolink matching logic (closes #3894)
Refactors the branch name autolink matching logic to improve accuracy and reduce false positives. - Introduces multiple regexes for branch name matching based on prefix and issue number patterns. - Updates test - Filters remote autolinks for branch names to only include non-dynamic autolinks of type 'branch'. - Fixes issue where integration autolinks were incorrectly collected for branch autolinking. - Sorts refsets so that issue integrations are checked first for matches. - Limits branch autolink matches to 1 maximum per branch name.
1 parent 120864f commit 0bb9f0b

File tree

5 files changed

+96
-74
lines changed

5 files changed

+96
-74
lines changed

src/autolinks/__tests__/autolinks.test.ts

+26-16
Original file line numberDiff line numberDiff line change
@@ -24,24 +24,34 @@ function assertAutolinks(actual: Map<string, Autolink>, expected: Array<string>)
2424

2525
suite('Autolinks Test Suite', () => {
2626
test('Branch name autolinks', () => {
27-
assertAutolinks(getBranchAutolinks('123', mockRefSets()), ['test/123']);
28-
assertAutolinks(getBranchAutolinks('feature/123', mockRefSets()), ['test/123']);
27+
// Matches under rule 1 (prefixed 2+ digit number followed by end of string)
28+
assertAutolinks(getBranchAutolinks('feature/PRE-12', mockRefSets(['PRE-'])), ['test/12']);
29+
// Matches under rule 1 (prefixed 2+ digit number followed by a separator)
30+
assertAutolinks(getBranchAutolinks('feature/PRE-12.2', mockRefSets(['PRE-'])), ['test/12']);
31+
// Matches under rule 2 (feature/ followed by a 2+ digit number and nothing after it)
32+
assertAutolinks(getBranchAutolinks('feature/12', mockRefSets()), ['test/12']);
33+
// Matches under rule 2 (feature/ followed by a 2+ digit number and a separator after it)
34+
assertAutolinks(getBranchAutolinks('feature/12.test-bug', mockRefSets()), ['test/12']);
35+
// Matches under rule 3 (3+ digit issue number preceded by at least two non-slash, non-digit characters)
2936
assertAutolinks(getBranchAutolinks('feature/PRE-123', mockRefSets()), ['test/123']);
30-
assertAutolinks(getBranchAutolinks('123.2', mockRefSets()), ['test/123', 'test/2']);
37+
// Matches under rule 3 (3+ digit issue number followed by at least two non-slash, non-digit characters)
38+
assertAutolinks(getBranchAutolinks('123abc', mockRefSets()), ['test/123']);
39+
// Matches under rule 3 (3+ digit issue number is the entire branch name)
40+
assertAutolinks(getBranchAutolinks('123', mockRefSets()), ['test/123']);
41+
// Fails all rules because it is a 1 digit number.
42+
assertAutolinks(getBranchAutolinks('feature/3', mockRefSets([''])), []);
43+
// Fails all rules because it is a 1 digit number.
44+
assertAutolinks(getBranchAutolinks('feature/3', mockRefSets(['PRE-'])), []);
45+
// Fails all rules. In rule 3, fails because one of the two following characters is a number.
46+
assertAutolinks(getBranchAutolinks('123.2', mockRefSets()), []);
47+
// Fails all rules. In rule 3, fails because the issue is a full section (a slash before and after it).
48+
assertAutolinks(getBranchAutolinks('improvement/123/ui-fix', mockRefSets()), []);
49+
// Fails all rules. 2&3 because the ref is prefixed, and 1 because the branch name doesn't have the ref's prefix.
3150
assertAutolinks(getBranchAutolinks('123', mockRefSets(['PRE-'])), []);
32-
assertAutolinks(getBranchAutolinks('feature/123', mockRefSets(['PRE-'])), []);
33-
assertAutolinks(getBranchAutolinks('feature/2-fa/123', mockRefSets([''])), ['test/123', 'test/2']);
34-
assertAutolinks(getBranchAutolinks('feature/2-fa/123', mockRefSets([''])), ['test/123', 'test/2']);
35-
// incorrectly solved cat worths to compare the blocks length so that the less block size (without possible link) is more likely a link
36-
assertAutolinks(getBranchAutolinks('feature/2-fa/3', mockRefSets([''])), ['test/2', 'test/3']);
37-
assertAutolinks(getBranchAutolinks('feature/PRE-123', mockRefSets(['PRE-'])), ['test/123']);
38-
assertAutolinks(getBranchAutolinks('feature/PRE-123.2', mockRefSets(['PRE-'])), ['test/123']);
39-
assertAutolinks(getBranchAutolinks('feature/3-123-PRE-123', mockRefSets(['PRE-'])), ['test/123']);
40-
assertAutolinks(
41-
getBranchAutolinks('feature/3-123-PRE-123', mockRefSets(['', 'PRE-'])),
42-
43-
['test/123', 'test/3'],
44-
);
51+
// Fails all rules. 2 because the issue is not immediately following the feature/ section, and 3 because it is a full section.
52+
assertAutolinks(getBranchAutolinks('feature/2-fa/123', mockRefSets([''])), []);
53+
// Fails all rules. 2 because of non-separator character after issue number, and 3 because it has end-of-string two character after.
54+
assertAutolinks(getBranchAutolinks('feature/123a', mockRefSets(['PRE-'])), []);
4555
});
4656

4757
test('Commit message autolinks', () => {

src/autolinks/autolinksProvider.ts

+10-6
Original file line numberDiff line numberDiff line change
@@ -122,9 +122,13 @@ export class AutolinksProvider implements Disposable {
122122
}
123123

124124
/** Collects remote provider autolink references into @param refsets */
125-
private collectRemoteAutolinks(remote: GitRemote | undefined, refsets: RefSet[]): void {
125+
private collectRemoteAutolinks(remote: GitRemote | undefined, refsets: RefSet[], forBranch?: boolean): void {
126126
if (remote?.provider?.autolinks.length) {
127-
refsets.push([remote.provider, remote.provider.autolinks]);
127+
let autolinks = remote.provider.autolinks;
128+
if (forBranch) {
129+
autolinks = autolinks.filter(autolink => !isDynamic(autolink) && autolink.referenceType === 'branch');
130+
}
131+
refsets.push([remote.provider, autolinks]);
128132
}
129133
}
130134

@@ -135,12 +139,12 @@ export class AutolinksProvider implements Disposable {
135139
}
136140
}
137141

138-
private async getRefSets(remote?: GitRemote) {
142+
private async getRefSets(remote?: GitRemote, forBranch?: boolean) {
139143
return this._refsetCache.get(remote?.remoteKey, async () => {
140144
const refsets: RefSet[] = [];
141145

142-
await this.collectIntegrationAutolinks(remote, refsets);
143-
this.collectRemoteAutolinks(remote, refsets);
146+
await this.collectIntegrationAutolinks(forBranch ? undefined : remote, refsets);
147+
this.collectRemoteAutolinks(remote, refsets, forBranch);
144148
this.collectCustomAutolinks(refsets);
145149

146150
return refsets;
@@ -149,7 +153,7 @@ export class AutolinksProvider implements Disposable {
149153

150154
/** @returns A sorted list of autolinks. the first match is the most relevant */
151155
async getBranchAutolinks(branchName: string, remote?: GitRemote): Promise<Map<string, Autolink>> {
152-
const refsets = await this.getRefSets(remote);
156+
const refsets = await this.getRefSets(remote, true);
153157
if (refsets.length === 0) return emptyAutolinkMap;
154158

155159
return getBranchAutolinks(branchName, refsets);

src/autolinks/models/autolinks.ts

+1-2
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ export interface AutolinkReference {
2626
export interface Autolink extends Omit<CacheableAutolinkReference, 'id'> {
2727
provider?: ProviderReference;
2828
id: string;
29-
index?: number;
3029
}
3130

3231
export type EnrichedAutolink = [
@@ -56,7 +55,7 @@ export interface CacheableAutolinkReference extends AutolinkReference {
5655
messageHtmlRegex?: RegExp;
5756
messageMarkdownRegex?: RegExp;
5857
messageRegex?: RegExp;
59-
branchNameRegex?: RegExp;
58+
branchNameRegexes?: RegExp[];
6059
}
6160

6261
export interface DynamicAutolinkReference {

src/autolinks/utils/-webview/autolinks.utils.ts

+41-45
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ export function serializeAutolink(value: Autolink): Autolink {
2020
}
2121
: undefined,
2222
id: value.id,
23-
index: value.index,
2423
prefix: value.prefix,
2524
url: value.url,
2625
alphanumeric: value.alphanumeric,
@@ -43,23 +42,6 @@ function isCacheable(ref: AutolinkReference | DynamicAutolinkReference): ref is
4342
return 'prefix' in ref && ref.prefix != null && 'url' in ref && ref.url != null;
4443
}
4544

46-
/**
47-
* Compares autolinks
48-
* @returns non-0 result that means a probability of the autolink `b` is more relevant of the autolink `a`
49-
*/
50-
function compareAutolinks(a: Autolink, b: Autolink): number {
51-
// consider that if the number is in the start, it's the most relevant link
52-
if (b.index === 0) return 1;
53-
if (a.index === 0) return -1;
54-
55-
// maybe it worths to use some weight function instead.
56-
return (
57-
b.prefix.length - a.prefix.length ||
58-
b.id.length - a.id.length ||
59-
(b.index != null && a.index != null ? -(b.index - a.index) : 0)
60-
);
61-
}
62-
6345
export function ensureCachedRegex(
6446
ref: Autolink | CacheableAutolinkReference,
6547
outputFormat: 'html',
@@ -97,15 +79,29 @@ export function ensureCachedRegex(
9779
}
9880
}
9981

100-
export function ensureCachedBranchNameRegex(
82+
export function ensureCachedBranchNameRegexes(
10183
ref: CacheableAutolinkReference,
102-
): asserts ref is RequireSome<CacheableAutolinkReference, 'branchNameRegex'> {
103-
ref.branchNameRegex ??= new RegExp(
104-
`(^|\\-|_|\\.|\\/)(?<prefix>${ref.prefix})(?<issueKeyNumber>${
105-
ref.alphanumeric ? '\\w' : '\\d'
106-
}+)(?=$|\\-|_|\\.|\\/)`,
107-
'gi',
108-
);
84+
): asserts ref is RequireSome<CacheableAutolinkReference, 'branchNameRegexes'> {
85+
if (ref.prefix?.length > 0) {
86+
ref.branchNameRegexes ??= [
87+
// Rule 1: Any prefixed ref followed by a 2+ digit number and either a connector or end-of-string after it
88+
new RegExp(`(?<prefix>${ref.prefix})(?<issueKeyNumber>\\d{2,})(?:[\\/\\-\\_\\.]|$)`, 'i'),
89+
];
90+
} else {
91+
ref.branchNameRegexes ??= [
92+
// Rule 2: Any 2+ digit number preceded by feature|feat|fix|bug|bugfix|hotfix|issue|ticket with a connector before it, and either a connector or end-of-string after it
93+
new RegExp(
94+
`(?:feature|feat|fix|bug|bugfix|hotfix|issue|ticket)(?:\\/#|-#|_#|\\.#|[\\/\\-\\_\\.#])(?<issueKeyNumber>\\d{2,})(?:[\\/\\-\\_\\.]|$)`,
95+
'i',
96+
),
97+
// Rule 3.1: Any 3+ digit number preceded by at least two non-slash, non-numeric characters
98+
new RegExp(`(?:[^\\d/]{2})(?<issueKeyNumber>\\d{3,})`, 'i'),
99+
// Rule 3.2: Any 3+ digit number followed by at least two non-slash, non-numeric characters
100+
new RegExp(`(?<issueKeyNumber>\\d{3,})(?:[^\\d/]{2})`, 'i'),
101+
// Rule 3.3: A 3+ digit number is the entire branch name
102+
new RegExp(`^(?<issueKeyNumber>\\d{3,})$`, 'i'),
103+
];
104+
}
109105
}
110106

111107
export const numRegex = /<num>/g;
@@ -135,7 +131,6 @@ export function getAutolinks(message: string, refsets: Readonly<RefSet[]>): Map<
135131
autolinks.set(num, {
136132
provider: provider,
137133
id: num,
138-
index: match.index,
139134
prefix: ref.prefix,
140135
url: ref.url?.replace(numRegex, num),
141136
alphanumeric: ref.alphanumeric,
@@ -155,9 +150,16 @@ export function getAutolinks(message: string, refsets: Readonly<RefSet[]>): Map<
155150
export function getBranchAutolinks(branchName: string, refsets: Readonly<RefSet[]>): Map<string, Autolink> {
156151
const autolinks = new Map<string, Autolink>();
157152

158-
let match;
159153
let num;
160-
for (const [provider, refs] of refsets) {
154+
let match;
155+
// Sort refsets so that issue integrations are checked first for matches
156+
const sortedRefSets = [...refsets].sort((a, b) => {
157+
if (a[0]?.id === IssueIntegrationId.Jira || a[0]?.id === IssueIntegrationId.Trello) return -1;
158+
if (b[0]?.id === IssueIntegrationId.Jira || b[0]?.id === IssueIntegrationId.Trello) return 1;
159+
return 0;
160+
});
161+
162+
for (const [provider, refs] of sortedRefSets) {
161163
for (const ref of refs) {
162164
if (
163165
!isCacheable(ref) ||
@@ -167,33 +169,27 @@ export function getBranchAutolinks(branchName: string, refsets: Readonly<RefSet[
167169
continue;
168170
}
169171

170-
ensureCachedBranchNameRegex(ref);
171-
const matches = branchName.matchAll(ref.branchNameRegex);
172-
do {
173-
match = matches.next();
174-
if (!match.value?.groups) break;
175-
176-
num = match?.value?.groups.issueKeyNumber;
177-
let index = match.value.index;
172+
ensureCachedBranchNameRegexes(ref);
173+
for (const regex of ref.branchNameRegexes) {
174+
match = branchName.match(regex);
175+
if (!match?.groups) continue;
176+
num = match.groups.issueKeyNumber;
178177
const linkUrl = ref.url?.replace(numRegex, num);
179-
// strange case (I would say synthetic), but if we parse the link twice, use the most relevant of them
180-
const existingIndex = autolinks.get(linkUrl)?.index;
181-
if (existingIndex != null) {
182-
index = Math.min(index, existingIndex);
183-
}
184178
autolinks.set(linkUrl, {
185179
...ref,
186180
provider: provider,
187181
id: num,
188-
index: index,
189182
url: linkUrl,
190183
title: ref.title?.replace(numRegex, num),
191184
description: ref.description?.replace(numRegex, num),
192185
descriptor: ref.descriptor,
193186
});
194-
} while (!match.done);
187+
188+
// Stop at the first match
189+
return autolinks;
190+
}
195191
}
196192
}
197193

198-
return new Map([...autolinks.entries()].sort((a, b) => compareAutolinks(a[1], b[1])));
194+
return autolinks;
199195
}

src/plus/integrations/providers/jira.ts

+18-5
Original file line numberDiff line numberDiff line change
@@ -57,16 +57,29 @@ export class JiraIntegration extends IssueIntegration<IssueIntegrationId.Jira> {
5757
const projects = this._projects.get(`${this._session.accessToken}:${organization.id}`);
5858
if (projects != null) {
5959
for (const project of projects) {
60-
const prefix = `${project.key}-`;
60+
const dashedPrefix = `${project.key}-`;
61+
const underscoredPrefix = `${project.key}_`;
6162
autolinks.push({
62-
prefix: prefix,
63-
url: `${organization.url}/browse/${prefix}<num>`,
63+
prefix: dashedPrefix,
64+
url: `${organization.url}/browse/${dashedPrefix}<num>`,
6465
alphanumeric: false,
6566
ignoreCase: false,
66-
title: `Open Issue ${prefix}<num> on ${organization.name}`,
67+
title: `Open Issue ${dashedPrefix}<num> on ${organization.name}`,
6768

6869
type: 'issue',
69-
description: `${organization.name} Issue ${prefix}<num>`,
70+
description: `${organization.name} Issue ${dashedPrefix}<num>`,
71+
descriptor: { ...organization },
72+
});
73+
autolinks.push({
74+
prefix: underscoredPrefix,
75+
url: `${organization.url}/browse/${dashedPrefix}<num>`,
76+
alphanumeric: false,
77+
ignoreCase: false,
78+
referenceType: 'branch',
79+
title: `Open Issue ${dashedPrefix}<num> on ${organization.name}`,
80+
81+
type: 'issue',
82+
description: `${organization.name} Issue ${dashedPrefix}<num>`,
7083
descriptor: { ...organization },
7184
});
7285
}

0 commit comments

Comments
 (0)