Skip to content

Commit b5e01fc

Browse files
committed
roachtest: introduce registry.ErrorWithOwner
This function allows the caller to assign ownership directly when creating an error, which is very useful when we know that a failure during a certain part of the test should always be investigated by a certain team (for instance, if a test fails during setup, Test Eng should investigate, as the owners of the test infrastructure). Epic: none Release note: None
1 parent 94cbfe1 commit b5e01fc

File tree

7 files changed

+246
-144
lines changed

7 files changed

+246
-144
lines changed

pkg/cmd/roachtest/github.go

Lines changed: 40 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import (
2727
"github.com/cockroachdb/cockroach/pkg/roachprod/logger"
2828
"github.com/cockroachdb/cockroach/pkg/roachprod/vm"
2929
"github.com/cockroachdb/cockroach/pkg/roachprod/vm/gce"
30+
"github.com/cockroachdb/errors"
3031
)
3132

3233
type githubIssues struct {
@@ -147,37 +148,47 @@ func (g *githubIssues) createPostRequest(
147148
var mention []string
148149
var projColID int
149150

150-
issueOwner := spec.Owner
151-
issueName := testName
152-
issueClusterName := ""
151+
var (
152+
issueOwner = spec.Owner
153+
issueName = testName
154+
messagePrefix string
155+
infraFlake bool
156+
)
157+
158+
// handleErrorWithOwnership updates the local variables in this
159+
// function that contain the name of the issue being created,
160+
// message prefix, and team that will own it.
161+
handleErrorWithOwnership := func(err registry.ErrorWithOwnership) {
162+
issueOwner = err.Owner
163+
infraFlake = err.InfraFlake
164+
165+
if err.TitleOverride != "" {
166+
issueName = err.TitleOverride
167+
messagePrefix = fmt.Sprintf("test %s failed: ", testName)
168+
}
169+
}
153170

154-
messagePrefix := ""
155-
var infraFlake bool
156-
firstFailure := failures[0]
157-
// Overrides to shield eng teams from potential flakes
171+
issueClusterName := ""
172+
errWithOwnership := failuresSpecifyOwner(failures)
158173
switch {
159-
case failuresContainsError(failures, errVMPreemption):
160-
issueOwner = registry.OwnerTestEng
161-
issueName = "vm_preemption"
162-
messagePrefix = fmt.Sprintf("test %s failed due to ", testName)
163-
infraFlake = true
164-
case failureContainsError(firstFailure, errClusterProvisioningFailed):
165-
issueOwner = registry.OwnerTestEng
166-
issueName = "cluster_creation"
167-
messagePrefix = fmt.Sprintf("test %s was skipped due to ", testName)
168-
infraFlake = true
169-
case failureContainsError(firstFailure, rperrors.ErrSSH255):
170-
issueOwner = registry.OwnerTestEng
171-
issueName = "ssh_problem"
172-
messagePrefix = fmt.Sprintf("test %s failed due to ", testName)
173-
infraFlake = true
174-
case failureContainsError(firstFailure, gce.ErrDNSOperation):
175-
issueOwner = registry.OwnerTestEng
176-
issueName = "dns_problem"
177-
messagePrefix = fmt.Sprintf("test %s failed due to ", testName)
178-
infraFlake = true
179-
case failureContainsError(firstFailure, errDuringPostAssertions):
180-
messagePrefix = fmt.Sprintf("test %s failed during post test assertions (see test-post-assertions.log) due to ", testName)
174+
case errWithOwnership != nil:
175+
handleErrorWithOwnership(*errWithOwnership)
176+
177+
// The following error come from various entrypoints in roachprod,
178+
// but we know that they should be handled by TestEng whenever they
179+
// happen during a test.
180+
case failuresContainsError(failures, rperrors.ErrSSH255):
181+
handleErrorWithOwnership(registry.ErrorWithOwner(
182+
registry.OwnerTestEng, errors.New("SSH problem"),
183+
registry.WithTitleOverride("ssh_problem"),
184+
registry.InfraFlake,
185+
))
186+
case failuresContainsError(failures, gce.ErrDNSOperation):
187+
handleErrorWithOwnership(registry.ErrorWithOwner(
188+
registry.OwnerTestEng, errors.New("DNS problem"),
189+
registry.WithTitleOverride("dns_problem"),
190+
registry.InfraFlake,
191+
))
181192
}
182193

183194
// Issues posted from roachtest are identifiable as such, and they are also release blockers

pkg/cmd/roachtest/github_test.go

Lines changed: 87 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ import (
3030
"github.com/cockroachdb/cockroach/pkg/roachprod/vm"
3131
"github.com/cockroachdb/cockroach/pkg/roachprod/vm/gce"
3232
"github.com/cockroachdb/cockroach/pkg/testutils/echotest"
33-
"github.com/stretchr/testify/assert"
3433
"github.com/stretchr/testify/require"
3534
)
3635

@@ -128,6 +127,8 @@ func TestCreatePostRequest(t *testing.T) {
128127
return failure{squashedErr: ref}
129128
}
130129

130+
const testName = "github_test"
131+
131132
// TODO(radu): these tests should be converted to datadriven tests which
132133
// output the full rendering of the github issue message along with the
133134
// metadata.
@@ -143,6 +144,9 @@ func TestCreatePostRequest(t *testing.T) {
143144
failures []failure
144145
expectedPost bool
145146
expectedLabels []string
147+
expectedTeam string
148+
expectedName string
149+
expectedMessagePrefix string
146150
expectedReleaseBlocker bool
147151
expectedSkipTestFailure bool
148152
expectedParams map[string]string
@@ -154,6 +158,8 @@ func TestCreatePostRequest(t *testing.T) {
154158
failures: []failure{createFailure(errors.New("other"))},
155159
expectedPost: true,
156160
expectedLabels: []string{"C-test-failure"},
161+
expectedTeam: "@cockroachdb/unowned",
162+
expectedName: testName,
157163
expectedParams: prefixAll(map[string]string{
158164
"cloud": "gce",
159165
"encrypted": "false",
@@ -171,9 +177,14 @@ func TestCreatePostRequest(t *testing.T) {
171177
localSSD: true,
172178
metamorphicBuild: true,
173179
arch: vm.ArchARM64,
174-
failures: []failure{createFailure(errClusterProvisioningFailed)},
175-
expectedPost: true,
176-
expectedLabels: []string{"T-testeng", "X-infra-flake"},
180+
failures: []failure{
181+
createFailure(errClusterProvisioningFailed(errors.New("gcloud error"))),
182+
},
183+
expectedPost: true,
184+
expectedLabels: []string{"T-testeng", "X-infra-flake"},
185+
expectedTeam: "@cockroachdb/test-eng",
186+
expectedName: "cluster_creation",
187+
expectedMessagePrefix: testName + " failed",
177188
expectedParams: prefixAll(map[string]string{
178189
"cloud": "gce",
179190
"encrypted": "false",
@@ -195,6 +206,9 @@ func TestCreatePostRequest(t *testing.T) {
195206
failures: []failure{createFailure(rperrors.ErrSSH255)},
196207
expectedPost: true,
197208
expectedLabels: []string{"T-testeng", "X-infra-flake"},
209+
expectedTeam: "@cockroachdb/test-eng",
210+
expectedName: "ssh_problem",
211+
expectedMessagePrefix: testName + " failed",
198212
expectedParams: prefixAll(map[string]string{
199213
"cloud": "gce",
200214
"ssd": "0",
@@ -210,25 +224,24 @@ func TestCreatePostRequest(t *testing.T) {
210224
failures: []failure{createFailure(errors.New("other"))},
211225
expectedLabels: []string{"C-test-failure"},
212226
},
213-
// 5. Error during post test assertions
214-
{
215-
nonReleaseBlocker: true,
216-
failures: []failure{createFailure(errDuringPostAssertions)},
217-
expectedLabels: []string{"C-test-failure"},
218-
},
219-
// 6. Error during dns operation.
227+
// 5. Error during dns operation.
220228
{
221-
nonReleaseBlocker: true,
222-
failures: []failure{createFailure(gce.ErrDNSOperation)},
223-
expectedPost: true,
224-
expectedLabels: []string{"T-testeng", "X-infra-flake"},
229+
nonReleaseBlocker: true,
230+
failures: []failure{createFailure(gce.ErrDNSOperation)},
231+
expectedPost: true,
232+
expectedLabels: []string{"T-testeng", "X-infra-flake"},
233+
expectedTeam: "@cockroachdb/test-eng",
234+
expectedName: "dns_problem",
235+
expectedMessagePrefix: testName + " failed",
225236
},
226-
// 7. Assert that extra labels in the test spec are added to the issue.
237+
// 6. Assert that extra labels in the test spec are added to the issue.
227238
{
228239
extraLabels: []string{"foo-label"},
229240
failures: []failure{createFailure(errors.New("other"))},
230241
expectedPost: true,
231242
expectedLabels: []string{"C-test-failure", "release-blocker", "foo-label"},
243+
expectedTeam: "@cockroachdb/unowned",
244+
expectedName: testName,
232245
expectedParams: prefixAll(map[string]string{
233246
"cloud": "gce",
234247
"encrypted": "false",
@@ -241,13 +254,15 @@ func TestCreatePostRequest(t *testing.T) {
241254
"coverageBuild": "false",
242255
}),
243256
},
244-
// 8. Verify that release-blocker label is not applied on metamorphic builds
257+
// 7. Verify that release-blocker label is not applied on metamorphic builds
245258
// (for now).
246259
{
247260
metamorphicBuild: true,
248261
failures: []failure{createFailure(errors.New("other"))},
249262
expectedPost: true,
250263
expectedLabels: []string{"C-test-failure", "B-metamorphic-enabled"},
264+
expectedTeam: "@cockroachdb/unowned",
265+
expectedName: testName,
251266
expectedParams: prefixAll(map[string]string{
252267
"cloud": "gce",
253268
"encrypted": "false",
@@ -260,13 +275,15 @@ func TestCreatePostRequest(t *testing.T) {
260275
"coverageBuild": "false",
261276
}),
262277
},
263-
// 9. Verify that release-blocker label is not applied on coverage builds (for
278+
// 8. Verify that release-blocker label is not applied on coverage builds (for
264279
// now).
265280
{
266281
extraLabels: []string{"foo-label"},
267282
coverageBuild: true,
268283
failures: []failure{createFailure(errors.New("other"))},
269284
expectedPost: true,
285+
expectedTeam: "@cockroachdb/unowned",
286+
expectedName: testName,
270287
expectedLabels: []string{"C-test-failure", "B-coverage-enabled", "foo-label"},
271288
expectedParams: prefixAll(map[string]string{
272289
"cloud": "gce",
@@ -280,29 +297,40 @@ func TestCreatePostRequest(t *testing.T) {
280297
"coverageBuild": "true",
281298
}),
282299
},
283-
// 10. Verify preemption failure are routed to test-eng and marked as infra-flake,
300+
// 9. Verify preemption failure are routed to test-eng and marked as infra-flake,
284301
// even if the first failure is another handled error.
285302
{
286-
nonReleaseBlocker: true,
287-
failures: []failure{createFailure(gce.ErrDNSOperation), createFailure(errVMPreemption)},
288-
expectedPost: true,
289-
expectedLabels: []string{"T-testeng", "X-infra-flake"},
303+
nonReleaseBlocker: true,
304+
failures: []failure{createFailure(gce.ErrDNSOperation), createFailure(vmPreemptionError("my_VM"))},
305+
expectedPost: true,
306+
expectedName: "vm_preemption",
307+
expectedTeam: "@cockroachdb/test-eng",
308+
expectedMessagePrefix: testName + " failed",
309+
expectedLabels: []string{"T-testeng", "X-infra-flake"},
290310
},
291-
// 11. Verify preemption failure are routed to test-eng and marked as infra-flake, when the
311+
// 10. Verify preemption failure are routed to test-eng and marked as infra-flake, when the
292312
// first failure is a non-handled error.
293313
{
294-
nonReleaseBlocker: true,
295-
failures: []failure{createFailure(errors.New("random")), createFailure(errVMPreemption)},
296-
expectedPost: true,
297-
expectedLabels: []string{"T-testeng", "X-infra-flake"},
314+
nonReleaseBlocker: true,
315+
failures: []failure{createFailure(errors.New("random")), createFailure(vmPreemptionError("my_VM"))},
316+
expectedPost: true,
317+
expectedTeam: "@cockroachdb/test-eng",
318+
expectedName: "vm_preemption",
319+
expectedMessagePrefix: testName + " failed",
320+
expectedLabels: []string{"T-testeng", "X-infra-flake"},
298321
},
299-
// 12. Verify preemption failure are routed to test-eng and marked as infra-flake, when the only error is
322+
// 11. Verify preemption failure are routed to test-eng and marked as infra-flake, when the only error is
300323
// preemption failure
301324
{
302325
nonReleaseBlocker: true,
303-
failures: []failure{{errors: []error{errVMPreemption}}},
304-
expectedPost: true,
305-
expectedLabels: []string{"T-testeng", "X-infra-flake"},
326+
failures: []failure{
327+
{errors: []error{vmPreemptionError("my_VM")}},
328+
},
329+
expectedPost: true,
330+
expectedTeam: "@cockroachdb/test-eng",
331+
expectedName: "vm_preemption",
332+
expectedMessagePrefix: testName + " failed",
333+
expectedLabels: []string{"T-testeng", "X-infra-flake"},
306334
},
307335
}
308336

@@ -312,7 +340,7 @@ func TestCreatePostRequest(t *testing.T) {
312340
clusterSpec := reg.MakeClusterSpec(1, spec.Arch(testCase.arch))
313341

314342
testSpec := &registry.TestSpec{
315-
Name: "github_test",
343+
Name: testName,
316344
Owner: OwnerUnitTest,
317345
Cluster: clusterSpec,
318346
NonReleaseBlocker: testCase.nonReleaseBlocker,
@@ -350,57 +378,35 @@ func TestCreatePostRequest(t *testing.T) {
350378
teamLoader: teamLoadFn,
351379
}
352380

381+
req, err := github.createPostRequest(
382+
testName, ti.start, ti.end, testSpec, testCase.failures,
383+
testCase.message, testCase.metamorphicBuild, testCase.coverageBuild,
384+
)
353385
if testCase.loadTeamsFailed {
354386
// Assert that if TEAMS.yaml cannot be loaded then function errors.
355-
_, err := github.createPostRequest("github_test", ti.start, ti.end, testSpec, testCase.failures, testCase.message, testCase.metamorphicBuild, testCase.coverageBuild)
356-
assert.Error(t, err, "Expected an error in createPostRequest when loading teams fails, but got nil")
357-
} else {
358-
req, err := github.createPostRequest("github_test", ti.start, ti.end, testSpec, testCase.failures, testCase.message, testCase.metamorphicBuild, testCase.coverageBuild)
359-
assert.NoError(t, err, "Expected no error in createPostRequest")
360-
361-
r := &issues.Renderer{}
362-
req.HelpCommand(r)
363-
file := fmt.Sprintf("help_command_createpost_%d.txt", idx+1)
364-
echotest.Require(t, r.String(), filepath.Join("testdata", file))
365-
366-
if testCase.expectedParams != nil {
367-
require.Equal(t, testCase.expectedParams, req.ExtraParams)
368-
}
369-
370-
expLabels := append([]string{"O-roachtest"}, testCase.expectedLabels...)
371-
sort.Strings(expLabels)
372-
labels := append([]string{}, req.Labels...)
373-
sort.Strings(expLabels)
374-
sort.Strings(labels)
375-
require.Equal(t, expLabels, labels)
376-
377-
expectedTeam := "@cockroachdb/unowned"
378-
expectedName := "github_test"
379-
expectedMessagePrefix := ""
380-
if failuresContainsError(testCase.failures, errVMPreemption) {
381-
expectedTeam = "@cockroachdb/test-eng"
382-
expectedName = "vm_preemption"
383-
expectedMessagePrefix = "test github_test failed due to "
384-
} else if errors.Is(testCase.failures[0].squashedErr, gce.ErrDNSOperation) {
385-
expectedTeam = "@cockroachdb/test-eng"
386-
expectedName = "dns_problem"
387-
expectedMessagePrefix = "test github_test failed due to "
388-
} else if errors.Is(testCase.failures[0].squashedErr, errClusterProvisioningFailed) {
389-
expectedTeam = "@cockroachdb/test-eng"
390-
expectedName = "cluster_creation"
391-
expectedMessagePrefix = "test github_test was skipped due to "
392-
} else if errors.Is(testCase.failures[0].squashedErr, rperrors.ErrSSH255) {
393-
expectedTeam = "@cockroachdb/test-eng"
394-
expectedName = "ssh_problem"
395-
expectedMessagePrefix = "test github_test failed due to "
396-
} else if errors.Is(testCase.failures[0].squashedErr, errDuringPostAssertions) {
397-
expectedMessagePrefix = "test github_test failed during post test assertions (see test-post-assertions.log) due to "
398-
}
399-
400-
require.Contains(t, req.MentionOnCreate, expectedTeam)
401-
require.Equal(t, expectedName, req.TestName)
402-
require.True(t, strings.HasPrefix(req.Message, expectedMessagePrefix), req.Message)
387+
require.Error(t, err)
388+
return
403389
}
390+
391+
require.NoError(t, err)
392+
393+
r := &issues.Renderer{}
394+
req.HelpCommand(r)
395+
file := fmt.Sprintf("help_command_createpost_%d.txt", idx+1)
396+
echotest.Require(t, r.String(), filepath.Join("testdata", file))
397+
398+
if testCase.expectedParams != nil {
399+
require.Equal(t, testCase.expectedParams, req.ExtraParams)
400+
}
401+
402+
expLabels := append([]string{"O-roachtest"}, testCase.expectedLabels...)
403+
sort.Strings(expLabels)
404+
sort.Strings(req.Labels)
405+
require.Equal(t, expLabels, req.Labels)
406+
407+
require.Contains(t, req.MentionOnCreate, testCase.expectedTeam)
408+
require.Equal(t, testCase.expectedName, req.TestName)
409+
require.Contains(t, req.Message, testCase.expectedMessagePrefix)
404410
})
405411
}
406412
}

pkg/cmd/roachtest/registry/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ go_library(
44
name = "registry",
55
srcs = [
66
"encryption.go",
7+
"errors.go",
78
"filter.go",
89
"owners.go",
910
"registry_interface.go",

0 commit comments

Comments
 (0)