Skip to content

Commit 36b6fcc

Browse files
committed
include all partial errors in log
Signed-off-by: Justin Jung <[email protected]>
1 parent 6f08c10 commit 36b6fcc

4 files changed

+78
-43
lines changed

pkg/ring/replication_set.go

+9-16
Original file line numberDiff line numberDiff line change
@@ -71,39 +71,32 @@ func (r ReplicationSet) Do(ctx context.Context, delay time.Duration, zoneResults
7171
}(i, &r.Instances[i])
7272
}
7373

74-
trackerFailed := false
75-
cnt := 0
76-
77-
track:
78-
for !tracker.succeeded() {
74+
for !tracker.succeeded() && !tracker.finished() {
7975
select {
8076
case res := <-ch:
8177
tracker.done(res.instance, res.res, res.err)
8278
if res.err != nil {
83-
if tracker.failed() {
84-
if !partialDataEnabled || tracker.failedCompletely() {
85-
return nil, res.err
86-
}
87-
trackerFailed = true
79+
if tracker.failed() && (!partialDataEnabled || tracker.failedCompletely()) {
80+
return nil, res.err
8881
}
8982

9083
// force one of the delayed requests to start
9184
if delay > 0 && r.MaxUnavailableZones == 0 {
9285
forceStart <- struct{}{}
9386
}
9487
}
95-
cnt++
96-
if cnt == len(r.Instances) {
97-
break track
98-
}
9988

10089
case <-ctx.Done():
10190
return nil, ctx.Err()
10291
}
10392
}
10493

105-
if partialDataEnabled && trackerFailed {
106-
return tracker.getResults(), fmt.Errorf("failed to get data from %s: %w", tracker.failedInstances(), partialdata.ErrPartialData)
94+
if partialDataEnabled && tracker.failed() {
95+
finalErr := partialdata.ErrPartialData
96+
for _, partialErr := range tracker.getErrors() {
97+
finalErr = fmt.Errorf("%w: %w", finalErr, partialErr)
98+
}
99+
return tracker.getResults(), finalErr
107100
}
108101

109102
return tracker.getResults(), nil

pkg/ring/replication_set_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,7 @@ func TestReplicationSet_Do(t *testing.T) {
203203
queryPartialData: true,
204204
want: []interface{}{1},
205205
expectedError: partialdata.ErrPartialData,
206-
errStrContains: []string{"failed to get data from", "10.0.0.1", "10.0.0.2"},
206+
errStrContains: []string{"10.0.0.1", "10.0.0.2", "zone failed"},
207207
},
208208
{
209209
name: "with partial data enabled, should fail on instances failing in all zones",

pkg/ring/replication_set_tracker.go

+32-17
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,40 @@
11
package ring
22

3+
import "fmt"
4+
35
type replicationSetResultTracker interface {
46
// Signals an instance has done the execution, either successful (no error)
57
// or failed (with error). If successful, result will be recorded and can
68
// be accessed via getResults.
79
done(instance *InstanceDesc, result interface{}, err error)
810

11+
// Returns true if all instances are done executing
12+
finished() bool
13+
914
// Returns true if the minimum number of successful results have been received.
1015
succeeded() bool
1116

1217
// Returns true if the maximum number of failed executions have been reached.
1318
failed() bool
1419

15-
// Returns list of instance addresses that failed
16-
failedInstances() []string
17-
1820
// Returns true if executions failed in all instances or all zones.
1921
failedCompletely() bool
2022

2123
// Returns recorded results.
2224
getResults() []interface{}
25+
26+
// Returns errors
27+
getErrors() []error
2328
}
2429

2530
type defaultResultTracker struct {
2631
minSucceeded int
2732
numSucceeded int
2833
numErrors int
2934
maxErrors int
30-
failedInst []string
3135
results []interface{}
3236
numInstances int
37+
errors []error
3338
}
3439

3540
func newDefaultResultTracker(instances []InstanceDesc, maxErrors int) *defaultResultTracker {
@@ -38,7 +43,7 @@ func newDefaultResultTracker(instances []InstanceDesc, maxErrors int) *defaultRe
3843
numSucceeded: 0,
3944
numErrors: 0,
4045
maxErrors: maxErrors,
41-
failedInst: make([]string, 0, len(instances)),
46+
errors: make([]error, 0, len(instances)),
4247
results: make([]interface{}, 0, len(instances)),
4348
numInstances: len(instances),
4449
}
@@ -49,11 +54,15 @@ func (t *defaultResultTracker) done(instance *InstanceDesc, result interface{},
4954
t.numSucceeded++
5055
t.results = append(t.results, result)
5156
} else {
52-
t.failedInst = append(t.failedInst, instance.GetAddr())
57+
t.errors = append(t.errors, fmt.Errorf("(%s) %w", instance.GetAddr(), err))
5358
t.numErrors++
5459
}
5560
}
5661

62+
func (t *defaultResultTracker) finished() bool {
63+
return t.numSucceeded+t.numErrors == t.numInstances
64+
}
65+
5766
func (t *defaultResultTracker) succeeded() bool {
5867
return t.numSucceeded >= t.minSucceeded
5968
}
@@ -66,14 +75,14 @@ func (t *defaultResultTracker) failedCompletely() bool {
6675
return t.numInstances == t.numErrors
6776
}
6877

69-
func (t *defaultResultTracker) failedInstances() []string {
70-
return t.failedInst
71-
}
72-
7378
func (t *defaultResultTracker) getResults() []interface{} {
7479
return t.results
7580
}
7681

82+
func (t *defaultResultTracker) getErrors() []error {
83+
return t.errors
84+
}
85+
7786
// zoneAwareResultTracker tracks the results per zone.
7887
// All instances in a zone must succeed in order for the zone to succeed.
7988
type zoneAwareResultTracker struct {
@@ -84,8 +93,9 @@ type zoneAwareResultTracker struct {
8493
resultsPerZone map[string][]interface{}
8594
numInstances int
8695
zoneResultsQuorum bool
87-
failedInst []string
8896
zoneCount int
97+
doneCount int
98+
errors []error
8999
}
90100

91101
func newZoneAwareResultTracker(instances []InstanceDesc, maxUnavailableZones int, zoneResultsQuorum bool) *zoneAwareResultTracker {
@@ -95,7 +105,7 @@ func newZoneAwareResultTracker(instances []InstanceDesc, maxUnavailableZones int
95105
maxUnavailableZones: maxUnavailableZones,
96106
numInstances: len(instances),
97107
zoneResultsQuorum: zoneResultsQuorum,
98-
failedInst: make([]string, 0, len(instances)),
108+
errors: make([]error, 0, len(instances)),
99109
}
100110

101111
for _, instance := range instances {
@@ -111,7 +121,7 @@ func newZoneAwareResultTracker(instances []InstanceDesc, maxUnavailableZones int
111121
func (t *zoneAwareResultTracker) done(instance *InstanceDesc, result interface{}, err error) {
112122
if err != nil {
113123
t.failuresByZone[instance.Zone]++
114-
t.failedInst = append(t.failedInst, instance.Addr)
124+
t.errors = append(t.errors, fmt.Errorf("(%s) %w", instance.GetAddr(), err))
115125
} else {
116126
if _, ok := t.resultsPerZone[instance.Zone]; !ok {
117127
// If it is the first result in the zone, then total number of instances
@@ -122,6 +132,11 @@ func (t *zoneAwareResultTracker) done(instance *InstanceDesc, result interface{}
122132
}
123133

124134
t.waitingByZone[instance.Zone]--
135+
t.doneCount++
136+
}
137+
138+
func (t *zoneAwareResultTracker) finished() bool {
139+
return t.doneCount == t.numInstances
125140
}
126141

127142
func (t *zoneAwareResultTracker) succeeded() bool {
@@ -148,10 +163,6 @@ func (t *zoneAwareResultTracker) failedCompletely() bool {
148163
return failedZones == t.zoneCount
149164
}
150165

151-
func (t *zoneAwareResultTracker) failedInstances() []string {
152-
return t.failedInst
153-
}
154-
155166
func (t *zoneAwareResultTracker) getResults() []interface{} {
156167
results := make([]interface{}, 0, t.numInstances)
157168
if t.zoneResultsQuorum {
@@ -169,3 +180,7 @@ func (t *zoneAwareResultTracker) getResults() []interface{} {
169180
}
170181
return results
171182
}
183+
184+
func (t *zoneAwareResultTracker) getErrors() []error {
185+
return t.errors
186+
}

pkg/ring/replication_set_tracker_test.go

+36-9
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package ring
22

33
import (
44
"errors"
5+
"fmt"
56
"testing"
67

78
"github.com/stretchr/testify/assert"
@@ -154,7 +155,7 @@ func TestDefaultResultTracker(t *testing.T) {
154155
assert.Equal(t, []interface{}{[]int{1, 1, 1}, []int{2, 2, 2}, []int{3, 3, 3}}, tracker.getResults())
155156
},
156157
},
157-
"failedCompletely should return true only if all instances have failed, regardless of max errors": {
158+
"failedCompletely() should return true only if all instances have failed, regardless of max errors": {
158159
instances: []InstanceDesc{instance1, instance2, instance3},
159160
maxErrors: 1,
160161
run: func(t *testing.T, tracker *defaultResultTracker) {
@@ -174,15 +175,28 @@ func TestDefaultResultTracker(t *testing.T) {
174175
assert.True(t, tracker.failedCompletely())
175176
},
176177
},
177-
"failedInstances should work": {
178+
"finished() should return true only if all instances are done": {
178179
instances: []InstanceDesc{instance1, instance2},
179-
maxErrors: 2,
180+
maxErrors: 1,
180181
run: func(t *testing.T, tracker *defaultResultTracker) {
181182
tracker.done(&instance1, nil, errors.New("test"))
182-
assert.ElementsMatch(t, []string{"127.0.0.1"}, tracker.failedInstances())
183+
assert.False(t, tracker.finished())
183184

184185
tracker.done(&instance2, nil, errors.New("test"))
185-
assert.ElementsMatch(t, []string{"127.0.0.1", "127.0.0.2"}, tracker.failedInstances())
186+
assert.True(t, tracker.finished())
187+
},
188+
},
189+
"getErrors() should return list of all errors": {
190+
instances: []InstanceDesc{instance1, instance2},
191+
maxErrors: 1,
192+
run: func(t *testing.T, tracker *defaultResultTracker) {
193+
tracker.done(&instance1, nil, errors.New("test1"))
194+
err1 := fmt.Errorf("(%s) %w", instance1.GetAddr(), errors.New("test1"))
195+
assert.ElementsMatch(t, []error{err1}, tracker.getErrors())
196+
197+
tracker.done(&instance2, nil, errors.New("test2"))
198+
err2 := fmt.Errorf("(%s) %w", instance2.GetAddr(), errors.New("test2"))
199+
assert.ElementsMatch(t, []error{err1, err2}, tracker.getErrors())
186200
},
187201
},
188202
}
@@ -430,7 +444,7 @@ func TestZoneAwareResultTracker(t *testing.T) {
430444
assert.False(t, tracker.failed())
431445
},
432446
},
433-
"failedCompletely should return true only if all zones have failed, regardless of max unavailable zones": {
447+
"failedCompletely() should return true only if all zones have failed, regardless of max unavailable zones": {
434448
instances: []InstanceDesc{instance1, instance2, instance3, instance4, instance5, instance6},
435449
maxUnavailableZones: 1,
436450
run: func(t *testing.T, tracker *zoneAwareResultTracker) {
@@ -453,15 +467,28 @@ func TestZoneAwareResultTracker(t *testing.T) {
453467
assert.True(t, tracker.failedCompletely())
454468
},
455469
},
456-
"failedInstances should work": {
470+
"finished() should return true only if all instances are done": {
457471
instances: []InstanceDesc{instance1, instance2},
458472
maxUnavailableZones: 1,
459473
run: func(t *testing.T, tracker *zoneAwareResultTracker) {
460474
tracker.done(&instance1, nil, errors.New("test"))
461-
assert.ElementsMatch(t, []string{"127.0.0.1"}, tracker.failedInstances())
475+
assert.False(t, tracker.finished())
462476

463477
tracker.done(&instance2, nil, errors.New("test"))
464-
assert.ElementsMatch(t, []string{"127.0.0.1", "127.0.0.2"}, tracker.failedInstances())
478+
assert.True(t, tracker.finished())
479+
},
480+
},
481+
"getErrors() should return list of all errors": {
482+
instances: []InstanceDesc{instance1, instance2},
483+
maxUnavailableZones: 1,
484+
run: func(t *testing.T, tracker *zoneAwareResultTracker) {
485+
tracker.done(&instance1, nil, errors.New("test1"))
486+
err1 := fmt.Errorf("(%s) %w", instance1.GetAddr(), errors.New("test1"))
487+
assert.ElementsMatch(t, []error{err1}, tracker.getErrors())
488+
489+
tracker.done(&instance2, nil, errors.New("test2"))
490+
err2 := fmt.Errorf("(%s) %w", instance2.GetAddr(), errors.New("test2"))
491+
assert.ElementsMatch(t, []error{err1, err2}, tracker.getErrors())
465492
},
466493
},
467494
}

0 commit comments

Comments
 (0)