Skip to content

Commit 6f65c1c

Browse files
committed
refactor: split error handling and retry logic
With this commit `isRetriable` no longer overwrites / wraps the error that is passed to it. The wrapping was originally introduced to attach context about the circumstances in which the error occurred to the error itself, so that this context could be matched on later. This mechanism has proven to cause bugs and increase overall complexity by abusing the error type. Instead, `isRetriable` now only returns whether a certain combination of parameters is considered retry-able, either because the circumstances allow for it or because the error matches one of the retry-able error classifiers. Resolves: cloudfoundry/routing-release#321
1 parent 0f9b398 commit 6f65c1c

File tree

6 files changed

+12
-190
lines changed

6 files changed

+12
-190
lines changed

proxy/fails/basic_classifiers.go

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,6 @@ import (
99
"strings"
1010
)
1111

12-
var IdempotentRequestEOFError = errors.New("EOF (via idempotent request)")
13-
14-
var IncompleteRequestError = errors.New("incomplete request")
15-
1612
var AttemptedTLSWithNonTLSBackend = ClassifierFunc(func(err error) bool {
1713
return errors.As(err, &tls.RecordHeaderError{})
1814
})
@@ -78,11 +74,3 @@ var UntrustedCert = ClassifierFunc(func(err error) bool {
7874
return false
7975
}
8076
})
81-
82-
var IdempotentRequestEOF = ClassifierFunc(func(err error) bool {
83-
return errors.Is(err, IdempotentRequestEOFError)
84-
})
85-
86-
var IncompleteRequest = ClassifierFunc(func(err error) bool {
87-
return errors.Is(err, IncompleteRequestError)
88-
})

proxy/fails/classifier_group.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,11 @@ type ClassifierGroup []Classifier
1010
//
1111
// Otherwise, there’s risk of a mutating non-idempotent request (e.g. send
1212
// payment) being silently retried without the client knowing.
13+
//
14+
// IMPORTANT: to truly determine whether a request is retry-able the function
15+
// round_tripper.isRetriable must be used. It includes additional checks that
16+
// allow requests to be retried in more cases than is allowed by the
17+
// classifiers.
1318
var RetriableClassifiers = ClassifierGroup{
1419
Dial,
1520
AttemptedTLSWithNonTLSBackend,
@@ -19,8 +24,6 @@ var RetriableClassifiers = ClassifierGroup{
1924
RemoteHandshakeTimeout,
2025
UntrustedCert,
2126
ExpiredOrNotYetValidCertFailure,
22-
IdempotentRequestEOF,
23-
IncompleteRequest,
2427
}
2528

2629
var FailableClassifiers = ClassifierGroup{

proxy/fails/classifier_group_test.go

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ import (
44
"crypto/tls"
55
"crypto/x509"
66
"errors"
7-
"fmt"
87
"net"
98

109
"code.cloudfoundry.org/gorouter/proxy/fails"
@@ -33,25 +32,14 @@ var _ = Describe("ClassifierGroup", func() {
3332
rc := fails.RetriableClassifiers
3433

3534
Expect(rc.Classify(&net.OpError{Op: "dial"})).To(BeTrue())
36-
Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, &net.OpError{Op: "dial"}))).To(BeTrue())
3735
Expect(rc.Classify(&net.OpError{Op: "remote error", Err: errors.New("tls: bad certificate")})).To(BeTrue())
38-
Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, &net.OpError{Op: "remote error", Err: errors.New("tls: bad certificate")}))).To(BeTrue())
3936
Expect(rc.Classify(&net.OpError{Op: "remote error", Err: errors.New("tls: handshake failure")})).To(BeTrue())
40-
Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, &net.OpError{Op: "remote error", Err: errors.New("tls: handshake failure")}))).To(BeTrue())
4137
Expect(rc.Classify(errors.New("net/http: TLS handshake timeout"))).To(BeTrue())
42-
Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, errors.New("net/http: TLS handshake timeout")))).To(BeTrue())
4338
Expect(rc.Classify(tls.RecordHeaderError{})).To(BeTrue())
44-
Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, tls.RecordHeaderError{}))).To(BeTrue())
4539
Expect(rc.Classify(x509.HostnameError{})).To(BeTrue())
46-
Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, x509.HostnameError{}))).To(BeTrue())
4740
Expect(rc.Classify(x509.UnknownAuthorityError{})).To(BeTrue())
48-
Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, x509.UnknownAuthorityError{}))).To(BeTrue())
4941
Expect(rc.Classify(x509.CertificateInvalidError{Reason: x509.Expired})).To(BeTrue())
50-
Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, x509.CertificateInvalidError{Reason: x509.Expired}))).To(BeTrue())
5142
Expect(rc.Classify(errors.New("i'm a potato"))).To(BeFalse())
52-
Expect(rc.Classify(fails.IdempotentRequestEOFError)).To(BeTrue())
53-
Expect(rc.Classify(fails.IncompleteRequestError)).To(BeTrue())
54-
Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, x509.HostnameError{}))).To(BeTrue())
5543
})
5644
})
5745

proxy/round_tripper/error_handler_test.go

Lines changed: 0 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ import (
55
"crypto/tls"
66
"crypto/x509"
77
"errors"
8-
"fmt"
98
"net"
109
"net/http/httptest"
1110

@@ -137,22 +136,6 @@ var _ = Describe("HandleError", func() {
137136
})
138137
})
139138

140-
Context("HostnameMismatch wrapped in IncompleteRequestError", func() {
141-
BeforeEach(func() {
142-
wrappedErr := x509.HostnameError{Host: "the wrong one"}
143-
err = fmt.Errorf("%w (%w)", fails.IncompleteRequestError, wrappedErr)
144-
errorHandler.HandleError(responseWriter, err)
145-
})
146-
147-
It("has a 503 Status Code", func() {
148-
Expect(responseWriter.Status()).To(Equal(503))
149-
})
150-
151-
It("emits a backend_invalid_id metric", func() {
152-
Expect(metricReporter.CaptureBackendInvalidIDCallCount()).To(Equal(1))
153-
})
154-
})
155-
156139
Context("Untrusted Cert", func() {
157140
BeforeEach(func() {
158141
err = x509.UnknownAuthorityError{}
@@ -168,22 +151,6 @@ var _ = Describe("HandleError", func() {
168151
})
169152
})
170153

171-
Context("Untrusted Cert wrapped in IncompleteRequestError", func() {
172-
BeforeEach(func() {
173-
wrappedErr := x509.UnknownAuthorityError{}
174-
err = fmt.Errorf("%w (%w)", fails.IncompleteRequestError, wrappedErr)
175-
errorHandler.HandleError(responseWriter, err)
176-
})
177-
178-
It("has a 526 Status Code", func() {
179-
Expect(responseWriter.Status()).To(Equal(526))
180-
})
181-
182-
It("emits a backend_invalid_tls_cert metric", func() {
183-
Expect(metricReporter.CaptureBackendInvalidTLSCertCallCount()).To(Equal(1))
184-
})
185-
})
186-
187154
Context("Attempted TLS with non-TLS backend error", func() {
188155
BeforeEach(func() {
189156
err = tls.RecordHeaderError{Msg: "bad handshake"}
@@ -199,22 +166,6 @@ var _ = Describe("HandleError", func() {
199166
})
200167
})
201168

202-
Context("Attempted TLS with non-TLS backend error wrapped in IncompleteRequestError", func() {
203-
BeforeEach(func() {
204-
wrappedErr := tls.RecordHeaderError{Msg: "bad handshake"}
205-
err = fmt.Errorf("%w (%w)", fails.IncompleteRequestError, wrappedErr)
206-
errorHandler.HandleError(responseWriter, err)
207-
})
208-
209-
It("has a 525 Status Code", func() {
210-
Expect(responseWriter.Status()).To(Equal(525))
211-
})
212-
213-
It("emits a backend_tls_handshake_failed metric", func() {
214-
Expect(metricReporter.CaptureBackendTLSHandshakeFailedCallCount()).To(Equal(1))
215-
})
216-
})
217-
218169
Context("Remote handshake failure", func() {
219170
BeforeEach(func() {
220171
err = &net.OpError{Op: "remote error", Err: errors.New("tls: handshake failure")}
@@ -230,22 +181,6 @@ var _ = Describe("HandleError", func() {
230181
})
231182
})
232183

233-
Context("Remote handshake failure wrapped in IncompleteRequestError", func() {
234-
BeforeEach(func() {
235-
wrappedErr := &net.OpError{Op: "remote error", Err: errors.New("tls: handshake failure")}
236-
err = fmt.Errorf("%w (%w)", fails.IncompleteRequestError, wrappedErr)
237-
errorHandler.HandleError(responseWriter, err)
238-
})
239-
240-
It("has a 525 Status Code", func() {
241-
Expect(responseWriter.Status()).To(Equal(525))
242-
})
243-
244-
It("emits a backend_tls_handshake_failed metric", func() {
245-
Expect(metricReporter.CaptureBackendTLSHandshakeFailedCallCount()).To(Equal(1))
246-
})
247-
})
248-
249184
Context("Context Cancelled Error", func() {
250185
BeforeEach(func() {
251186
err = context.Canceled

proxy/round_tripper/proxy_round_tripper.go

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ package round_tripper
33
import (
44
"context"
55
"errors"
6-
"fmt"
76
"io"
87
"io/ioutil"
98
"net/http"
@@ -177,7 +176,7 @@ func (rt *roundTripper) RoundTrip(originalRequest *http.Request) (*http.Response
177176
if err != nil {
178177
reqInfo.FailedAttempts++
179178
reqInfo.LastFailedAttemptFinishedAt = time.Now()
180-
retriable, err := rt.isRetriable(request, err, trace)
179+
retriable := rt.isRetriable(request, err, trace)
181180

182181
logger.Error("backend-endpoint-failed",
183182
zap.Error(err),
@@ -226,7 +225,7 @@ func (rt *roundTripper) RoundTrip(originalRequest *http.Request) (*http.Response
226225
if err != nil {
227226
reqInfo.FailedAttempts++
228227
reqInfo.LastFailedAttemptFinishedAt = time.Now()
229-
retriable, err := rt.isRetriable(request, err, trace)
228+
retriable := rt.isRetriable(request, err, trace)
230229

231230
logger.Error(
232231
"route-service-connection-failed",
@@ -482,24 +481,23 @@ func isIdempotent(request *http.Request) bool {
482481
return false
483482
}
484483

485-
func (rt *roundTripper) isRetriable(request *http.Request, err error, trace *requestTracer) (bool, error) {
484+
func (rt *roundTripper) isRetriable(request *http.Request, err error, trace *requestTracer) bool {
486485
// if the context has been cancelled we do not perform further retries
487486
if request.Context().Err() != nil {
488-
return false, fmt.Errorf("%w (%w)", request.Context().Err(), err)
487+
return false
489488
}
490489

491490
// io.EOF errors are considered safe to retry for certain requests
492491
// No classifier lookup is needed for this case; report it as retry-able directly.
493492
if err == io.EOF && isIdempotent(request) {
494-
err = fails.IdempotentRequestEOFError
493+
return true
495494
}
496495
// We can retry for sure if we never obtained a connection
497496
// since there is no way any data was transmitted. If headers could not
498497
// be written in full, the request should also be safe to retry.
499498
if !trace.GotConn() || !trace.WroteHeaders() {
500-
err = fmt.Errorf("%w (%w)", fails.IncompleteRequestError, err)
499+
return true
501500
}
502501

503-
retriable := rt.retriableClassifier.Classify(err)
504-
return retriable, err
502+
return rt.retriableClassifier.Classify(err)
505503
}

proxy/round_tripper/proxy_round_tripper_test.go

Lines changed: 0 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ import (
44
"bytes"
55
"errors"
66
"fmt"
7-
"io"
87
"net"
98
"net/http"
109
"net/http/httptest"
@@ -23,7 +22,6 @@ import (
2322
"code.cloudfoundry.org/gorouter/config"
2423
"code.cloudfoundry.org/gorouter/handlers"
2524
"code.cloudfoundry.org/gorouter/metrics/fakes"
26-
"code.cloudfoundry.org/gorouter/proxy/fails"
2725
"code.cloudfoundry.org/gorouter/proxy/handler"
2826
"code.cloudfoundry.org/gorouter/proxy/round_tripper"
2927
"code.cloudfoundry.org/gorouter/proxy/utils"
@@ -434,94 +432,6 @@ var _ = Describe("ProxyRoundTripper", func() {
434432
})
435433
})
436434

437-
DescribeTable("when the backend fails with an empty response error (io.EOF)",
438-
func(reqBody io.ReadCloser, getBodyIsNil bool, reqMethod string, headers map[string]string, classify fails.ClassifierFunc, expectRetry bool) {
439-
badResponse := &http.Response{
440-
Header: make(map[string][]string),
441-
}
442-
badResponse.Header.Add(handlers.VcapRequestIdHeader, "some-request-id")
443-
444-
// The first request fails with io.EOF, the second (if retried) succeeds
445-
transport.RoundTripStub = func(*http.Request) (*http.Response, error) {
446-
switch transport.RoundTripCallCount() {
447-
case 1:
448-
return nil, io.EOF
449-
case 2:
450-
return &http.Response{StatusCode: http.StatusTeapot}, nil
451-
default:
452-
return nil, nil
453-
}
454-
}
455-
456-
retriableClassifier.ClassifyStub = classify
457-
req.Method = reqMethod
458-
req.Body = reqBody
459-
if !getBodyIsNil {
460-
req.GetBody = func() (io.ReadCloser, error) {
461-
return new(testBody), nil
462-
}
463-
}
464-
if headers != nil {
465-
for key, value := range headers {
466-
req.Header.Add(key, value)
467-
}
468-
}
469-
470-
res, err := proxyRoundTripper.RoundTrip(req)
471-
472-
if expectRetry {
473-
Expect(err).NotTo(HaveOccurred())
474-
Expect(transport.RoundTripCallCount()).To(Equal(2))
475-
Expect(retriableClassifier.ClassifyCallCount()).To(Equal(1))
476-
Expect(res.StatusCode).To(Equal(http.StatusTeapot))
477-
} else {
478-
Expect(errors.Is(err, io.EOF)).To(BeTrue())
479-
Expect(transport.RoundTripCallCount()).To(Equal(1))
480-
Expect(retriableClassifier.ClassifyCallCount()).To(Equal(1))
481-
}
482-
},
483-
484-
Entry("POST, body is empty: does not retry", nil, true, "POST", nil, fails.IdempotentRequestEOF, false),
485-
Entry("POST, body is not empty and GetBody is non-nil: does not retry", reqBody, false, "POST", nil, fails.IdempotentRequestEOF, false),
486-
Entry("POST, body is not empty: does not retry", reqBody, true, "POST", nil, fails.IdempotentRequestEOF, false),
487-
Entry("POST, body is http.NoBody: does not retry", http.NoBody, true, "POST", nil, fails.IdempotentRequestEOF, false),
488-
489-
Entry("POST, body is empty, X-Idempotency-Key header: attempts retry", nil, true, "POST", map[string]string{"X-Idempotency-Key": "abc123"}, fails.IncompleteRequest, true),
490-
Entry("POST, body is not empty and GetBody is non-nil, X-Idempotency-Key header: attempts retry", reqBody, false, "POST", map[string]string{"X-Idempotency-Key": "abc123"}, fails.IncompleteRequest, true),
491-
Entry("POST, body is not empty, X-Idempotency-Key header: does not retry", reqBody, true, "POST", map[string]string{"X-Idempotency-Key": "abc123"}, fails.IdempotentRequestEOF, false),
492-
Entry("POST, body is http.NoBody, X-Idempotency-Key header: does not retry", http.NoBody, true, "POST", map[string]string{"X-Idempotency-Key": "abc123"}, fails.IdempotentRequestEOF, false),
493-
494-
Entry("POST, body is empty, Idempotency-Key header: attempts retry", nil, true, "POST", map[string]string{"Idempotency-Key": "abc123"}, fails.IncompleteRequest, true),
495-
Entry("POST, body is not empty and GetBody is non-nil, Idempotency-Key header: attempts retry", reqBody, false, "POST", map[string]string{"Idempotency-Key": "abc123"}, fails.IncompleteRequest, true),
496-
Entry("POST, body is not empty, Idempotency-Key header: does not retry", reqBody, true, "POST", map[string]string{"Idempotency-Key": "abc123"}, fails.IdempotentRequestEOF, false),
497-
Entry("POST, body is http.NoBody, Idempotency-Key header: does not retry", http.NoBody, true, "POST", map[string]string{"Idempotency-Key": "abc123"}, fails.IdempotentRequestEOF, false),
498-
499-
Entry("GET, body is empty: attempts retry", nil, true, "GET", nil, fails.IncompleteRequest, true),
500-
Entry("GET, body is not empty and GetBody is non-nil: attempts retry", reqBody, false, "GET", nil, fails.IncompleteRequest, true),
501-
Entry("GET, body is not empty: does not retry", reqBody, true, "GET", nil, fails.IdempotentRequestEOF, false),
502-
Entry("GET, body is http.NoBody: does not retry", http.NoBody, true, "GET", nil, fails.IdempotentRequestEOF, false),
503-
504-
Entry("TRACE, body is empty: attempts retry", nil, true, "TRACE", nil, fails.IncompleteRequest, true),
505-
Entry("TRACE, body is not empty: does not retry", reqBody, true, "TRACE", nil, fails.IdempotentRequestEOF, false),
506-
Entry("TRACE, body is http.NoBody: does not retry", http.NoBody, true, "TRACE", nil, fails.IdempotentRequestEOF, false),
507-
Entry("TRACE, body is not empty and GetBody is non-nil: attempts retry", reqBody, false, "TRACE", nil, fails.IncompleteRequest, true),
508-
509-
Entry("HEAD, body is empty: attempts retry", nil, true, "HEAD", nil, fails.IncompleteRequest, true),
510-
Entry("HEAD, body is not empty: does not retry", reqBody, true, "HEAD", nil, fails.IdempotentRequestEOF, false),
511-
Entry("HEAD, body is http.NoBody: does not retry", http.NoBody, true, "HEAD", nil, fails.IdempotentRequestEOF, false),
512-
Entry("HEAD, body is not empty and GetBody is non-nil: attempts retry", reqBody, false, "HEAD", nil, fails.IncompleteRequest, true),
513-
514-
Entry("OPTIONS, body is empty: attempts retry", nil, true, "OPTIONS", nil, fails.IncompleteRequest, true),
515-
Entry("OPTIONS, body is not empty and GetBody is non-nil: attempts retry", reqBody, false, "OPTIONS", nil, fails.IncompleteRequest, true),
516-
Entry("OPTIONS, body is not empty: does not retry", reqBody, true, "OPTIONS", nil, fails.IdempotentRequestEOF, false),
517-
Entry("OPTIONS, body is http.NoBody: does not retry", http.NoBody, true, "OPTIONS", nil, fails.IdempotentRequestEOF, false),
518-
519-
Entry("<empty method>, body is empty: attempts retry", nil, true, "", nil, fails.IncompleteRequest, true),
520-
Entry("<empty method>, body is not empty and GetBody is non-nil: attempts retry", reqBody, false, "", nil, fails.IncompleteRequest, true),
521-
Entry("<empty method>, body is not empty: does not retry", reqBody, true, "", nil, fails.IdempotentRequestEOF, false),
522-
Entry("<empty method>, body is http.NoBody: does not retry", http.NoBody, true, "", nil, fails.IdempotentRequestEOF, false),
523-
)
524-
525435
Context("when there are no more endpoints available", func() {
526436
BeforeEach(func() {
527437
removed := routePool.Remove(endpoint)

0 commit comments

Comments
 (0)