Skip to content

Commit f2055e7

Browse files
authored
Merge pull request #387 from rohanpm/retry-rehttp
Implement retries at http RoundTripper level [RHELDST-22079]
2 parents 0915791 + 35495dc commit f2055e7

13 files changed

+236
-3
lines changed

README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,12 @@ gwpollinterval: 5000
230230
# When adding items onto an exodus-gw publish, what is the maximum number of
231231
# items we'll include in a single HTTP request.
232232
gwbatchsize: 10000
233+
234+
# How many times to retry failing HTTP requests.
235+
gwmaxattempts: 3
236+
237+
# Maximum duration (in milliseconds) between retries of HTTP requests.
238+
gwmaxbackoff: 20000
233239
```
234240
235241
In order to publish to exodus CDN it is necessary to configure all of the

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ require (
1616
)
1717

1818
require (
19+
github.com/PuerkitoBio/rehttp v1.3.0 // indirect
1920
github.com/davecgh/go-spew v1.1.1 // indirect
2021
github.com/fatih/color v1.16.0 // indirect
2122
github.com/go-playground/locales v0.14.1 // indirect

go.sum

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
github.com/PuerkitoBio/rehttp v1.3.0 h1:w54Pb72MQn2eJrSdPsvGqXlAfiK1+NMTGDrOJJ4YvSU=
2+
github.com/PuerkitoBio/rehttp v1.3.0/go.mod h1:LUwKPoDbDIA2RL5wYZCNsQ90cx4OJ4AWBmq6KzWZL1s=
13
github.com/adrg/xdg v0.4.0 h1:RzRqFcjH4nE5C6oTAxhBtoE2IRyjBSa62SCbyPidvls=
24
github.com/adrg/xdg v0.4.0/go.mod h1:N6ag73EX4wyxeaoeHctc1mas01KZgsj5tYiAIwqJE/E=
35
github.com/alecthomas/assert/v2 v2.1.0 h1:tbredtNcQnoSd3QBhQWI7QZ3XHOVkw1Moklp2ojoH/0=
@@ -12,12 +14,15 @@ github.com/aphistic/sweet v0.2.0/go.mod h1:fWDlIh/isSE9n6EPsRmC0det+whmX6dJid3st
1214
github.com/aws/aws-sdk-go v1.20.6/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo=
1315
github.com/aws/aws-sdk-go v1.48.3 h1:btYjT+opVFxUbRz+qSCjJe07cdX82BHmMX/FXYmoL7g=
1416
github.com/aws/aws-sdk-go v1.48.3/go.mod h1:LF8svs817+Nz+DmiMQKTO3ubZ/6IaTpq3TjupRn3Eqk=
17+
github.com/aybabtme/iocontrol v0.0.0-20150809002002-ad15bcfc95a0/go.mod h1:6L7zgvqo0idzI7IO8de6ZC051AfXb5ipkIJ7bIA2tGA=
1518
github.com/aybabtme/rgbterm v0.0.0-20170906152045-cc83f3b3ce59/go.mod h1:q/89r3U2H7sSsE2t6Kca0lfwTK8JdoNGS/yzM/4iH5I=
19+
github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA=
1620
github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs=
1721
github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
1822
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
1923
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
2024
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
25+
github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
2126
github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4=
2227
github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM=
2328
github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE=
@@ -105,6 +110,7 @@ golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73r
105110
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
106111
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
107112
golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM=
113+
golang.org/x/net v0.0.0-20210510120150-4163338589ed/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
108114
golang.org/x/net v0.18.0 h1:mIYleuAkSbHh0tCv7RvjL3F6ZVbLjq4+R7zbOn3Kokg=
109115
golang.org/x/net v0.18.0/go.mod h1:/czyP5RqHAH4odGYxBJ1qz0+CE5WZ+2j1YgoEo8F2jQ=
110116
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@@ -116,6 +122,7 @@ golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5h
116122
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
117123
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
118124
golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
125+
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
119126
golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
120127
golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
121128
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
@@ -126,6 +133,7 @@ golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9sn
126133
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
127134
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
128135
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
136+
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
129137
golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ=
130138
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
131139
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=

internal/conf/conf.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,12 @@ type Config interface {
4343
// Commit mode for publishes.
4444
GwCommit() string
4545

46+
// Maximum attempts for any HTTP request to exodus-gw.
47+
GwMaxAttempts() int
48+
49+
// Maximum backoff between retried HTTP requests, in milliseconds.
50+
GwMaxBackoff() int
51+
4652
// Execution mode for rsync.
4753
RsyncMode() string
4854

internal/conf/conf_test.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ environments:
4141
gwkey: override-key
4242
gwpollinterval: 123
4343
gwcommit: cba
44+
gwmaxattempts: 50
45+
gwmaxbackoff: 60
4446
rsyncmode: mixed
4547
strip: dest:/foo/bar
4648
uploadthreads: 6
@@ -100,6 +102,8 @@ environments:
100102
assertEqual("global gwenv", cfg.GwEnv(), "global-env")
101103
assertEqual("global gwpollinterval", cfg.GwPollInterval(), 5000)
102104
assertEqual("global gwcommit", cfg.GwCommit(), "abc")
105+
assertEqual("global gwmaxattempts", cfg.GwMaxAttempts(), 3)
106+
assertEqual("global gwmaxbackoff", cfg.GwMaxBackoff(), 20000)
103107
assertEqual("global rsyncmode", cfg.RsyncMode(), "exodus")
104108
assertEqual("global strip", cfg.Strip(), "dest:/foo")
105109
assertEqual("global uploadthreads", cfg.UploadThreads(), 4)
@@ -109,6 +113,8 @@ environments:
109113
assertEqual("env gwkey", env.GwKey(), "override-key")
110114
assertEqual("env gwpollinterval", env.GwPollInterval(), 123)
111115
assertEqual("env gwcommit", env.GwCommit(), "cba")
116+
assertEqual("env gwmaxattempts", env.GwMaxAttempts(), 50)
117+
assertEqual("env gwmaxbackoff", env.GwMaxBackoff(), 60)
112118
assertEqual("env rsyncmode", env.RsyncMode(), "mixed")
113119
assertEqual("env strip", env.Strip(), "dest:/foo/bar")
114120
assertEqual("env uploadthreads", env.UploadThreads(), 6)

internal/conf/mock.go

Lines changed: 84 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

internal/conf/structs.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ type sharedConfig struct {
1515
GwPollIntervalRaw int `yaml:"gwpollinterval"`
1616
GwBatchSizeRaw int `yaml:"gwbatchsize"`
1717
GwCommitRaw string `yaml:"gwcommit"`
18+
GwMaxAttemptsRaw int `yaml:"gwmaxattempts"`
19+
GwMaxBackoffRaw int `yaml:"gwmaxbackoff"`
1820
RsyncModeRaw string `yaml:"rsyncmode"`
1921
LogLevelRaw string `yaml:"loglevel"`
2022
LoggerRaw string `yaml:"logger"`
@@ -78,6 +80,14 @@ func (g *globalConfig) GwCommit() string {
7880
return g.GwCommitRaw
7981
}
8082

83+
func (g *globalConfig) GwMaxAttempts() int {
84+
return nonEmptyInt(g.GwMaxAttemptsRaw, 3)
85+
}
86+
87+
func (g *globalConfig) GwMaxBackoff() int {
88+
return nonEmptyInt(g.GwMaxBackoffRaw, 20000)
89+
}
90+
8191
func (g *globalConfig) UploadThreads() int {
8292
return nonEmptyInt(g.UploadThreadsRaw, 4)
8393
}
@@ -148,6 +158,14 @@ func (e *environment) GwCommit() string {
148158
return nonEmptyString(e.GwCommitRaw, e.parent.GwCommit())
149159
}
150160

161+
func (e *environment) GwMaxAttempts() int {
162+
return nonEmptyInt(e.GwMaxAttemptsRaw, e.parent.GwMaxAttempts())
163+
}
164+
165+
func (e *environment) GwMaxBackoff() int {
166+
return nonEmptyInt(e.GwMaxBackoffRaw, e.parent.GwMaxBackoff())
167+
}
168+
151169
func (e *environment) RsyncMode() string {
152170
return nonEmptyString(e.RsyncModeRaw, e.parent.RsyncMode())
153171
}

internal/diag/diag.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@ func logConfig(ctx context.Context, cfg conf.Config) {
6464
"gwenv", cfg.GwEnv(),
6565
"gwpollinterval", cfg.GwPollInterval(),
6666
"gwbatchsize", cfg.GwBatchSize(),
67+
"gwmaxattempts", cfg.GwMaxAttempts(),
68+
"gwmaxbackoff", cfg.GwMaxBackoff(),
6769
).Warn("exodus-gw")
6870

6971
logger.F(

internal/diag/diag_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ func mockConfig(ctrl *gomock.Controller) conf.Config {
2424
e.GwEnv().Return("test-env").AnyTimes()
2525
e.GwPollInterval().Return(123).AnyTimes()
2626
e.GwBatchSize().Return(234).AnyTimes()
27+
e.GwMaxAttempts().Return(345).AnyTimes()
28+
e.GwMaxBackoff().Return(456).AnyTimes()
2729
e.RsyncMode().Return("mixed").AnyTimes()
2830
e.LogLevel().Return("debug").AnyTimes()
2931
e.Logger().Return("syslog").AnyTimes()

internal/gw/client.go

Lines changed: 70 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,9 @@ import (
1111
"os"
1212
"runtime"
1313
"sync"
14+
"time"
1415

16+
"github.com/PuerkitoBio/rehttp"
1517
"github.com/aws/aws-sdk-go/aws"
1618
"github.com/aws/aws-sdk-go/aws/awserr"
1719
"github.com/aws/aws-sdk-go/aws/credentials"
@@ -341,6 +343,64 @@ func (c *client) EnsureUploaded(
341343
return <-out
342344
}
343345

346+
func retryWithLogging(logger *log.Logger, fn rehttp.RetryFn) rehttp.RetryFn {
347+
// Wraps a rehttp.RetryFn to add warnings on retries.
348+
return func(attempt rehttp.Attempt) bool {
349+
willRetry := fn(attempt)
350+
status := "<none>"
351+
if attempt.Response != nil {
352+
// status is the HTTP response status and that
353+
// sometimes won't be present.
354+
status = attempt.Response.Status
355+
}
356+
357+
entry := logger.F(
358+
"url", attempt.Request.URL,
359+
"index", attempt.Index,
360+
"method", attempt.Request.Method,
361+
"status", status,
362+
"error", attempt.Error,
363+
)
364+
365+
if willRetry {
366+
entry.Warn("Retrying failed request")
367+
} else {
368+
// This is Debug because we get here even for successful
369+
// requests, so it's not normally worth logging.
370+
// But if we don't log at all, it's hard to find out which
371+
// errors are not getting retried in order to tune it.
372+
entry.Debug("Not retrying request")
373+
}
374+
375+
return willRetry
376+
}
377+
}
378+
379+
func retryTransport(ctx context.Context, cfg conf.Config, rt http.RoundTripper) http.RoundTripper {
380+
// Wrap a roundtripper with retries.
381+
logger := log.FromContext(ctx)
382+
383+
retryFn := rehttp.RetryAll(
384+
rehttp.RetryMaxRetries(cfg.GwMaxAttempts()),
385+
rehttp.RetryAny(
386+
rehttp.RetryStatuses(500, 502, 503, 504),
387+
rehttp.RetryTimeoutErr(),
388+
rehttp.RetryIsErr(func(err error) bool {
389+
return err == io.EOF
390+
}),
391+
),
392+
)
393+
retryFn = retryWithLogging(logger, retryFn)
394+
395+
return rehttp.NewTransport(rt,
396+
retryFn,
397+
rehttp.ExpJitterDelay(
398+
time.Duration(2)*time.Second,
399+
time.Duration(cfg.GwMaxBackoff())*time.Millisecond,
400+
),
401+
)
402+
}
403+
344404
func (impl) NewClient(ctx context.Context, cfg conf.Config) (Client, error) {
345405
cert, err := tls.LoadX509KeyPair(cfg.GwCert(), cfg.GwKey())
346406
if err != nil {
@@ -354,7 +414,15 @@ func (impl) NewClient(ctx context.Context, cfg conf.Config) (Client, error) {
354414
Certificates: []tls.Certificate{cert},
355415
},
356416
}
357-
out.httpClient = &http.Client{Transport: &transport}
417+
418+
// This client is passed into AWS SDK and it should not add any
419+
// retry logic because the AWS SDK already does that:
420+
s3HttpClient := &http.Client{Transport: &transport}
421+
422+
// This client is used outside of the AWS SDK (i.e. for requests
423+
// to "publish" API) and it should wrap the transport to enable
424+
// retries for certain types of error.
425+
out.httpClient = &http.Client{Transport: retryTransport(ctx, cfg, &transport)}
358426

359427
awsLogLevel := aws.LogOff
360428
if cfg.Verbosity() > 2 || cfg.LogLevel() == "trace" {
@@ -368,7 +436,7 @@ func (impl) NewClient(ctx context.Context, cfg conf.Config) (Client, error) {
368436
S3ForcePathStyle: aws.Bool(true),
369437
Region: aws.String("us-east-1"),
370438
Credentials: credentials.AnonymousCredentials,
371-
HTTPClient: out.httpClient,
439+
HTTPClient: s3HttpClient,
372440
Logger: log.FromContext(ctx),
373441
LogLevel: aws.LogLevel(awsLogLevel),
374442
},

internal/gw/client_publish_errors_test.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,28 @@ func TestClientPublishErrors(t *testing.T) {
119119
}
120120
})
121121

122+
t.Run("can recover by retrying", func(t *testing.T) {
123+
// Force an EOF, then a 500 error, and finally a successful
124+
// response. The caller should only see the success.
125+
gw.nextHTTPError = io.EOF
126+
gw.nextHTTPResponse = &http.Response{
127+
Status: "500 Internal Server Error",
128+
StatusCode: 500,
129+
Body: io.NopCloser(strings.NewReader("some error")),
130+
}
131+
132+
gw.publishes["some-id"] = &fakePublish{id: "some-id"}
133+
134+
p, err := clientIface.GetPublish(ctx, "some-id")
135+
if err != nil {
136+
t.Errorf("failed to get publish, err = %v", err)
137+
}
138+
id := p.ID()
139+
if id != "some-id" {
140+
t.Errorf("got unexpected id %s", id)
141+
}
142+
})
143+
122144
t.Run("missing link for commit", func(t *testing.T) {
123145
// Create a publish object directly without filling in any Links.
124146
publish := publish{client: clientIface.(*client)}

0 commit comments

Comments
 (0)