Skip to content

Commit 5cc95fb

Browse files
author
Kubernetes Submit Queue
authored
Merge pull request kubernetes#53417 from crassirostris/audit-defaults
Automatic merge from submit-queue. If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>. Adjust defaults of audit webhook backends. This PR: - increases the default buffer size so that it can hold at least an order of magnitude more audit events than there can be simultaneous requests (500 AFAIR) - increases the default batch size. From our load tests, the .95 quantile size of a log entry is under 2.5KB, therefore 400 entries will sum up to a ~1MB request, which sounds reasonable - increases the initial backoff. AFAIU, if the initial value is zero, all retries will be used up in under 15 seconds (with 0.2 jitter and 1.5 factor), while the backend or a proxy can be unavailable for some reason for 30 seconds or more. - adds throttling to the batching audit webhook. A PR to make these parameters configurable will follow. @hzxuzhonghu implemented the throttling part of this PR. ```release-note Adjust batching audit webhook default parameters: increase queue size, batch size, and initial backoff. Add throttling to the batching audit webhook. Default rate limit is 10 QPS. ``` /cc @sttts @tallclair @CaoShuFeng @ericchiang @piosz
2 parents 4f00d3a + 6bce120 commit 5cc95fb

File tree

3 files changed

+28
-7
lines changed

3 files changed

+28
-7
lines changed

staging/src/k8s.io/apiserver/Godeps/Godeps.json

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

staging/src/k8s.io/apiserver/plugin/pkg/audit/webhook/BUILD

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ go_library(
4141
"//vendor/k8s.io/apiserver/pkg/apis/audit/v1beta1:go_default_library",
4242
"//vendor/k8s.io/apiserver/pkg/audit:go_default_library",
4343
"//vendor/k8s.io/apiserver/pkg/util/webhook:go_default_library",
44+
"//vendor/k8s.io/client-go/rest:go_default_library",
45+
"//vendor/k8s.io/client-go/util/flowcontrol:go_default_library",
4446
],
4547
)
4648

staging/src/k8s.io/apiserver/plugin/pkg/audit/webhook/webhook.go

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ import (
3434
auditv1beta1 "k8s.io/apiserver/pkg/apis/audit/v1beta1"
3535
"k8s.io/apiserver/pkg/audit"
3636
"k8s.io/apiserver/pkg/util/webhook"
37+
"k8s.io/client-go/rest"
38+
"k8s.io/client-go/util/flowcontrol"
3739
)
3840

3941
const (
@@ -58,9 +60,13 @@ const (
5860
//
5961
// TODO(ericchiang): Make these value configurable. Maybe through a
6062
// kubeconfig extension?
61-
defaultBatchBufferSize = 1000 // Buffer up to 1000 events before blocking.
62-
defaultBatchMaxSize = 100 // Only send 100 events at a time.
63-
defaultBatchMaxWait = time.Minute // Send events at least once a minute.
63+
defaultBatchBufferSize = 10000 // Buffer up to 10000 events before starting discarding.
64+
defaultBatchMaxSize = 400 // Only send up to 400 events at a time.
65+
defaultBatchMaxWait = 30 * time.Second // Send events at least twice a minute.
66+
defaultInitialBackoff = 10 * time.Second // Wait at least 10 seconds before retrying.
67+
68+
defaultBatchThrottleQPS = 10 // Limit the send rate by 10 QPS.
69+
defaultBatchThrottleBurst = 15 // Allow up to 15 QPS burst.
6470
)
6571

6672
// The plugin name reported in error metrics.
@@ -100,7 +106,8 @@ func init() {
100106
}
101107

102108
func loadWebhook(configFile string, groupVersion schema.GroupVersion) (*webhook.GenericWebhook, error) {
103-
return webhook.NewGenericWebhook(registry, audit.Codecs, configFile, []schema.GroupVersion{groupVersion}, 0)
109+
return webhook.NewGenericWebhook(registry, audit.Codecs, configFile,
110+
[]schema.GroupVersion{groupVersion}, defaultInitialBackoff)
104111
}
105112

106113
func newBlockingWebhook(configFile string, groupVersion schema.GroupVersion) (*blockingBackend, error) {
@@ -151,6 +158,7 @@ func newBatchWebhook(configFile string, groupVersion schema.GroupVersion) (*batc
151158
maxBatchSize: defaultBatchMaxSize,
152159
maxBatchWait: defaultBatchMaxWait,
153160
shutdownCh: make(chan struct{}),
161+
throttle: flowcontrol.NewTokenBucketRateLimiter(defaultBatchThrottleQPS, defaultBatchThrottleBurst),
154162
}, nil
155163
}
156164

@@ -178,6 +186,9 @@ type batchBackend struct {
178186
// all requests have been completed and no new will be spawned, since the
179187
// sending routine is not running anymore.
180188
reqMutex sync.RWMutex
189+
190+
// Limits the number of requests sent to the backend per second.
191+
throttle flowcontrol.RateLimiter
181192
}
182193

183194
func (b *batchBackend) Run(stopCh <-chan struct{}) error {
@@ -303,6 +314,10 @@ func (b *batchBackend) sendBatchEvents(events []auditinternal.Event) {
303314

304315
list := auditinternal.EventList{Items: events}
305316

317+
if b.throttle != nil {
318+
b.throttle.Accept()
319+
}
320+
306321
// Locking reqMutex for read will guarantee that the shutdown process will
307322
// block until the goroutine started below is finished. At the same time, it
308323
// will not prevent other batches from proceeding past this point.
@@ -314,9 +329,9 @@ func (b *batchBackend) sendBatchEvents(events []auditinternal.Event) {
314329
defer b.reqMutex.RUnlock()
315330
defer runtime.HandleCrash()
316331

317-
err := webhook.WithExponentialBackoff(0, func() error {
318-
return b.w.RestClient.Post().Body(&list).Do().Error()
319-
})
332+
err := b.w.WithExponentialBackoff(func() rest.Result {
333+
return b.w.RestClient.Post().Body(&list).Do()
334+
}).Error()
320335
if err != nil {
321336
impacted := make([]*auditinternal.Event, len(events))
322337
for i := range events {

0 commit comments

Comments
 (0)