Commit d11c45d

Merge branch 'etcd-io:main' into migrate-experimental-snapshot-catch-up-entries
2 parents: 9a9ae8c + 8c263d2

File tree: 5 files changed, +80 −25 lines

Diff for: tests/robustness/README.md (+16 −13)

@@ -8,19 +8,20 @@ The purpose of these tests is to rigorously validate that etcd maintains its [KV

## Robustness track record

The track record table is reflowed (column widths only) and gains one row for issue [#19179]; the resulting table is:

| Correctness / Consistency issue | Report | Introduced in | Discovered by | Reproducible by robustness test | Command |
|---------------------------------|--------|---------------|---------------|---------------------------------|---------|
| Inconsistent revision caused by crash during high load [#13766] | Mar 2022 | v3.5 | User | Yes, report preceded robustness tests | `make test-robustness-issue13766` |
| Single node cluster can lose a write on crash [#14370] | Aug 2022 | v3.4 or earlier | User | Yes, report preceded robustness tests | `make test-robustness-issue14370` |
| Enabling auth can lead to inconsistency [#14571] | Oct 2022 | v3.4 or earlier | User | No, authorization is not covered | |
| Inconsistent revision caused by crash during defrag [#14685] | Nov 2022 | v3.5 | Robustness | Yes, after covering defragmentation | `make test-robustness-issue14685` |
| Watch progress notification not synced with stream [#15220] | Jan 2023 | v3.4 or earlier | User | Yes, after covering watch progress notification | |
| Watch traveling back in time after network partition [#15271] | Feb 2023 | v3.4 or earlier | Robustness | Yes, after covering network partitions | `make test-robustness-issue15271` |
| Duplicated watch event due to bug in TXN caching [#17247] | Jan 2024 | main branch | Robustness | Yes, prevented regression in v3.6 | |
| Watch events lost during stream starvation [#17529] | Mar 2024 | v3.4 or earlier | User | Yes, after covering slow watch | `make test-robustness-issue17529` |
| Revision decreasing caused by crash during compaction [#17780] | Apr 2024 | v3.4 or earlier | Robustness | Yes, after covering compaction | |
| Watch dropping an event when compacting on delete [#18089] | May 2024 | v3.4 or earlier | Robustness | Yes, after covering compaction | `make test-robustness-issue18089` |
| Inconsistency when reading compacted revision in TXN [#18667] | Oct 2024 | v3.4 or earlier | User | | |
| Missing delete event on watch opened on same revision as compaction [#19179] | Jan 2025 | v3.4 or earlier | Robustness | Yes, after covering compaction | `make test-robustness-issue19179` |

[#13766]: https://github.com/etcd-io/etcd/issues/13766
[#14370]: https://github.com/etcd-io/etcd/issues/14370

The second hunk adds the matching link definition:

@@ -33,6 +34,8 @@ The purpose of these tests is to rigorously validate that etcd maintains its [KV
 [#17780]: https://github.com/etcd-io/etcd/issues/17780
 [#18089]: https://github.com/etcd-io/etcd/issues/18089
 [#18667]: https://github.com/etcd-io/etcd/issues/18667
+[#19179]: https://github.com/etcd-io/etcd/issues/19179

 ## How Robustness Tests Work
Diff for: tests/robustness/makefile.mk (+5)

@@ -54,6 +54,11 @@ test-robustness-issue18089: /tmp/etcd-v3.5.12-beforeSendWatchResponse/bin
 	GO_TEST_FLAGS='-v -run=TestRobustnessRegression/Issue18089 -count 100 -failfast --bin-dir=/tmp/etcd-v3.5.12-beforeSendWatchResponse/bin' make test-robustness && \
 	echo "Failed to reproduce" || echo "Successful reproduction"

+.PHONY: test-robustness-issue19179
+test-robustness-issue19179: /tmp/etcd-v3.5.17-failpoints/bin
+	GO_TEST_FLAGS='-v -run=TestRobustnessRegression/Issue19179 -count 200 -failfast --bin-dir=/tmp/etcd-v3.5.17-failpoints/bin' make test-robustness && \
+	echo "Failed to reproduce" || echo "Successful reproduction"
+
 # Failpoints

 GOPATH = $(shell go env GOPATH)
Diff for: tests/robustness/scenarios/scenarios.go (+31)

@@ -224,6 +224,37 @@ func Regression(t *testing.T) []TestScenario {
 			e2e.WithGoFailEnabled(true),
 		),
 	})
+	// NOTE:
+	//
+	// 1. All keys have only two revisions: creation and tombstone. With
+	// a small compaction batch limit, it's easy to separate a key's two
+	// revisions into different batch runs. If the compaction revision is a
+	// tombstone and the creation revision was deleted in a previous
+	// compaction run, we may encounter issue 19179.
+	//
+	// 2. It can be easily reproduced when using a lower QPS with a lower
+	// burstable value. A higher QPS can generate more new keys than
+	// expected, making it difficult to determine an optimal compaction
+	// batch limit within a larger key space.
+	scenarios = append(scenarios, TestScenario{
+		Name: "Issue19179",
+		Profile: traffic.Profile{
+			MinimalQPS:                     50,
+			MaximalQPS:                     100,
+			BurstableQPS:                   100,
+			ClientCount:                    8,
+			MaxNonUniqueRequestConcurrency: 3,
+		}.WithoutCompaction(),
+		Failpoint: failpoint.BatchCompactBeforeSetFinishedCompactPanic,
+		Traffic:   traffic.KubernetesCreateDelete,
+		Cluster: *e2e.NewConfig(
+			e2e.WithClusterSize(1),
+			e2e.WithExperimentalCompactionBatchLimit(50),
+			e2e.WithSnapshotCount(1000),
+			e2e.WithGoFailEnabled(true),
+		),
+	})
 	scenarios = append(scenarios, TestScenario{
 		Name:    "Issue18089",
 		Profile: traffic.LowTraffic.WithCompactionPeriod(100 * time.Millisecond), // Use frequent compaction for high reproduce rate

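To make the NOTE above concrete, here is a minimal standalone Go sketch, not etcd's compactor code: the key names, revision numbers, and the batch limit of 3 are illustrative assumptions. It shows how a small compaction batch limit can put a key's creation revision and its tombstone into different batch runs, which is the shape of history the Issue19179 scenario relies on.

```go
package main

import "fmt"

// revision is a simplified stand-in for an etcd revision: with a create/delete-only
// workload, every key has exactly two of them, the creation and the tombstone.
type revision struct {
	rev       int
	key       string
	tombstone bool
}

func main() {
	// Build a revision stream for five keys, each created and then deleted.
	var revs []revision
	rev := 1
	for i := 0; i < 5; i++ {
		key := fmt.Sprintf("/pods/pod-%d", i)
		revs = append(revs, revision{rev: rev, key: key})                      // create
		revs = append(revs, revision{rev: rev + 1, key: key, tombstone: true}) // delete
		rev += 2
	}

	// batchLimit plays the role of the compaction batch limit configured via
	// e2e.WithExperimentalCompactionBatchLimit(50) in the scenario above.
	const batchLimit = 3
	for start := 0; start < len(revs); start += batchLimit {
		end := start + batchLimit
		if end > len(revs) {
			end = len(revs)
		}
		fmt.Printf("compaction batch run over revisions %d..%d\n", revs[start].rev, revs[end-1].rev)
		for _, r := range revs[start:end] {
			fmt.Printf("  rev=%d key=%s tombstone=%v\n", r.rev, r.key, r.tombstone)
		}
	}
	// With batchLimit = 3, pod-1's creation (rev 3) lands in the first run while
	// its tombstone (rev 4) lands in the second: the creation has already been
	// compacted away by the time the tombstone is processed, which is the
	// situation described in the NOTE for issue 19179.
}
```
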
Diff for: tests/robustness/traffic/kubernetes.go (+24 −11)

@@ -35,17 +35,30 @@ import (
 	"go.etcd.io/etcd/tests/v3/robustness/random"
 )

-var Kubernetes Traffic = kubernetesTraffic{
-	averageKeyCount: 10,
-	resource:        "pods",
-	namespace:       "default",
-	// Please keep the sum of weights equal 100.
-	writeChoices: []random.ChoiceWeight[KubernetesRequestType]{
-		{Choice: KubernetesUpdate, Weight: 90},
-		{Choice: KubernetesDelete, Weight: 5},
-		{Choice: KubernetesCreate, Weight: 5},
-	},
-}
+var (
+	Kubernetes Traffic = kubernetesTraffic{
+		averageKeyCount: 10,
+		resource:        "pods",
+		namespace:       "default",
+		// Please keep the sum of weights equal 100.
+		writeChoices: []random.ChoiceWeight[KubernetesRequestType]{
+			{Choice: KubernetesUpdate, Weight: 90},
+			{Choice: KubernetesDelete, Weight: 5},
+			{Choice: KubernetesCreate, Weight: 5},
+		},
+	}
+
+	KubernetesCreateDelete Traffic = kubernetesTraffic{
+		averageKeyCount: 10,
+		resource:        "pods",
+		namespace:       "default",
+		// Please keep the sum of weights equal 100.
+		writeChoices: []random.ChoiceWeight[KubernetesRequestType]{
+			{Choice: KubernetesDelete, Weight: 40},
+			{Choice: KubernetesCreate, Weight: 60},
+		},
+	}
+)

 type kubernetesTraffic struct {
 	averageKeyCount int

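The new KubernetesCreateDelete profile skews the write mix to 60% creates and 40% deletes so that nearly every key ends up as a creation followed by a tombstone. As a rough illustration of how such a weight list behaves, here is a generic weighted-pick sketch; it is not the framework's random package implementation, and the choiceWeight/pickWeighted names are hypothetical.

```go
package main

import (
	"fmt"
	"math/rand"
)

// choiceWeight pairs a choice with an integer weight; the weights are expected
// to sum to 100, mirroring the "keep the sum of weights equal 100" comment above.
type choiceWeight[T any] struct {
	choice T
	weight int
}

// pickWeighted returns one choice with probability proportional to its weight.
// Illustrative only; the robustness framework has its own picker.
func pickWeighted[T any](choices []choiceWeight[T]) T {
	total := 0
	for _, c := range choices {
		total += c.weight
	}
	n := rand.Intn(total)
	for _, c := range choices {
		n -= c.weight
		if n < 0 {
			return c.choice
		}
	}
	return choices[len(choices)-1].choice
}

func main() {
	writeChoices := []choiceWeight[string]{
		{choice: "create", weight: 60},
		{choice: "delete", weight: 40},
	}
	counts := map[string]int{}
	for i := 0; i < 10000; i++ {
		counts[pickWeighted(writeChoices)]++
	}
	// Expect roughly 6000 creates and 4000 deletes.
	fmt.Println(counts)
}
```
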
Diff for: tests/robustness/traffic/traffic.go (+4 −1)

@@ -41,12 +41,14 @@ var (
 	LowTraffic = Profile{
 		MinimalQPS:                     100,
 		MaximalQPS:                     200,
+		BurstableQPS:                   1000,
 		ClientCount:                    8,
 		MaxNonUniqueRequestConcurrency: 3,
 	}
 	HighTrafficProfile = Profile{
 		MinimalQPS:                     100,
 		MaximalQPS:                     1000,
+		BurstableQPS:                   1000,
 		ClientCount:                    8,
 		MaxNonUniqueRequestConcurrency: 3,
 	}

@@ -59,7 +61,7 @@ func SimulateTraffic(ctx context.Context, t *testing.T, lg *zap.Logger, clus *e2
 	lm := identity.NewLeaseIDStorage()
 	reports := []report.ClientReport{}
 	// Use the highest MaximalQPS of all traffic profiles as burst otherwise actual traffic may be accidentally limited
-	limiter := rate.NewLimiter(rate.Limit(profile.MaximalQPS), 1000)
+	limiter := rate.NewLimiter(rate.Limit(profile.MaximalQPS), profile.BurstableQPS)

 	cc, err := client.NewRecordingClient(endpoints, ids, baseTime)
 	require.NoError(t, err)

@@ -178,6 +180,7 @@ func (ts *trafficStats) QPS() float64 {
 type Profile struct {
 	MinimalQPS                     float64
 	MaximalQPS                     float64
+	BurstableQPS                   int
 	MaxNonUniqueRequestConcurrency int
 	ClientCount                    int
 	ForbidCompaction               bool

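The new BurstableQPS field replaces the hard-coded burst of 1000 in SimulateTraffic's rate limiter, letting a scenario like Issue19179 run with a deliberately small burst. A short sketch of why the burst argument of golang.org/x/time/rate matters; the QPS and burst numbers here are illustrative, not the robustness profile values.

```go
package main

import (
	"context"
	"fmt"
	"time"

	"golang.org/x/time/rate"
)

// drain issues n requests through the limiter and reports how long they take.
func drain(limiter *rate.Limiter, n int) time.Duration {
	start := time.Now()
	for i := 0; i < n; i++ {
		if err := limiter.Wait(context.Background()); err != nil {
			panic(err)
		}
	}
	return time.Since(start)
}

func main() {
	// Both limiters allow 100 requests per second on average, but differ in
	// how large a burst they let through without waiting.
	tight := rate.NewLimiter(rate.Limit(100), 1)   // burst of 1 token
	loose := rate.NewLimiter(rate.Limit(100), 100) // burst of 100 tokens

	fmt.Println("burst 1:  ", drain(tight, 50)) // ~0.5s: every request after the first waits for a token
	fmt.Println("burst 100:", drain(loose, 50)) // ~0s: the full token bucket absorbs the spike
}
```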