Skip to content

Commit a2b9d5b

Browse files
committed
fix: re-raise CancelledError during sleep to handle real task cancellation
If CancelledError occurs during backoff sleep, it's a real task cancellation (app shutdown, external task.cancel()), not a gRPC error. gRPC can't interrupt our sleep - only external cancellation can. This prevents the reconnector from swallowing legitimate cancellation requests.
1 parent f203133 commit a2b9d5b

File tree

8 files changed

+440
-14
lines changed

8 files changed

+440
-14
lines changed
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
#!/bin/sh -e
2+
3+
# Pick one running ydb-database-* container at random.
#
# Output:  sets the global variable nodeForChaos to the chosen container name.
# Returns: 0 on success; 1 (with a message on stderr) when no matching
#          container is running, so callers never act on an empty name.
get_random_container() {
    # `docker ps` prints one container name per line; grep keeps only the
    # database nodes and `shuf -n 1` emits a single randomly chosen line.
    # The pipeline's exit status is shuf's, so an empty match does not abort
    # the script here under `sh -e`; it is detected explicitly below instead.
    nodeForChaos=$(docker ps --format '{{.Names}}' | grep '^ydb-database-' | shuf -n 1)

    if [ -z "$nodeForChaos" ]; then
        echo "chaos: no running ydb-database-* container found" >&2
        return 1
    fi
}
19+
20+
21+
# Wait before injecting the first failure.
sleep 20

echo "Start CHAOS YDB cluster!"

# Five rounds of disruption. Every round performs five stop/start cycles
# followed by three restarts, each against a freshly chosen random database
# container, pausing 20 seconds after every action.
for i in $(seq 1 5)
do
    for j in $(seq 1 5)
    do
        echo "[$(date)]: docker stop/start iteration $i"

        get_random_container

        # Call docker directly (no nested `sh -c`) with the name quoted, so it
        # is passed as a single argument and never re-parsed by a second shell.
        # -t 10: wait up to 10 seconds before the container is killed.
        docker stop -t 10 "${nodeForChaos}"
        docker start "${nodeForChaos}"

        sleep 20
    done

    for j in $(seq 1 3)
    do
        echo "[$(date)]: docker restart iteration $i"

        get_random_container

        # -t 0: no grace period before the container is killed and restarted.
        docker restart -t 0 "${nodeForChaos}"

        sleep 20
    done
done

# Finish with a hard kill of one more randomly chosen container.
get_random_container

echo "[$(date)]: docker kill -s SIGKILL ${nodeForChaos}"

docker kill -s SIGKILL "${nodeForChaos}"
Lines changed: 283 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,283 @@
1+
# Runtime settings shared by the containers in this stack; other definitions
# pull them in through the `*runtime` alias.
x-runtime: &runtime
  network_mode: host
  privileged: true
  platform: linux/amd64
  hostname: localhost
6+
7+
# Base definition for the YDB containers: the shared runtime settings plus the
# YDB image, an always-restart policy and the bind-mounted cluster config.
x-ydb-node: &ydb-node
  <<: *runtime
  image: cr.yandex/crptqonuodf51kdj7a7d/ydb:24.4.4.12
  restart: always
  volumes: ["./ydb.yaml:/opt/ydb/cfg/config.yaml"]
13+
14+
# Compose project name.
name: ydb
15+
16+
# Containers of the local YDB cluster plus the chaos fault injector.
services:
17+
# Static node of the cluster. The init jobs and the database node in this file
# address it at grpc://localhost:2135.
static-0:
  <<: *ydb-node
  container_name: ydb-static-0
  command:
    - /opt/ydb/bin/ydbd
    - server
    - --grpc-port
    - "2135"
    - --mon-port
    - "8765"
    - --ic-port
    - "19001"
    - --yaml-config
    - /opt/ydb/cfg/config.yaml
    - --node
    - static
    - --label
    - deployment=docker
  # No `ports:` section: this service inherits `network_mode: host` through
  # *ydb-node, so 2135/8765/19001 are bound directly on the host. Port
  # mappings are redundant in host mode and the Compose reference disallows
  # combining the two.
  healthcheck:
    # Passes once a TCP connection to the gRPC port can be opened.
    test: bash -c "exec 6<> /dev/tcp/localhost/2135"
    interval: 10s
    timeout: 1s
    retries: 3
    start_period: 30s
45+
46+
# One-shot job: runs `ydbd admin blobstorage config init` against the static
# node with the shared config file, once static-0 reports healthy.
static-init:
  <<: *ydb-node
  container_name: ydb-static-init
  # Explicit key takes precedence over the `restart: always` merged in from
  # *ydb-node, so the job is retried only when it fails.
  restart: on-failure
  depends_on:
    static-0:
      condition: service_healthy
  command: [
    "/opt/ydb/bin/ydbd",
    "-s", "grpc://localhost:2135",
    "admin", "blobstorage", "config", "init",
    "--yaml-file", "/opt/ydb/cfg/config.yaml"
  ]
63+
64+
# One-shot job: runs `ydbd admin database /Root/testdb create ssd:1` against
# the static node after static-init has completed successfully.
tenant-init:
  <<: *ydb-node
  container_name: ydb-tenant-init
  # Explicit key takes precedence over the `restart: always` merged in from
  # *ydb-node, so the job is retried only when it fails.
  restart: on-failure
  depends_on:
    static-init:
      condition: service_completed_successfully
  command: [
    "/opt/ydb/bin/ydbd",
    "-s", "grpc://localhost:2135",
    "admin", "database", "/Root/testdb", "create", "ssd:1"
  ]
80+
81+
# Database node serving the /Root/testdb tenant. It registers through the node
# broker at grpc://localhost:2135 and starts only after the static node is
# healthy and both init jobs have completed successfully.
database-1:
  <<: *ydb-node
  container_name: ydb-database-1
  command:
    - /opt/ydb/bin/ydbd
    - server
    - --grpc-port
    - "2136"
    - --mon-port
    - "8766"
    - --ic-port
    - "19002"
    - --yaml-config
    - /opt/ydb/cfg/config.yaml
    - --tenant
    - /Root/testdb
    - --node-broker
    - grpc://localhost:2135
    - --label
    - deployment=docker
  # No `ports:` section: this service inherits `network_mode: host` through
  # *ydb-node, so 2136/8766/19002 are bound directly on the host. Port
  # mappings are redundant in host mode and the Compose reference disallows
  # combining the two.
  healthcheck:
    # Passes once a TCP connection to the gRPC port can be opened.
    test: bash -c "exec 6<> /dev/tcp/localhost/2136"
    interval: 10s
    timeout: 1s
    retries: 3
    start_period: 30s
  depends_on:
    static-0:
      condition: service_healthy
    static-init:
      condition: service_completed_successfully
    tenant-init:
      condition: service_completed_successfully
118+
119+
# database-2:
120+
# <<: *ydb-node
121+
# container_name: ydb-database-2
122+
# command:
123+
# - /opt/ydb/bin/ydbd
124+
# - server
125+
# - --grpc-port
126+
# - "2137"
127+
# - --mon-port
128+
# - "8767"
129+
# - --ic-port
130+
# - "19003"
131+
# - --yaml-config
132+
# - /opt/ydb/cfg/config.yaml
133+
# - --tenant
134+
# - /Root/testdb
135+
# - --node-broker
136+
# - grpc://localhost:2135
137+
# - --label
138+
# - deployment=docker
139+
# ports:
140+
# - 2137:2137
141+
# - 8767:8767
142+
# - 19003:19003
143+
# healthcheck:
144+
# test: bash -c "exec 6<> /dev/tcp/localhost/2137"
145+
# interval: 10s
146+
# timeout: 1s
147+
# retries: 3
148+
# start_period: 30s
149+
# depends_on:
150+
# static-0:
151+
# condition: service_healthy
152+
# static-init:
153+
# condition: service_completed_successfully
154+
# tenant-init:
155+
# condition: service_completed_successfully
156+
157+
# database-3:
158+
# <<: *ydb-node
159+
# container_name: ydb-database-3
160+
# command:
161+
# - /opt/ydb/bin/ydbd
162+
# - server
163+
# - --grpc-port
164+
# - "2138"
165+
# - --mon-port
166+
# - "8768"
167+
# - --ic-port
168+
# - "19004"
169+
# - --yaml-config
170+
# - /opt/ydb/cfg/config.yaml
171+
# - --tenant
172+
# - /Root/testdb
173+
# - --node-broker
174+
# - grpc://localhost:2135
175+
# - --label
176+
# - deployment=docker
177+
# ports:
178+
# - 2138:2138
179+
# - 8768:8768
180+
# - 19004:19004
181+
# healthcheck:
182+
# test: bash -c "exec 6<> /dev/tcp/localhost/2138"
183+
# interval: 10s
184+
# timeout: 1s
185+
# retries: 3
186+
# start_period: 30s
187+
# depends_on:
188+
# static-0:
189+
# condition: service_healthy
190+
# static-init:
191+
# condition: service_completed_successfully
192+
# tenant-init:
193+
# condition: service_completed_successfully
194+
195+
# database-4:
196+
# <<: *ydb-node
197+
# container_name: ydb-database-4
198+
# command:
199+
# - /opt/ydb/bin/ydbd
200+
# - server
201+
# - --grpc-port
202+
# - "2139"
203+
# - --mon-port
204+
# - "8769"
205+
# - --ic-port
206+
# - "19005"
207+
# - --yaml-config
208+
# - /opt/ydb/cfg/config.yaml
209+
# - --tenant
210+
# - /Root/testdb
211+
# - --node-broker
212+
# - grpc://localhost:2135
213+
# - --label
214+
# - deployment=docker
215+
# ports:
216+
# - 2139:2139
217+
# - 8769:8769
218+
# - 19005:19005
219+
# healthcheck:
220+
# test: bash -c "exec 6<> /dev/tcp/localhost/2139"
221+
# interval: 10s
222+
# timeout: 1s
223+
# retries: 3
224+
# start_period: 30s
225+
# depends_on:
226+
# static-0:
227+
# condition: service_healthy
228+
# static-init:
229+
# condition: service_completed_successfully
230+
# tenant-init:
231+
# condition: service_completed_successfully
232+
233+
# database-5:
234+
# <<: *ydb-node
235+
# container_name: ydb-database-5
236+
# command:
237+
# - /opt/ydb/bin/ydbd
238+
# - server
239+
# - --grpc-port
240+
# - "2140"
241+
# - --mon-port
242+
# - "8770"
243+
# - --ic-port
244+
# - "19006"
245+
# - --yaml-config
246+
# - /opt/ydb/cfg/config.yaml
247+
# - --tenant
248+
# - /Root/testdb
249+
# - --node-broker
250+
# - grpc://localhost:2135
251+
# - --label
252+
# - deployment=docker
253+
# ports:
254+
# - 2140:2140
255+
# - 8770:8770
256+
# - 19006:19006
257+
# healthcheck:
258+
# test: bash -c "exec 6<> /dev/tcp/localhost/2140"
259+
# interval: 10s
260+
# timeout: 1s
261+
# retries: 3
262+
# start_period: 30s
263+
# depends_on:
264+
# static-0:
265+
# condition: service_healthy
266+
# static-init:
267+
# condition: service_completed_successfully
268+
# tenant-init:
269+
# condition: service_completed_successfully
270+
271+
# Fault injector: runs chaos.sh, which drives the host's Docker daemon through
# the mounted socket, once static-0 reports healthy.
chaos:
  image: docker:latest
  restart: on-failure
  container_name: ydb-chaos
  <<: *runtime
  # Run the script through the interpreter instead of `chmod +x`-ing it first:
  # chmod on a bind mount modifies the file on the host, and the debug
  # `ls -la` only added log noise. `-e` keeps the abort-on-error behaviour
  # (assumes ./chaos.sh is the `#!/bin/sh -e` script; confirm).
  entrypoint: ["/bin/sh", "-e", "/opt/ydb/chaos.sh"]
  volumes:
    # Read-only: the container no longer needs to change the script's mode.
    - ./chaos.sh:/opt/ydb/chaos.sh:ro
    # NOTE(review): nothing in the entrypoint reads this file; confirm whether
    # the mount is still needed.
    - ./ydb.yaml:/opt/ydb/cfg/config.yaml
    - /var/run/docker.sock:/var/run/docker.sock
  depends_on:
    static-0:
      condition: service_healthy

0 commit comments

Comments
 (0)