Skip to content

Commit 2c0eb17

Browse files
committed
drtprod: yaml config for drt clusters
All the YAML configurations and scripts for the drt clusters are present in this PR Fixes: #125381 Epic: None
1 parent bec01ef commit 2c0eb17

File tree

6 files changed

+417
-0
lines changed

6 files changed

+417
-0
lines changed
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
# Creates and configures the drt-chaos and workload-chaos clusters, including
# their Datadog setup.
---
environment:
  # NOTE(review): this value reads "[email protected]" in this copy, which looks
  # like a redacted e-mail address; confirm the real service account before use.
  ROACHPROD_GCE_DEFAULT_SERVICE_ACCOUNT: [email protected]
  ROACHPROD_DNS: drt.crdb.io
  ROACHPROD_GCE_DNS_DOMAIN: drt.crdb.io
  ROACHPROD_GCE_DNS_ZONE: drt
  ROACHPROD_GCE_DEFAULT_PROJECT: cockroach-drt
  # Cluster names referenced as $CLUSTER / $WORKLOAD_CLUSTER below and read by
  # the setup_datadog_* scripts.
  CLUSTER: drt-chaos
  WORKLOAD_CLUSTER: workload-chaos

targets:
  # CockroachDB cluster: 6 nodes across three us-east1 zones.
  - target_name: $CLUSTER
    steps:
      - command: create
        args:
          - $CLUSTER
        flags:
          clouds: gce
          gce-managed: true
          gce-enable-multiple-stores: true
          gce-zones: "us-east1-d,us-east1-b,us-east1-c"
          nodes: 6
          gce-machine-type: n2-standard-16
          local-ssd: true
          gce-local-ssd-count: 4
          username: drt
          lifetime: 8760h
          gce-image: "ubuntu-2204-jammy-v20240319"
        on_rollback:
          - command: destroy
            args:
              - $CLUSTER
      - command: sync
        flags:
          clouds: gce
      - command: stage
        args:
          - $CLUSTER
          - cockroach
      - script: "pkg/cmd/drtprod/configs/setup_datadog_cluster"
      - command: start
        args:
          - $CLUSTER
          - "--binary"
          - "./cockroach"
        flags:
          enable-fluent-sink: true
          # The create step enables multiple stores with 4 local SSDs per node;
          # start one store per SSD, consistent with the drt-large cluster config.
          store-count: 4
          restart: false
          sql-port: 26257
        on_rollback:
          - command: stop
            args:
              - $CLUSTER
      # Enable cron and register an @reboot crontab entry that runs
      # ~/cockroach.sh after a 100 second sleep.
      - command: run
        args:
          - $CLUSTER
          - "--"
          - "sudo systemctl unmask cron.service ; sudo systemctl enable cron.service ; echo \"crontab -l ; echo '@reboot sleep 100 && ~/cockroach.sh' | crontab -\" > t.sh ; sh t.sh ; rm t.sh"
  # Workload driver: a single node in us-east1-c.
  - target_name: $WORKLOAD_CLUSTER
    steps:
      - command: create
        args:
          - $WORKLOAD_CLUSTER
        flags:
          clouds: gce
          gce-zones: "us-east1-c"
          nodes: 1
          gce-machine-type: n2-standard-8
          os-volume-size: 100
          username: workload
          lifetime: 8760h
        on_rollback:
          - command: destroy
            args:
              - $WORKLOAD_CLUSTER
      - command: sync
        flags:
          clouds: gce
      - command: stage
        args:
          - $WORKLOAD_CLUSTER
          - cockroach
      - command: stage
        args:
          - $WORKLOAD_CLUSTER
          - workload
      - script: "pkg/cmd/drtprod/configs/setup_datadog_workload"
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# Destroys the drt-chaos and workload-chaos clusters.
---
environment:
  # NOTE(review): this value reads "[email protected]" in this copy, which looks
  # like a redacted e-mail address; confirm the real service account before use.
  ROACHPROD_GCE_DEFAULT_SERVICE_ACCOUNT: [email protected]
  ROACHPROD_DNS: drt.crdb.io
  ROACHPROD_GCE_DNS_DOMAIN: drt.crdb.io
  ROACHPROD_GCE_DNS_ZONE: drt
  ROACHPROD_GCE_DEFAULT_PROJECT: cockroach-drt
  # Cluster names referenced as $CLUSTER / $WORKLOAD_CLUSTER below.
  CLUSTER: drt-chaos
  WORKLOAD_CLUSTER: workload-chaos

targets:
  # One target per cluster; each runs a single destroy step.
  - target_name: $CLUSTER
    steps:
      - command: destroy
        args:
          - $CLUSTER
  - target_name: $WORKLOAD_CLUSTER
    steps:
      - command: destroy
        args:
          - $WORKLOAD_CLUSTER
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
# Creates and configures the drt-large and workload-large clusters, including
# their Datadog setup.
---
environment:
  # NOTE(review): this value reads "[email protected]" in this copy, which looks
  # like a redacted e-mail address; confirm the real service account before use.
  ROACHPROD_GCE_DEFAULT_SERVICE_ACCOUNT: [email protected]
  ROACHPROD_DNS: drt.crdb.io
  ROACHPROD_GCE_DNS_DOMAIN: drt.crdb.io
  ROACHPROD_GCE_DNS_ZONE: drt
  ROACHPROD_GCE_DEFAULT_PROJECT: cockroach-drt
  # Cluster names referenced as $CLUSTER / $WORKLOAD_CLUSTER below and read by
  # the setup_datadog_* scripts.
  CLUSTER: drt-large
  WORKLOAD_CLUSTER: workload-large

targets:
  # CockroachDB cluster: 15 nodes spread over three regions.
  - target_name: $CLUSTER
    steps:
      - command: create
        args:
          - $CLUSTER
        flags:
          clouds: gce
          gce-managed: true
          gce-enable-multiple-stores: true
          # The per-zone ":N" suffixes total 15, matching `nodes`.
          gce-zones: "northamerica-northeast2-a:2,northamerica-northeast2-b:2,northamerica-northeast2-c:1,us-east5-a:2,us-east5-b:2,us-east5-c:1,us-east1-b:2,us-east1-c:2,us-east1-d:1"
          nodes: 15
          gce-machine-type: n2-standard-16
          local-ssd: true
          gce-local-ssd-count: 4
          os-volume-size: 100
          username: drt
          lifetime: 8760h
        on_rollback:
          - command: destroy
            args:
              - $CLUSTER
      - command: sync
        flags:
          clouds: gce
      - command: stage
        args:
          - $CLUSTER
          - cockroach
      - script: "pkg/cmd/drtprod/configs/setup_datadog_cluster"
      - command: start
        args:
          - $CLUSTER
          - "--binary"
          - "./cockroach"
        flags:
          enable-fluent-sink: true
          # Same value as gce-local-ssd-count in the create step.
          store-count: 4
          restart: false
          sql-port: 26257
        on_rollback:
          - command: stop
            args:
              - $CLUSTER
      # Enable cron and register an @reboot crontab entry that runs
      # ~/cockroach.sh after a 100 second sleep.
      - command: run
        args:
          - $CLUSTER
          - "--"
          - "sudo systemctl unmask cron.service ; sudo systemctl enable cron.service ; echo \"crontab -l ; echo '@reboot sleep 100 && ~/cockroach.sh' | crontab -\" > t.sh ; sh t.sh ; rm t.sh"
      # Set the timeseries and default zone configs to 5 replicas / 5 voters,
      # issued through node 1.
      - command: sql
        args:
          - $CLUSTER:1
          - "--"
          - "-e"
          - "ALTER RANGE timeseries CONFIGURE ZONE USING num_replicas=5,num_voters=5"
      - command: sql
        args:
          - $CLUSTER:1
          - "--"
          - "-e"
          - "ALTER RANGE default CONFIGURE ZONE USING num_replicas=5,num_voters=5"
  # Workload drivers: 3 nodes, one zone listed per region.
  - target_name: $WORKLOAD_CLUSTER
    steps:
      - command: create
        args:
          - $WORKLOAD_CLUSTER
        flags:
          clouds: gce
          gce-zones: "northamerica-northeast2-a,us-east5-a,us-east1-b"
          nodes: 3
          gce-machine-type: n2d-standard-4
          os-volume-size: 100
          username: workload
          lifetime: 8760h
        on_rollback:
          - command: destroy
            args:
              - $WORKLOAD_CLUSTER
      - command: sync
        flags:
          clouds: gce
      - command: stage
        args:
          - $WORKLOAD_CLUSTER
          - cockroach
      - command: stage
        args:
          - $WORKLOAD_CLUSTER
          - workload
      - script: "pkg/cmd/drtprod/configs/setup_datadog_workload"
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# Destroys the drt-large and workload-large clusters.
---
environment:
  # NOTE(review): this value reads "[email protected]" in this copy, which looks
  # like a redacted e-mail address; confirm the real service account before use.
  ROACHPROD_GCE_DEFAULT_SERVICE_ACCOUNT: [email protected]
  ROACHPROD_DNS: drt.crdb.io
  ROACHPROD_GCE_DNS_DOMAIN: drt.crdb.io
  ROACHPROD_GCE_DNS_ZONE: drt
  ROACHPROD_GCE_DEFAULT_PROJECT: cockroach-drt
  # Cluster names referenced as $CLUSTER / $WORKLOAD_CLUSTER below.
  CLUSTER: drt-large
  WORKLOAD_CLUSTER: workload-large

targets:
  # One target per cluster; each runs a single destroy step.
  - target_name: $CLUSTER
    steps:
      - command: destroy
        args:
          - $CLUSTER
  - target_name: $WORKLOAD_CLUSTER
    steps:
      - command: destroy
        args:
          - $WORKLOAD_CLUSTER
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
#!/bin/bash

# Sets up Datadog log and telemetry forwarding for a drt cluster.
#
# Requires the CLUSTER environment variable (a roachprod cluster name,
# optionally with a ":<nodes>" suffix). Exits with status 1 if CLUSTER is
# unset/empty or if no Datadog API key could be obtained.

if [ -z "${CLUSTER}" ]; then
  echo "environment CLUSTER is not set"
  exit 1
fi

# TODO: the author notes that this lookup does not work as-is; it needs to be
# replaced with a working way of obtaining the Datadog API key.
dd_api_key="$(gcloud --project=cockroach-drt secrets versions access latest --secret datadog-api-key)"

if [ -z "${dd_api_key}" ]; then
  echo "Missing Datadog API key!"
  exit 1
fi

dd_site="us5.datadoghq.com"

# Write the fluent-bit override config on the cluster. The heredoc lives inside
# a double-quoted string, so ${dd_site}, ${dd_api_key} and ${CLUSTER%:*} are
# expanded by this (local) shell before the command is sent.
# ${CLUSTER%:*} strips a trailing ":<suffix>" so only the cluster name is used
# as the tag value.
roachprod ssh "$CLUSTER" -- "sudo mkdir -p /etc/fluent-bit && sudo tee /etc/fluent-bit/config-override.yaml > /dev/null << EOF
---
pipeline:
  inputs:
    - name: tail
      path: /var/log/audit/audit.log
      tag: audit
      key: message
      storage.type: filesystem
      alias: audit
  outputs:
    - name: datadog
      match: audit
      host: http-intake.logs.${dd_site}
      tls: on
      compress: gzip
      apikey: ${dd_api_key}
      dd_source: audit
      dd_service: drt-cockroachdb
      dd_tags: env:development,cluster:${CLUSTER%:*},service:drt-cockroachdb,team:drt
      alias: audit
      storage.total_limit_size: 25MB
EOF"

# Export the Datadog settings from a profile.d snippet on the cluster.
roachprod ssh "$CLUSTER" -- "sudo tee /etc/profile.d/99-datadog.sh > /dev/null << EOF
export DD_SITE=${dd_site}
export DD_API_KEY=${dd_api_key}
export DD_TAGS=env:development,cluster:${CLUSTER%:*},team:drt,service:drt-cockroachdb
EOF"

roachprod opentelemetry-start "$CLUSTER" \
  --datadog-api-key "${dd_api_key}" \
  --datadog-tags 'service:drt-cockroachdb,team:drt'

roachprod fluent-bit-start "$CLUSTER" \
  --datadog-api-key "${dd_api_key}" \
  --datadog-service drt-cockroachdb \
  --datadog-tags 'service:drt-cockroachdb,team:drt'

echo
echo "Updated $CLUSTER configuration to send telemetry data to Datadog."
echo
echo "If this was the first time this script was run against $CLUSTER then"
echo "CockroachDB must be restarted to reload its logging configuration."
echo

exit 0

0 commit comments

Comments
 (0)