Skip to content

Commit 686752b

Browse files
authored
Discretionary single-node ClickHouse zones (#6800)
Automatically allocate one _single-node_ ClickHouse zone. This is almost completely independent of the ongoing multi-node ClickHouse Server/Keeper work, except that we try to respect the new `ClickhousePolicy::deploy_with_standalone` flag if present. Validated on `a4x2` by manually expunging the sole ClickHouse zone in the current blueprint and then regenerating.
1 parent b0639b0 commit 686752b

16 files changed: +535 −370 lines

common/src/policy.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,10 @@ pub const INTERNAL_DNS_REDUNDANCY: usize = 3;
4040
/// value.
4141
pub const RESERVED_INTERNAL_DNS_REDUNDANCY: usize = 5;
4242

43+
/// The amount of redundancy for single-node ClickHouse servers
44+
/// (*not* replicated aka multi-node clusters).
45+
pub const SINGLE_NODE_CLICKHOUSE_REDUNDANCY: usize = 1;
46+
4347
/// The amount of redundancy for clickhouse servers
4448
///
4549
/// Clickhouse servers contain lazily replicated data

nexus/reconfigurator/planning/src/blueprint_builder/builder.rs

Lines changed: 55 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ use nexus_types::inventory::Collection;
3939
use omicron_common::address::get_sled_address;
4040
use omicron_common::address::get_switch_zone_address;
4141
use omicron_common::address::ReservedRackSubnet;
42+
use omicron_common::address::CLICKHOUSE_HTTP_PORT;
4243
use omicron_common::address::CP_SERVICES_RESERVED_ADDRESSES;
4344
use omicron_common::address::DNS_HTTP_PORT;
4445
use omicron_common::address::DNS_PORT;
@@ -1248,6 +1249,58 @@ impl<'a> BlueprintBuilder<'a> {
12481249
Ok(EnsureMultiple::Changed { added: num_crdb_to_add, removed: 0 })
12491250
}
12501251

1252+
fn sled_add_zone_clickhouse(
1253+
&mut self,
1254+
sled_id: SledUuid,
1255+
) -> Result<Ensure, Error> {
1256+
let id = self.rng.zone_rng.next();
1257+
let underlay_address = self.sled_alloc_ip(sled_id)?;
1258+
let address =
1259+
SocketAddrV6::new(underlay_address, CLICKHOUSE_HTTP_PORT, 0, 0);
1260+
let pool_name =
1261+
self.sled_select_zpool(sled_id, ZoneKind::Clickhouse)?;
1262+
let zone_type =
1263+
BlueprintZoneType::Clickhouse(blueprint_zone_type::Clickhouse {
1264+
address,
1265+
dataset: OmicronZoneDataset { pool_name: pool_name.clone() },
1266+
});
1267+
1268+
let zone = BlueprintZoneConfig {
1269+
disposition: BlueprintZoneDisposition::InService,
1270+
id,
1271+
underlay_address,
1272+
filesystem_pool: Some(pool_name),
1273+
zone_type,
1274+
};
1275+
self.sled_add_zone(sled_id, zone)?;
1276+
Ok(Ensure::Added)
1277+
}
1278+
1279+
pub fn sled_ensure_zone_multiple_clickhouse(
1280+
&mut self,
1281+
sled_id: SledUuid,
1282+
desired_zone_count: usize,
1283+
) -> Result<EnsureMultiple, Error> {
1284+
// How many single-node ClickHouse zones do we want to add?
1285+
let count =
1286+
self.sled_num_running_zones_of_kind(sled_id, ZoneKind::Clickhouse);
1287+
let to_add = match desired_zone_count.checked_sub(count) {
1288+
Some(0) => return Ok(EnsureMultiple::NotNeeded),
1289+
Some(n) => n,
1290+
None => {
1291+
return Err(Error::Planner(anyhow!(
1292+
"removing a single-node ClickHouse zone not yet supported \
1293+
(sled {sled_id} has {count}; \
1294+
planner wants {desired_zone_count})"
1295+
)));
1296+
}
1297+
};
1298+
for _ in 0..to_add {
1299+
self.sled_add_zone_clickhouse(sled_id)?;
1300+
}
1301+
Ok(EnsureMultiple::Changed { added: to_add, removed: 0 })
1302+
}
1303+
12511304
pub fn sled_ensure_zone_multiple_clickhouse_server(
12521305
&mut self,
12531306
sled_id: SledUuid,
@@ -1275,8 +1328,8 @@ impl<'a> BlueprintBuilder<'a> {
12751328
let underlay_ip = self.sled_alloc_ip(sled_id)?;
12761329
let pool_name =
12771330
self.sled_select_zpool(sled_id, ZoneKind::ClickhouseServer)?;
1278-
let port = omicron_common::address::CLICKHOUSE_HTTP_PORT;
1279-
let address = SocketAddrV6::new(underlay_ip, port, 0, 0);
1331+
let address =
1332+
SocketAddrV6::new(underlay_ip, CLICKHOUSE_HTTP_PORT, 0, 0);
12801333
let zone_type = BlueprintZoneType::ClickhouseServer(
12811334
blueprint_zone_type::ClickhouseServer {
12821335
address,

nexus/reconfigurator/planning/src/example.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,10 @@ impl ExampleSystemBuilder {
285285
vec![],
286286
)
287287
.unwrap();
288+
if i == 0 {
289+
let _ = builder
290+
.sled_ensure_zone_multiple_clickhouse(sled_id, 1);
291+
}
288292
let _ = builder
289293
.sled_ensure_zone_multiple_internal_dns(
290294
sled_id,

nexus/reconfigurator/planning/src/planner.rs

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,7 @@ impl<'a> Planner<'a> {
353353

354354
for zone_kind in [
355355
DiscretionaryOmicronZone::BoundaryNtp,
356+
DiscretionaryOmicronZone::Clickhouse,
356357
DiscretionaryOmicronZone::ClickhouseKeeper,
357358
DiscretionaryOmicronZone::ClickhouseServer,
358359
DiscretionaryOmicronZone::CockroachDb,
@@ -434,6 +435,9 @@ impl<'a> Planner<'a> {
434435
DiscretionaryOmicronZone::BoundaryNtp => {
435436
self.input.target_boundary_ntp_zone_count()
436437
}
438+
DiscretionaryOmicronZone::Clickhouse => {
439+
self.input.target_clickhouse_zone_count()
440+
}
437441
DiscretionaryOmicronZone::ClickhouseKeeper => {
438442
self.input.target_clickhouse_keeper_zone_count()
439443
}
@@ -530,6 +534,12 @@ impl<'a> Planner<'a> {
530534
DiscretionaryOmicronZone::BoundaryNtp => self
531535
.blueprint
532536
.sled_promote_internal_ntp_to_boundary_ntp(sled_id)?,
537+
DiscretionaryOmicronZone::Clickhouse => {
538+
self.blueprint.sled_ensure_zone_multiple_clickhouse(
539+
sled_id,
540+
new_total_zone_count,
541+
)?
542+
}
533543
DiscretionaryOmicronZone::ClickhouseKeeper => {
534544
self.blueprint.sled_ensure_zone_multiple_clickhouse_keeper(
535545
sled_id,
@@ -2440,6 +2450,75 @@ mod test {
24402450
logctx.cleanup_successful();
24412451
}
24422452

2453+
/// Check that the planner can replace a single-node ClickHouse zone.
2454+
/// This is completely distinct from (and much simpler than) the replicated
2455+
/// (multi-node) case.
2456+
#[test]
2457+
fn test_single_node_clickhouse() {
2458+
static TEST_NAME: &str = "test_single_node_clickhouse";
2459+
let logctx = test_setup_log(TEST_NAME);
2460+
2461+
// Use our example system as a starting point.
2462+
let (collection, input, blueprint1) = example(&logctx.log, TEST_NAME);
2463+
2464+
// We should start with one ClickHouse zone. Find out which sled it's on.
2465+
let clickhouse_sleds = blueprint1
2466+
.all_omicron_zones(BlueprintZoneFilter::All)
2467+
.filter_map(|(sled, zone)| {
2468+
zone.zone_type.is_clickhouse().then(|| Some(sled))
2469+
})
2470+
.collect::<Vec<_>>();
2471+
assert_eq!(
2472+
clickhouse_sleds.len(),
2473+
1,
2474+
"can't find ClickHouse zone in initial blueprint"
2475+
);
2476+
let clickhouse_sled = clickhouse_sleds[0].expect("missing sled id");
2477+
2478+
// Expunge the sled hosting ClickHouse and re-plan. The planner should
2479+
// immediately replace the zone with one on another (non-expunged) sled.
2480+
let mut input_builder = input.into_builder();
2481+
input_builder
2482+
.sleds_mut()
2483+
.get_mut(&clickhouse_sled)
2484+
.expect("can't find sled")
2485+
.policy = SledPolicy::Expunged;
2486+
let input = input_builder.build();
2487+
let blueprint2 = Planner::new_based_on(
2488+
logctx.log.clone(),
2489+
&blueprint1,
2490+
&input,
2491+
"test_blueprint2",
2492+
&collection,
2493+
)
2494+
.expect("failed to create planner")
2495+
.with_rng_seed((TEST_NAME, "bp2"))
2496+
.plan()
2497+
.expect("failed to re-plan");
2498+
2499+
let diff = blueprint2.diff_since_blueprint(&blueprint1);
2500+
println!("1 -> 2 (expunged sled):\n{}", diff.display());
2501+
assert_eq!(
2502+
blueprint2
2503+
.all_omicron_zones(BlueprintZoneFilter::ShouldBeRunning)
2504+
.filter(|(sled, zone)| *sled != clickhouse_sled
2505+
&& zone.zone_type.is_clickhouse())
2506+
.count(),
2507+
1,
2508+
"can't find replacement ClickHouse zone"
2509+
);
2510+
2511+
// Test a no-op planning iteration.
2512+
assert_planning_makes_no_changes(
2513+
&logctx.log,
2514+
&blueprint2,
2515+
&input,
2516+
TEST_NAME,
2517+
);
2518+
2519+
logctx.cleanup_successful();
2520+
}
2521+
24432522
/// Deploy all keeper nodes server nodes at once for a new cluster.
24442523
/// Then add keeper nodes 1 at a time.
24452524
#[test]

nexus/reconfigurator/planning/src/planner/omicron_zone_placement.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ use std::mem;
1515
#[cfg_attr(test, derive(test_strategy::Arbitrary))]
1616
pub(crate) enum DiscretionaryOmicronZone {
1717
BoundaryNtp,
18+
Clickhouse,
1819
ClickhouseKeeper,
1920
ClickhouseServer,
2021
CockroachDb,
@@ -31,6 +32,7 @@ impl DiscretionaryOmicronZone {
3132
) -> Option<Self> {
3233
match zone_type {
3334
BlueprintZoneType::BoundaryNtp(_) => Some(Self::BoundaryNtp),
35+
BlueprintZoneType::Clickhouse(_) => Some(Self::Clickhouse),
3436
BlueprintZoneType::ClickhouseKeeper(_) => {
3537
Some(Self::ClickhouseKeeper)
3638
}
@@ -43,7 +45,6 @@ impl DiscretionaryOmicronZone {
4345
BlueprintZoneType::Nexus(_) => Some(Self::Nexus),
4446
BlueprintZoneType::Oximeter(_) => Some(Self::Oximeter),
4547
// Zones that we should place but don't yet.
46-
BlueprintZoneType::Clickhouse(_)
4748
| BlueprintZoneType::CruciblePantry(_)
4849
// Zones that get special handling for placement (all sleds get
4950
// them, although internal NTP has some interactions with boundary
@@ -58,6 +59,7 @@ impl From<DiscretionaryOmicronZone> for ZoneKind {
5859
fn from(zone: DiscretionaryOmicronZone) -> Self {
5960
match zone {
6061
DiscretionaryOmicronZone::BoundaryNtp => Self::BoundaryNtp,
62+
DiscretionaryOmicronZone::Clickhouse => Self::Clickhouse,
6163
DiscretionaryOmicronZone::ClickhouseKeeper => {
6264
Self::ClickhouseKeeper
6365
}

nexus/reconfigurator/planning/tests/output/blueprint_builder_initial_diff.txt

Lines changed: 29 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -24,17 +24,18 @@ to: blueprint e4aeb3b3-272f-4967-be34-2d34daa46aa1
2424
------------------------------------------------------------------------------------------
2525
zone type zone id disposition underlay IP
2626
------------------------------------------------------------------------------------------
27+
clickhouse 44afce85-3377-4b20-a398-517c1579df4d in service fd00:1122:3344:103::23
2728
crucible 38b047ea-e3de-4859-b8e0-70cac5871446 in service fd00:1122:3344:103::2c
2829
crucible 4644ea0c-0ec3-41be-a356-660308e1c3fc in service fd00:1122:3344:103::2b
2930
crucible 55f4d117-0b9d-4256-a2c0-f46d3ed5fff9 in service fd00:1122:3344:103::24
3031
crucible 5c6a4628-8831-483b-995f-79b9126c4d04 in service fd00:1122:3344:103::27
3132
crucible 6a01210c-45ed-41a5-9230-8e05ecf5dd8f in service fd00:1122:3344:103::28
32-
crucible 7004cab9-dfc0-43ba-92d3-58d4ced66025 in service fd00:1122:3344:103::23
3333
crucible 79552859-fbd3-43bb-a9d3-6baba25558f8 in service fd00:1122:3344:103::25
3434
crucible 90696819-9b53-485a-9c65-ca63602e843e in service fd00:1122:3344:103::26
3535
crucible c99525b3-3680-4df6-9214-2ee3e1020e8b in service fd00:1122:3344:103::29
3636
crucible f42959d3-9eef-4e3b-b404-6177ce3ec7a1 in service fd00:1122:3344:103::2a
37-
internal_dns 44afce85-3377-4b20-a398-517c1579df4d in service fd00:1122:3344:1::1
37+
crucible fb36b9dc-273a-4bc3-aaa9-19ee4d0ef552 in service fd00:1122:3344:103::2d
38+
internal_dns 7004cab9-dfc0-43ba-92d3-58d4ced66025 in service fd00:1122:3344:1::1
3839
internal_ntp c81c9d4a-36d7-4796-9151-f564d3735152 in service fd00:1122:3344:103::21
3940
nexus b2573120-9c91-4ed7-8b4f-a7bfe8dbc807 in service fd00:1122:3344:103::22
4041

@@ -61,19 +62,19 @@ to: blueprint e4aeb3b3-272f-4967-be34-2d34daa46aa1
6162
------------------------------------------------------------------------------------------
6263
zone type zone id disposition underlay IP
6364
------------------------------------------------------------------------------------------
64-
crucible 0faa9350-2c02-47c7-a0a6-9f4afd69152c in service fd00:1122:3344:101::2a
65-
crucible 29278a22-1ba1-4117-bfdb-39fcb9ae7fd1 in service fd00:1122:3344:101::2c
66-
crucible 5b44003e-1a3d-4152-b606-872c72efce0e in service fd00:1122:3344:101::23
67-
crucible 943fea7a-9458-4935-9dc7-01ee5cfe5a02 in service fd00:1122:3344:101::27
68-
crucible a5a0b7a9-37c9-4dbd-8393-ec7748ada3b0 in service fd00:1122:3344:101::29
69-
crucible aa25add8-60b0-4ace-ac60-15adcdd32d50 in service fd00:1122:3344:101::28
70-
crucible aac3ab51-9e2b-4605-9bf6-e3eb3681c2b5 in service fd00:1122:3344:101::2b
71-
crucible b6f2dd1e-7f98-4a68-9df2-b33c69d1f7ea in service fd00:1122:3344:101::25
72-
crucible dc22d470-dc46-436b-9750-25c8d7d369e2 in service fd00:1122:3344:101::24
73-
crucible f7e434f9-6d4a-476b-a9e2-48d6ee28a08e in service fd00:1122:3344:101::26
74-
internal_dns 95c3b6d1-2592-4252-b5c1-5d0faf3ce9c9 in service fd00:1122:3344:2::1
75-
internal_ntp fb36b9dc-273a-4bc3-aaa9-19ee4d0ef552 in service fd00:1122:3344:101::21
76-
nexus a9a6a974-8953-4783-b815-da46884f2c02 in service fd00:1122:3344:101::22
65+
crucible 0faa9350-2c02-47c7-a0a6-9f4afd69152c in service fd00:1122:3344:101::29
66+
crucible 29278a22-1ba1-4117-bfdb-39fcb9ae7fd1 in service fd00:1122:3344:101::2b
67+
crucible 943fea7a-9458-4935-9dc7-01ee5cfe5a02 in service fd00:1122:3344:101::26
68+
crucible 9b722fea-a186-4bc3-bc37-ce7f6de6a796 in service fd00:1122:3344:101::2c
69+
crucible a5a0b7a9-37c9-4dbd-8393-ec7748ada3b0 in service fd00:1122:3344:101::28
70+
crucible aa25add8-60b0-4ace-ac60-15adcdd32d50 in service fd00:1122:3344:101::27
71+
crucible aac3ab51-9e2b-4605-9bf6-e3eb3681c2b5 in service fd00:1122:3344:101::2a
72+
crucible b6f2dd1e-7f98-4a68-9df2-b33c69d1f7ea in service fd00:1122:3344:101::24
73+
crucible dc22d470-dc46-436b-9750-25c8d7d369e2 in service fd00:1122:3344:101::23
74+
crucible f7e434f9-6d4a-476b-a9e2-48d6ee28a08e in service fd00:1122:3344:101::25
75+
internal_dns 5b44003e-1a3d-4152-b606-872c72efce0e in service fd00:1122:3344:2::1
76+
internal_ntp a9a6a974-8953-4783-b815-da46884f2c02 in service fd00:1122:3344:101::21
77+
nexus 95c3b6d1-2592-4252-b5c1-5d0faf3ce9c9 in service fd00:1122:3344:101::22
7778

7879

7980
sled be7f4375-2a6b-457f-b1a4-3074a715e5fe (active):
@@ -98,19 +99,19 @@ to: blueprint e4aeb3b3-272f-4967-be34-2d34daa46aa1
9899
------------------------------------------------------------------------------------------
99100
zone type zone id disposition underlay IP
100101
------------------------------------------------------------------------------------------
101-
crucible 248db330-56e6-4c7e-b5ff-9cd6cbcb210a in service fd00:1122:3344:102::29
102-
crucible 353b0aff-4c71-4fae-a6bd-adcb1d2a1a1d in service fd00:1122:3344:102::26
103-
crucible 6a5901b1-f9d7-425c-8ecb-a786c900f217 in service fd00:1122:3344:102::24
104-
crucible b3583b5f-4a62-4471-9be7-41e61578de4c in service fd00:1122:3344:102::27
105-
crucible b97bdef5-ed14-4e11-9d3b-3379c18ea694 in service fd00:1122:3344:102::2c
106-
crucible bac92034-b9e6-4e8b-9ffb-dbba9caec88d in service fd00:1122:3344:102::25
107-
crucible c240ec8c-cec5-4117-944d-faeb5672d568 in service fd00:1122:3344:102::2b
108-
crucible cf766535-9b6f-4263-a83a-86f45f7b005b in service fd00:1122:3344:102::2a
109-
crucible d9653001-f671-4905-a410-6a7abc358318 in service fd00:1122:3344:102::28
110-
crucible edaca77e-5806-446a-b00c-125962cd551d in service fd00:1122:3344:102::23
111-
internal_dns 65d03287-e43f-45f4-902e-0a5e4638f31a in service fd00:1122:3344:3::1
112-
internal_ntp 9b722fea-a186-4bc3-bc37-ce7f6de6a796 in service fd00:1122:3344:102::21
113-
nexus 4330134c-41b9-4097-aa0b-3eaefa06d473 in service fd00:1122:3344:102::22
102+
crucible 248db330-56e6-4c7e-b5ff-9cd6cbcb210a in service fd00:1122:3344:102::28
103+
crucible 353b0aff-4c71-4fae-a6bd-adcb1d2a1a1d in service fd00:1122:3344:102::25
104+
crucible 6a5901b1-f9d7-425c-8ecb-a786c900f217 in service fd00:1122:3344:102::23
105+
crucible b3583b5f-4a62-4471-9be7-41e61578de4c in service fd00:1122:3344:102::26
106+
crucible b7bf29a5-ef5f-4942-a3be-e943f7e6be80 in service fd00:1122:3344:102::2c
107+
crucible b97bdef5-ed14-4e11-9d3b-3379c18ea694 in service fd00:1122:3344:102::2b
108+
crucible bac92034-b9e6-4e8b-9ffb-dbba9caec88d in service fd00:1122:3344:102::24
109+
crucible c240ec8c-cec5-4117-944d-faeb5672d568 in service fd00:1122:3344:102::2a
110+
crucible cf766535-9b6f-4263-a83a-86f45f7b005b in service fd00:1122:3344:102::29
111+
crucible d9653001-f671-4905-a410-6a7abc358318 in service fd00:1122:3344:102::27
112+
internal_dns edaca77e-5806-446a-b00c-125962cd551d in service fd00:1122:3344:3::1
113+
internal_ntp 4330134c-41b9-4097-aa0b-3eaefa06d473 in service fd00:1122:3344:102::21
114+
nexus 65d03287-e43f-45f4-902e-0a5e4638f31a in service fd00:1122:3344:102::22
114115

115116

116117
COCKROACHDB SETTINGS:

0 commit comments

Comments (0)