diff --git a/Documentation/Helm-Charts/operator-chart.md b/Documentation/Helm-Charts/operator-chart.md
index 45f360ea1ffc..c6ae597217c4 100644
--- a/Documentation/Helm-Charts/operator-chart.md
+++ b/Documentation/Helm-Charts/operator-chart.md
@@ -136,6 +136,7 @@ The following table lists the configurable parameters of the rook-operator chart
 | `csi.topology.domainLabels` | domainLabels define which node labels to use as domains for CSI nodeplugins to advertise their domains | `nil` |
 | `csi.topology.enabled` | Enable topology based provisioning | `false` |
 | `currentNamespaceOnly` | Whether the operator should watch cluster CRD in its own namespace or not | `false` |
+| `customHostnameLabel` | Custom label to identify node hostname. If not set, `kubernetes.io/hostname` will be used | `nil` |
 | `disableDeviceHotplug` | Disable automatic orchestration when new devices are discovered. | `false` |
 | `discover.nodeAffinity` | The node labels for affinity of `discover-agent` [^1] | `nil` |
 | `discover.podLabels` | Labels to add to the discover pods | `nil` |
diff --git a/Makefile b/Makefile
index 64ee14c20e41..da2bbbd2a61f 100644
--- a/Makefile
+++ b/Makefile
@@ -161,9 +161,12 @@ fmt: ## Check formatting of go sources.
 yamllint:
 	yamllint -c .yamllint deploy/examples/ --no-warnings
 
+.PHONY: lint
+lint: yamllint pylint shellcheck vet ## Run various linters
+
 .PHONY: pylint
 pylint:
-	pylint $(shell find $(ROOT_DIR) -name '*.py') -E
+	pylint $(shell find $(ROOT_DIR) -name '*.py') -E
 
 .PHONY: shellcheck
 shellcheck:
diff --git a/deploy/charts/rook-ceph/templates/deployment.yaml b/deploy/charts/rook-ceph/templates/deployment.yaml
index 3df7bf610401..beaa8ceeaf09 100644
--- a/deploy/charts/rook-ceph/templates/deployment.yaml
+++ b/deploy/charts/rook-ceph/templates/deployment.yaml
@@ -88,6 +88,10 @@ spec:
 {{- else }}
         - name: ROOK_HOSTPATH_REQUIRES_PRIVILEGED
           value: "{{ .Values.hostpathRequiresPrivileged }}"
+{{- end }}
+{{- if .Values.customHostnameLabel }}
+        - name: ROOK_CUSTOM_HOSTNAME_LABEL
+          value: {{ .Values.customHostnameLabel }}
 {{- end }}
         - name: ROOK_DISABLE_DEVICE_HOTPLUG
           value: "{{ .Values.disableDeviceHotplug }}"
diff --git a/deploy/charts/rook-ceph/values.yaml b/deploy/charts/rook-ceph/values.yaml
index 2151064545d8..2122ef61f6b5 100644
--- a/deploy/charts/rook-ceph/values.yaml
+++ b/deploy/charts/rook-ceph/values.yaml
@@ -623,6 +623,9 @@ discover:
 #   cpu: 100m
 #   memory: 128Mi
 
+# -- Custom label to identify node hostname. If not set, `kubernetes.io/hostname` will be used
+customHostnameLabel:
+
 # -- Runs Ceph Pods as privileged to be able to write to `hostPaths` in OpenShift with SELinux restrictions.
 hostpathRequiresPrivileged: false
diff --git a/deploy/examples/operator.yaml b/deploy/examples/operator.yaml
index d9d9bfc13f94..b15b60e18898 100644
--- a/deploy/examples/operator.yaml
+++ b/deploy/examples/operator.yaml
@@ -646,6 +646,10 @@ spec:
         - name: ROOK_UNREACHABLE_NODE_TOLERATION_SECONDS
          value: "5"
 
+        # Custom label to identify node hostname. If not set, `kubernetes.io/hostname` will be used
+        - name: ROOK_CUSTOM_HOSTNAME_LABEL
+          value: ""
+
         # The name of the node to pass with the downward API
         - name: NODE_NAME
           valueFrom:
diff --git a/pkg/operator/ceph/cluster/cleanup.go b/pkg/operator/ceph/cluster/cleanup.go
index 9f7c2e0ef833..5961451d0eb4 100644
--- a/pkg/operator/ceph/cluster/cleanup.go
+++ b/pkg/operator/ceph/cluster/cleanup.go
@@ -75,7 +75,7 @@ func (c *ClusterController) startCleanUpJobs(cluster *cephv1.CephCluster, cephHo
 		logger.Infof("starting clean up job on node %q", hostName)
 		jobName := k8sutil.TruncateNodeNameForJob("cluster-cleanup-job-%s", hostName)
 		podSpec := c.cleanUpJobTemplateSpec(cluster, monSecret, clusterFSID)
-		podSpec.Spec.NodeSelector = map[string]string{v1.LabelHostname: hostName}
+		podSpec.Spec.NodeSelector = map[string]string{k8sutil.LabelHostname(): hostName}
 		labels := controller.AppLabels(CleanupAppName, cluster.Namespace)
 		labels[CleanupAppName] = "true"
 		job := &batch.Job{
diff --git a/pkg/operator/ceph/cluster/mgr/spec.go b/pkg/operator/ceph/cluster/mgr/spec.go
index 2e002733e389..e82a6f23fa4e 100644
--- a/pkg/operator/ceph/cluster/mgr/spec.go
+++ b/pkg/operator/ceph/cluster/mgr/spec.go
@@ -90,7 +90,7 @@ func (c *Cluster) makeDeployment(mgrConfig *mgrConfig) (*apps.Deployment, error)
 		mon.CephSecretVolume())
 
 	// Stretch the mgrs across hosts by default, or across a bigger failure domain for when zones are required like in case of stretched cluster
-	topologyKey := v1.LabelHostname
+	topologyKey := k8sutil.LabelHostname()
 	if c.spec.ZonesRequired() {
 		topologyKey = mon.GetFailureDomainLabel(c.spec)
 	}
diff --git a/pkg/operator/ceph/cluster/mon/mon.go b/pkg/operator/ceph/cluster/mon/mon.go
index 8375163e8d5b..b0d2d94e705c 100644
--- a/pkg/operator/ceph/cluster/mon/mon.go
+++ b/pkg/operator/ceph/cluster/mon/mon.go
@@ -672,7 +672,7 @@ func scheduleMonitor(c *Cluster, mon *monConfig) (*apps.Deployment, error) {
 	// setup affinity settings for pod scheduling
 	p := c.getMonPlacement(mon.Zone)
 	p.ApplyToPodSpec(&d.Spec.Template.Spec)
-	k8sutil.SetNodeAntiAffinityForPod(&d.Spec.Template.Spec, requiredDuringScheduling(&c.spec), v1.LabelHostname,
+	k8sutil.SetNodeAntiAffinityForPod(&d.Spec.Template.Spec, requiredDuringScheduling(&c.spec), k8sutil.LabelHostname(),
 		map[string]string{k8sutil.AppAttr: AppName}, nil)
 
 	// setup storage on the canary since scheduling will be affected when
@@ -1412,10 +1412,15 @@ func (c *Cluster) startMon(m *monConfig, schedule *controller.MonScheduleInfo) e
 		if m.UseHostNetwork || !pvcExists {
 			p.PodAffinity = nil
 			p.PodAntiAffinity = nil
-			k8sutil.SetNodeAntiAffinityForPod(&d.Spec.Template.Spec, requiredDuringScheduling(&c.spec), v1.LabelHostname,
-				map[string]string{k8sutil.AppAttr: AppName}, existingDeployment.Spec.Template.Spec.NodeSelector)
+			nodeSelector := existingDeployment.Spec.Template.Spec.NodeSelector
+			if schedule != nil && schedule.Hostname != "" {
+				// update the nodeSelector in case ROOK_CUSTOM_HOSTNAME_LABEL was changed
+				nodeSelector = map[string]string{k8sutil.LabelHostname(): schedule.Hostname}
+			}
+			k8sutil.SetNodeAntiAffinityForPod(&d.Spec.Template.Spec, requiredDuringScheduling(&c.spec), k8sutil.LabelHostname(),
+				map[string]string{k8sutil.AppAttr: AppName}, nodeSelector)
 		} else {
-			k8sutil.SetNodeAntiAffinityForPod(&d.Spec.Template.Spec, requiredDuringScheduling(&c.spec), v1.LabelHostname,
+			k8sutil.SetNodeAntiAffinityForPod(&d.Spec.Template.Spec, requiredDuringScheduling(&c.spec), k8sutil.LabelHostname(),
 				map[string]string{k8sutil.AppAttr: AppName}, nil)
 		}
 		return c.updateMon(m, d)
@@ -1445,9 +1450,9 @@ func (c *Cluster) startMon(m *monConfig, schedule *controller.MonScheduleInfo) e
 		// Schedule the mon on a specific host if specified, or else allow it to be portable according to the PV
 		p.PodAffinity = nil
 		p.PodAntiAffinity = nil
-		nodeSelector = map[string]string{v1.LabelHostname: schedule.Hostname}
+		nodeSelector = map[string]string{k8sutil.LabelHostname(): schedule.Hostname}
 	}
-	k8sutil.SetNodeAntiAffinityForPod(&d.Spec.Template.Spec, requiredDuringScheduling(&c.spec), v1.LabelHostname,
+	k8sutil.SetNodeAntiAffinityForPod(&d.Spec.Template.Spec, requiredDuringScheduling(&c.spec), k8sutil.LabelHostname(),
 		map[string]string{k8sutil.AppAttr: AppName}, nodeSelector)
 
 	logger.Debugf("Starting mon: %+v", d.Name)
diff --git a/pkg/operator/ceph/cluster/mon/node.go b/pkg/operator/ceph/cluster/mon/node.go
index 09c2d9eb4013..05864b927a1f 100644
--- a/pkg/operator/ceph/cluster/mon/node.go
+++ b/pkg/operator/ceph/cluster/mon/node.go
@@ -19,6 +19,7 @@ package mon
 import (
 	"github.com/pkg/errors"
 	opcontroller "github.com/rook/rook/pkg/operator/ceph/controller"
+	"github.com/rook/rook/pkg/operator/k8sutil"
 	v1 "k8s.io/api/core/v1"
 )
 
@@ -29,7 +30,7 @@ const (
 func getNodeInfoFromNode(n v1.Node) (*opcontroller.MonScheduleInfo, error) {
 	nr := &opcontroller.MonScheduleInfo{
 		Name:     n.Name,
-		Hostname: n.Labels[v1.LabelHostname],
+		Hostname: n.Labels[k8sutil.LabelHostname()],
 	}
 
 	// If the host networking is setup such that a different IP should be used
diff --git a/pkg/operator/ceph/cluster/nodedaemon/crash.go b/pkg/operator/ceph/cluster/nodedaemon/crash.go
index a95bc046239b..027d98d380d7 100644
--- a/pkg/operator/ceph/cluster/nodedaemon/crash.go
+++ b/pkg/operator/ceph/cluster/nodedaemon/crash.go
@@ -44,9 +44,9 @@ const (
 // createOrUpdateCephCrash is a wrapper around controllerutil.CreateOrUpdate
 func (r *ReconcileNode) createOrUpdateCephCrash(node corev1.Node, tolerations []corev1.Toleration, cephCluster cephv1.CephCluster, cephVersion *cephver.CephVersion) (controllerutil.OperationResult, error) {
 	// Create or Update the deployment default/foo
-	nodeHostnameLabel, ok := node.ObjectMeta.Labels[corev1.LabelHostname]
+	nodeHostnameLabel, ok := node.ObjectMeta.Labels[k8sutil.LabelHostname()]
 	if !ok {
-		return controllerutil.OperationResultNone, errors.Errorf("label key %q does not exist on node %q", corev1.LabelHostname, node.GetName())
+		return controllerutil.OperationResultNone, errors.Errorf("label key %q does not exist on node %q", k8sutil.LabelHostname(), node.GetName())
 	}
 	deploy := &appsv1.Deployment{
 		ObjectMeta: metav1.ObjectMeta{
@@ -66,21 +66,21 @@ func (r *ReconcileNode) createOrUpdateCephCrash(node corev1.Node, tolerations []
 
 	// labels for the pod, the deployment, and the deploymentSelector
 	deploymentLabels := map[string]string{
-		corev1.LabelHostname: nodeHostnameLabel,
-		k8sutil.AppAttr:      CrashCollectorAppName,
-		NodeNameLabel:        node.GetName(),
+		k8sutil.LabelHostname(): nodeHostnameLabel,
+		k8sutil.AppAttr:         CrashCollectorAppName,
+		NodeNameLabel:           node.GetName(),
 	}
 	deploymentLabels[config.CrashType] = "crash"
 	deploymentLabels[controller.DaemonIDLabel] = "crash"
 	deploymentLabels[k8sutil.ClusterAttr] = cephCluster.GetNamespace()
 
 	selectorLabels := map[string]string{
-		corev1.LabelHostname: nodeHostnameLabel,
-		k8sutil.AppAttr:      CrashCollectorAppName,
-		NodeNameLabel:        node.GetName(),
+		k8sutil.LabelHostname(): nodeHostnameLabel,
+		k8sutil.AppAttr:         CrashCollectorAppName,
+		NodeNameLabel:           node.GetName(),
 	}
 
-	nodeSelector := map[string]string{corev1.LabelHostname: nodeHostnameLabel}
+	nodeSelector := map[string]string{k8sutil.LabelHostname(): nodeHostnameLabel}
 
 	// Deployment selector is immutable so we set this value only if
 	// a new object is going to be created
diff --git a/pkg/operator/ceph/cluster/nodedaemon/exporter.go b/pkg/operator/ceph/cluster/nodedaemon/exporter.go
index 4c30320ce385..cbd0495e50e3 100644
--- a/pkg/operator/ceph/cluster/nodedaemon/exporter.go
+++ b/pkg/operator/ceph/cluster/nodedaemon/exporter.go
@@ -67,9 +67,9 @@ func (r *ReconcileNode) createOrUpdateCephExporter(node corev1.Node, tolerations
 		return controllerutil.OperationResultNone, nil
 	}
 
-	nodeHostnameLabel, ok := node.Labels[corev1.LabelHostname]
+	nodeHostnameLabel, ok := node.Labels[k8sutil.LabelHostname()]
 	if !ok {
-		return controllerutil.OperationResultNone, errors.Errorf("label key %q does not exist on node %q", corev1.LabelHostname, node.GetName())
+		return controllerutil.OperationResultNone, errors.Errorf("label key %q does not exist on node %q", k8sutil.LabelHostname(), node.GetName())
 	}
 	deploy := &appsv1.Deployment{
 		ObjectMeta: metav1.ObjectMeta{
@@ -91,20 +91,20 @@ func (r *ReconcileNode) createOrUpdateCephExporter(node corev1.Node, tolerations
 
 	// labels for the pod, the deployment, and the deploymentSelector
 	deploymentLabels := map[string]string{
-		corev1.LabelHostname: nodeHostnameLabel,
-		k8sutil.AppAttr:      cephExporterAppName,
-		NodeNameLabel:        node.GetName(),
+		k8sutil.LabelHostname(): nodeHostnameLabel,
+		k8sutil.AppAttr:         cephExporterAppName,
+		NodeNameLabel:           node.GetName(),
 	}
 	deploymentLabels[controller.DaemonIDLabel] = "exporter"
 	deploymentLabels[k8sutil.ClusterAttr] = cephCluster.GetNamespace()
 
 	selectorLabels := map[string]string{
-		corev1.LabelHostname: nodeHostnameLabel,
-		k8sutil.AppAttr:      cephExporterAppName,
-		NodeNameLabel:        node.GetName(),
+		k8sutil.LabelHostname(): nodeHostnameLabel,
+		k8sutil.AppAttr:         cephExporterAppName,
+		NodeNameLabel:           node.GetName(),
 	}
 
-	nodeSelector := map[string]string{corev1.LabelHostname: nodeHostnameLabel}
+	nodeSelector := map[string]string{k8sutil.LabelHostname(): nodeHostnameLabel}
 
 	// Deployment selector is immutable so we set this value only if
 	// a new object is going to be created
diff --git a/pkg/operator/ceph/cluster/osd/key_rotation.go b/pkg/operator/ceph/cluster/osd/key_rotation.go
index db0aca375e80..5ad7afdcb88f 100644
--- a/pkg/operator/ceph/cluster/osd/key_rotation.go
+++ b/pkg/operator/ceph/cluster/osd/key_rotation.go
@@ -57,7 +57,7 @@ func applyKeyRotationPlacement(spec *v1.PodSpec, labels map[string]string) {
 				LabelSelector: &metav1.LabelSelector{
 					MatchLabels: labels,
 				},
-				TopologyKey: v1.LabelHostname,
+				TopologyKey: k8sutil.LabelHostname(),
 			},
 		},
 	}
diff --git a/pkg/operator/ceph/cluster/osd/osd.go b/pkg/operator/ceph/cluster/osd/osd.go
index e1bf09cb3e9b..04d91ca8e254 100644
--- a/pkg/operator/ceph/cluster/osd/osd.go
+++ b/pkg/operator/ceph/cluster/osd/osd.go
@@ -583,7 +583,7 @@ func (c *Cluster) getPVCHostName(pvcName string) (string, error) {
 	for _, d := range deployments.Items {
 		selectors := d.Spec.Template.Spec.NodeSelector
 		for label, value := range selectors {
-			if label == corev1.LabelHostname {
+			if label == k8sutil.LabelHostname() {
 				return value, nil
 			}
 		}
@@ -734,10 +734,18 @@ func getNodeOrPVCName(d *appsv1.Deployment) (string, error) {
 		return v, nil // OSD is on PVC
 	}
 	for k, v := range d.Spec.Template.Spec.NodeSelector {
-		if k == corev1.LabelHostname {
+		if k == k8sutil.LabelHostname() {
 			return v, nil
 		}
 	}
+
+	// try to fall back to the previous hostname label;
+	// the NodeSelector always has a single entry
+	if len(d.Spec.Template.Spec.NodeSelector) == 1 {
+		for _, v := range d.Spec.Template.Spec.NodeSelector {
+			return v, nil
+		}
+	}
+
 	return "", errors.Errorf("failed to find node/PVC name for OSD deployment %q: %+v", d.Name, d)
 }
@@ -858,7 +866,7 @@ func getNode(ctx context.Context, clientset kubernetes.Interface, nodeName strin
 	// try to find by the node by matching the provided nodeName
 	node, err = clientset.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
 	if kerrors.IsNotFound(err) {
-		listOpts := metav1.ListOptions{LabelSelector: fmt.Sprintf("%q=%q", corev1.LabelHostname, nodeName)}
+		listOpts := metav1.ListOptions{LabelSelector: fmt.Sprintf("%q=%q", k8sutil.LabelHostname(), nodeName)}
 		nodeList, err := clientset.CoreV1().Nodes().List(ctx, listOpts)
 		if err != nil || len(nodeList.Items) < 1 {
 			return nil, errors.Wrapf(err, "could not find node %q hostname label", nodeName)
diff --git a/pkg/operator/ceph/cluster/osd/provision_spec.go b/pkg/operator/ceph/cluster/osd/provision_spec.go
index 61f31664ae79..01b70b62e66f 100644
--- a/pkg/operator/ceph/cluster/osd/provision_spec.go
+++ b/pkg/operator/ceph/cluster/osd/provision_spec.go
@@ -52,7 +52,7 @@ func (c *Cluster) makeJob(osdProps osdProperties, provisionConfig *provisionConf
 			podSpec.Spec.InitContainers = append(podSpec.Spec.InitContainers, c.getPVCWalInitContainer("/wal", osdProps))
 		}
 	} else {
-		podSpec.Spec.NodeSelector = map[string]string{v1.LabelHostname: osdProps.crushHostname}
+		podSpec.Spec.NodeSelector = map[string]string{k8sutil.LabelHostname(): osdProps.crushHostname}
 	}
 
 	job := &batch.Job{
diff --git a/pkg/operator/ceph/cluster/osd/spec.go b/pkg/operator/ceph/cluster/osd/spec.go
index 79c066bf290e..de030b98a45b 100644
--- a/pkg/operator/ceph/cluster/osd/spec.go
+++ b/pkg/operator/ceph/cluster/osd/spec.go
@@ -757,7 +757,7 @@ func (c *Cluster) makeDeployment(osdProps osdProperties, osd *OSDInfo, provision
 			return nil, err
 		}
 	} else {
-		deployment.Spec.Template.Spec.NodeSelector = map[string]string{v1.LabelHostname: osdProps.crushHostname}
+		deployment.Spec.Template.Spec.NodeSelector = map[string]string{k8sutil.LabelHostname(): osdProps.crushHostname}
 	}
 	k8sutil.AddRookVersionLabelToDeployment(deployment)
 	cephv1.GetOSDAnnotations(c.spec.Annotations).ApplyToObjectMeta(&deployment.ObjectMeta)
diff --git a/pkg/operator/ceph/cluster/osd/spec_test.go b/pkg/operator/ceph/cluster/osd/spec_test.go
index f04495fb46f6..ab941eb97c7f 100644
--- a/pkg/operator/ceph/cluster/osd/spec_test.go
+++ b/pkg/operator/ceph/cluster/osd/spec_test.go
@@ -71,10 +71,18 @@ func TestPodContainer(t *testing.T) {
 }
 
 func TestDaemonset(t *testing.T) {
-	testPodDevices(t, "", "sda", true)
-	testPodDevices(t, "/var/lib/mydatadir", "sdb", false)
-	testPodDevices(t, "", "", true)
-	testPodDevices(t, "", "", false)
+	t.Run("device name and all devices", func(t *testing.T) {
+		testPodDevices(t, "", "sda", true)
+	})
+	t.Run("data dir and device name", func(t *testing.T) {
+		testPodDevices(t, "/var/lib/mydatadir", "sdb", false)
+	})
+	t.Run("all devices", func(t *testing.T) {
+		testPodDevices(t, "", "", true)
+	})
+	t.Run("no data dir and device name", func(t *testing.T) {
+		testPodDevices(t, "", "", false)
+	})
 }
 
 func testPodDevices(t *testing.T, dataDir, deviceName string, allDevices bool) {
@@ -493,6 +501,17 @@
 		assert.Equal(t, int32(900), deployment.Spec.Template.Spec.Containers[0].LivenessProbe.InitialDelaySeconds)
 		assert.Equal(t, int32(1000), deployment.Spec.Template.Spec.Containers[0].StartupProbe.InitialDelaySeconds)
 	})
+
+	// test custom topology label
+	t.Setenv("ROOK_CUSTOM_HOSTNAME_LABEL", "my_custom_hostname_label")
+	deployment, err = c.makeDeployment(osdProp, osd, dataPathMap)
+	assert.Nil(t, err)
+	assert.NotNil(t, deployment)
+	assert.Equal(t, "rook-ceph-osd-0", deployment.Name)
+	assert.Equal(t, c.clusterInfo.Namespace, deployment.Namespace)
+	assert.Equal(t, serviceAccountName, deployment.Spec.Template.Spec.ServiceAccountName)
+	assert.Equal(t, int32(1), *(deployment.Spec.Replicas))
+	assert.Equal(t, "node1", deployment.Spec.Template.Spec.NodeSelector["my_custom_hostname_label"])
 }
 
 func verifyEnvVar(t *testing.T, envVars []corev1.EnvVar, expectedName, expectedValue string, expectedFound bool) {
diff --git a/pkg/operator/ceph/cluster/osd/topology/topology.go b/pkg/operator/ceph/cluster/osd/topology/topology.go
index ce4022e167af..9126bff09dd2 100644
--- a/pkg/operator/ceph/cluster/osd/topology/topology.go
+++ b/pkg/operator/ceph/cluster/osd/topology/topology.go
@@ -25,6 +25,7 @@ import (
 
 	"github.com/coreos/pkg/capnslog"
 	"github.com/rook/rook/pkg/daemon/ceph/client"
+	"github.com/rook/rook/pkg/operator/k8sutil"
 	corev1 "k8s.io/api/core/v1"
 )
 
@@ -46,7 +47,6 @@ var (
 
 const (
 	topologyLabelPrefix = "topology.rook.io/"
-	labelHostname       = "kubernetes.io/hostname"
 )
 
 // ExtractOSDTopologyFromLabels extracts rook topology from labels and returns a map from topology type to value
@@ -74,16 +74,15 @@ func allKubernetesTopologyLabelsOrdered() []string {
 		append([]string{corev1.LabelTopologyRegion, corev1.LabelTopologyZone},
 			rookTopologyLabelsOrdered()...),
-		labelHostname, // host is the lowest level in the crush map hierarchy
+		k8sutil.LabelHostname(), // host is the lowest level in the crush map hierarchy
 	)
 }
 
 func kubernetesTopologyLabelToCRUSHLabel(label string) string {
-	crushLabel := strings.Split(label, "/")
-	if crushLabel[len(crushLabel)-1] == "hostname" {
-		// kubernetes uses "kubernetes.io/hostname" whereas CRUSH uses "host"
+	if label == k8sutil.LabelHostname() {
 		return "host"
 	}
+	crushLabel := strings.Split(label, "/")
 	return crushLabel[len(crushLabel)-1]
 }
 
@@ -140,7 +139,7 @@ func formatTopologyAffinity(label, value string) string {
 
 // GetDefaultTopologyLabels returns the supported default topology labels.
 func GetDefaultTopologyLabels() string {
-	Labels := []string{corev1.LabelHostname, corev1.LabelZoneRegionStable, corev1.LabelZoneFailureDomainStable}
+	Labels := []string{k8sutil.LabelHostname(), corev1.LabelZoneRegionStable, corev1.LabelZoneFailureDomainStable}
 	for _, label := range CRUSHTopologyLabels {
 		Labels = append(Labels, topologyLabelPrefix+label)
 	}
diff --git a/pkg/operator/ceph/cluster/osd/topology/topology_test.go b/pkg/operator/ceph/cluster/osd/topology/topology_test.go
index 33a5c4cdd7e0..2446dd298b9f 100644
--- a/pkg/operator/ceph/cluster/osd/topology/topology_test.go
+++ b/pkg/operator/ceph/cluster/osd/topology/topology_test.go
@@ -44,6 +44,7 @@ func TestCleanTopologyLabels(t *testing.T) {
 	nodeLabels := map[string]string{
 		corev1.LabelZoneRegionStable:  "r.region",
 		"kubernetes.io/hostname":      "host.name",
+		"my_custom_hostname_label":    "host.custom.name",
 		"topology.rook.io/rack":       "r.rack",
 		"topology.rook.io/row":        "r.row",
 		"topology.rook.io/datacenter": "d.datacenter",
@@ -62,6 +63,18 @@
 	assert.Equal(t, "", topology["pod"])
 	assert.Equal(t, "", topology["room"])
+	t.Setenv("ROOK_CUSTOM_HOSTNAME_LABEL", "my_custom_hostname_label")
+	topology, affinity = ExtractOSDTopologyFromLabels(nodeLabels)
+	assert.Equal(t, 6, len(topology))
+	assert.Equal(t, "r-region", topology["region"])
+	assert.Equal(t, "host-custom-name", topology["host"])
+	assert.Equal(t, "r-rack", topology["rack"])
+	assert.Equal(t, "r-row", topology["row"])
+	assert.Equal(t, "d-datacenter", topology["datacenter"])
+	assert.Equal(t, "topology.rook.io/chassis=test", affinity)
+	assert.Equal(t, "test", topology["chassis"])
+	assert.Equal(t, "", topology["pod"])
+	assert.Equal(t, "", topology["room"])
 }
 
 func TestTopologyLabels(t *testing.T) {
@@ -126,4 +139,18 @@
 		"topology.rook.io/room," +
 		"topology.rook.io/datacenter"
 	assert.Equal(t, expectedLabels, GetDefaultTopologyLabels())
+
+	t.Setenv("ROOK_CUSTOM_HOSTNAME_LABEL", "my_custom_hostname_label")
+	expectedLabels = "my_custom_hostname_label," +
+		"topology.kubernetes.io/region," +
+		"topology.kubernetes.io/zone," +
+		"topology.rook.io/chassis," +
+		"topology.rook.io/rack," +
+		"topology.rook.io/row," +
+		"topology.rook.io/pdu," +
+		"topology.rook.io/pod," +
+		"topology.rook.io/room," +
+		"topology.rook.io/datacenter"
+	assert.Equal(t, expectedLabels, GetDefaultTopologyLabels())
+
 }
diff --git a/pkg/operator/ceph/cluster/watcher.go b/pkg/operator/ceph/cluster/watcher.go
index a99d9f545d58..57363bfa26fd 100644
--- a/pkg/operator/ceph/cluster/watcher.go
+++ b/pkg/operator/ceph/cluster/watcher.go
@@ -101,7 +101,7 @@ func (c *clientCluster) onK8sNode(ctx context.Context, object runtime.Object, op
 	}
 
 	if !k8sutil.GetNodeSchedulable(*node, false) {
-		logger.Debugf("node watcher: skipping cluster update. added node %q is unschedulable", node.Labels[corev1.LabelHostname])
+		logger.Debugf("node watcher: skipping cluster update. added node %q is unschedulable", node.Labels[k8sutil.LabelHostname()])
 		return false
 	}
 
@@ -126,7 +126,7 @@ func (c *clientCluster) onK8sNode(ctx context.Context, object runtime.Object, op
 	err := k8sutil.ValidNode(*node, cephv1.GetOSDPlacement(cluster.Spec.Placement), cluster.Spec.Storage.ScheduleAlways)
 	if err == nil {
 		nodeName := node.Name
-		hostname, ok := node.Labels[corev1.LabelHostname]
+		hostname, ok := node.Labels[k8sutil.LabelHostname()]
 		if ok && hostname != "" {
 			nodeName = hostname
 		}
@@ -148,7 +148,7 @@ func (c *clientCluster) onK8sNode(ctx context.Context, object runtime.Object, op
 
 	// Reconcile if there are no OSDs in the CRUSH map and if the host does not exist in the CRUSH map.
 	if osds == "" {
-		logger.Infof("node watcher: adding node %q to cluster %q", node.Labels[corev1.LabelHostname], cluster.Namespace)
+		logger.Infof("node watcher: adding node %q to cluster %q", node.Labels[k8sutil.LabelHostname()], cluster.Namespace)
 		return true
 	}
 
diff --git a/pkg/operator/ceph/csi/util.go b/pkg/operator/ceph/csi/util.go
index f2b4c379c756..4c3e70827155 100644
--- a/pkg/operator/ceph/csi/util.go
+++ b/pkg/operator/ceph/csi/util.go
@@ -197,7 +197,7 @@ func GetPodAntiAffinity(key, value string) corev1.PodAntiAffinity {
 						},
 					},
 				},
-				TopologyKey: corev1.LabelHostname,
+				TopologyKey: k8sutil.LabelHostname(),
 			},
 		},
 	}
diff --git a/pkg/operator/ceph/object/spec.go b/pkg/operator/ceph/object/spec.go
index 32fc57eb3981..2d33c79e00cd 100644
--- a/pkg/operator/ceph/object/spec.go
+++ b/pkg/operator/ceph/object/spec.go
@@ -252,7 +252,7 @@ func (c *clusterConfig) makeRGWPodSpec(rgwConfig *rgwConfig) (v1.PodTemplateSpec
 
 	// If host networking is not enabled, preferred pod anti-affinity is added to the rgw daemons
 	labels := getLabels(c.store.Name, c.store.Namespace, false)
-	k8sutil.SetNodeAntiAffinityForPod(&podSpec, c.store.Spec.IsHostNetwork(c.clusterSpec), v1.LabelHostname, labels, nil)
+	k8sutil.SetNodeAntiAffinityForPod(&podSpec, c.store.Spec.IsHostNetwork(c.clusterSpec), k8sutil.LabelHostname(), labels, nil)
 
 	podTemplateSpec := v1.PodTemplateSpec{
 		ObjectMeta: metav1.ObjectMeta{
diff --git a/pkg/operator/k8sutil/labels.go b/pkg/operator/k8sutil/labels.go
index c17c6980a6c5..b7b16e650640 100644
--- a/pkg/operator/k8sutil/labels.go
+++ b/pkg/operator/k8sutil/labels.go
@@ -19,6 +19,8 @@ package k8sutil
 import (
 	"os"
 	"strings"
+
+	corev1 "k8s.io/api/core/v1"
 )
 
 // ParseStringToLabels parse a label selector string into a map[string]string
@@ -58,3 +60,11 @@ func AddRecommendedLabels(labels map[string]string, appName, parentName, resourc
 	labels["app.kubernetes.io/created-by"] = "rook-ceph-operator"
 	labels["rook.io/operator-namespace"] = os.Getenv(PodNamespaceEnvVar)
 }
+
+// LabelHostname returns the label name used to identify the k8s node hostname
+func LabelHostname() string {
+	if label := os.Getenv("ROOK_CUSTOM_HOSTNAME_LABEL"); label != "" {
+		return label
+	}
+	return corev1.LabelHostname
+}
diff --git a/pkg/operator/k8sutil/node.go b/pkg/operator/k8sutil/node.go
index 2dcce962cd90..a6e8ab1e8c18 100644
--- a/pkg/operator/k8sutil/node.go
+++ b/pkg/operator/k8sutil/node.go
@@ -101,7 +101,7 @@ func GetValidNodes(ctx context.Context, rookStorage cephv1.StorageScopeSpec, cli
 // Typically these will be the same name, but sometimes they are not such as when nodes have a longer
 // dns name, but the hostname is short.
 func GetNodeNameFromHostname(ctx context.Context, clientset kubernetes.Interface, hostName string) (string, error) {
-	options := metav1.ListOptions{LabelSelector: fmt.Sprintf("%s=%s", v1.LabelHostname, hostName)}
+	options := metav1.ListOptions{LabelSelector: fmt.Sprintf("%s=%s", LabelHostname(), hostName)}
 	nodes, err := clientset.CoreV1().Nodes().List(ctx, options)
 	if err != nil {
 		return hostName, err
@@ -123,7 +123,7 @@ func GetNodeHostName(ctx context.Context, clientset kubernetes.Interface, nodeNa
 }
 
 func GetNodeHostNameLabel(node *v1.Node) (string, error) {
-	hostname, ok := node.Labels[v1.LabelHostname]
+	hostname, ok := node.Labels[LabelHostname()]
 	if !ok {
 		return "", fmt.Errorf("hostname not found on the node")
 	}
@@ -141,7 +141,7 @@ func GetNodeHostNames(ctx context.Context, clientset kubernetes.Interface) (map[
 
 	nodeMap := map[string]string{}
 	for _, node := range nodes.Items {
-		nodeMap[node.Name] = node.Labels[v1.LabelHostname]
+		nodeMap[node.Name] = node.Labels[LabelHostname()]
 	}
 	return nodeMap, nil
 }
@@ -275,9 +275,10 @@ func rookNodeMatchesKubernetesNode(rookNode cephv1.Node, kubernetesNode v1.Node)
 }
 
 func normalizeHostname(kubernetesNode v1.Node) string {
-	hostname := kubernetesNode.Labels[v1.LabelHostname]
+	hostname := kubernetesNode.Labels[LabelHostname()]
 	if len(hostname) == 0 {
 		// fall back to the node name if the hostname label is not set
+		logger.Warningf("hostname label %q is missing on node %q, falling back to the node name", LabelHostname(), kubernetesNode.Name)
 		hostname = kubernetesNode.Name
 	}
 	return hostname
@@ -301,7 +302,7 @@ func GetKubernetesNodesMatchingRookNodes(ctx context.Context, rookNodes []cephv1
 			}
 		}
 		if !nodeFound {
-			logger.Warningf("failed to find matching kubernetes node for %q. Check the CephCluster's config and confirm each 'name' field in spec.storage.nodes matches their 'kubernetes.io/hostname' label", rn.Name)
+			logger.Warningf("failed to find matching kubernetes node for %q. Check the CephCluster's config and confirm each 'name' field in spec.storage.nodes matches their %q label", rn.Name, LabelHostname())
 		}
 	}
 	return nodes, nil
diff --git a/pkg/operator/k8sutil/node_test.go b/pkg/operator/k8sutil/node_test.go
index c3cff521dfb7..54dc4b5df831 100644
--- a/pkg/operator/k8sutil/node_test.go
+++ b/pkg/operator/k8sutil/node_test.go
@@ -336,6 +336,22 @@ func TestRookNodesMatchingKubernetesNodes(t *testing.T) {
 	// no k8s nodes specified
 	retNodes = RookNodesMatchingKubernetesNodes(rookStorage, []v1.Node{})
 	assert.Len(t, retNodes, 0)
+
+	// custom node hostname label
+	t.Setenv("ROOK_CUSTOM_HOSTNAME_LABEL", "my_custom_hostname_label")
+	n0.Labels["my_custom_hostname_label"] = "node0-custom-hostname"
+	k8sNodes[0] = n0
+
+	rookStorage.Nodes = []cephv1.Node{
+		{Name: "node0"},
+		{Name: "node1"},
+		{Name: "node2"}}
+	retNodes = RookNodesMatchingKubernetesNodes(rookStorage, k8sNodes)
+	assert.Len(t, retNodes, 3)
+	// this should return nodes named by hostname if that is available
+	assert.Contains(t, retNodes, cephv1.Node{Name: "node0-custom-hostname"})
+	assert.Contains(t, retNodes, cephv1.Node{Name: "node1"})
+	assert.Contains(t, retNodes, cephv1.Node{Name: "node2"})
 }
 
 func TestGenerateNodeAffinity(t *testing.T) {
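
Note for reviewers: the whole change hinges on the `LabelHostname()` helper added in `pkg/operator/k8sutil/labels.go`. The snippet below is a minimal, standalone sketch of that fallback logic for trying it out locally; it re-implements the helper instead of importing the Rook packages, and the label name `my_custom_hostname_label` and node name `node1` are made-up examples, not values from this patch.

```go
package main

import (
	"fmt"
	"os"
)

// labelHostname mirrors the fallback logic of the k8sutil.LabelHostname()
// helper introduced by this patch: prefer ROOK_CUSTOM_HOSTNAME_LABEL when it
// is set, otherwise use the standard "kubernetes.io/hostname" label
// (corev1.LabelHostname).
func labelHostname() string {
	if label := os.Getenv("ROOK_CUSTOM_HOSTNAME_LABEL"); label != "" {
		return label
	}
	return "kubernetes.io/hostname"
}

func main() {
	// Default: the well-known Kubernetes hostname label is used.
	fmt.Println(labelHostname()) // kubernetes.io/hostname

	// With the env var set (customHostnameLabel in the Helm chart, or
	// ROOK_CUSTOM_HOSTNAME_LABEL in operator.yaml), node selectors and
	// topology keys are built from the custom label instead.
	os.Setenv("ROOK_CUSTOM_HOSTNAME_LABEL", "my_custom_hostname_label")
	nodeSelector := map[string]string{labelHostname(): "node1"}
	fmt.Println(nodeSelector) // map[my_custom_hostname_label:node1]
}
```

Because every call site in the patch (mon, mgr, OSD, crash collector, exporter, RGW, CSI, topology extraction) now goes through this single helper, switching the label is a one-place configuration change; the fallback to `kubernetes.io/hostname` keeps existing clusters unchanged when the env var is unset.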