Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
8b97e35
update specs
May 5, 2026
3fbddd2
update function spec
May 5, 2026
9fd9cec
update functional spec
May 6, 2026
61a376f
update spec
May 6, 2026
bbba505
update spec
May 6, 2026
c01fedd
1. add volume resize API/status schema, regenerate CRDs/deepcopy, and…
May 6, 2026
c154351
add resize RBAC prerequisites (PVC status, PV, events)
May 6, 2026
9892d97
feat: PR2.1 resize validation fencing and spec alignment
May 6, 2026
23a9b16
implement PVC resize engine with strategy-based patching, checkpoint …
May 6, 2026
9342e09
feat: StatefulSet sync and pod restart orchestration
May 6, 2026
062be65
feat: verification phase, completion semantics, and deferred handoff …
May 6, 2026
37a909d
Fix resize sync immutability path and namespace-scope StorageClass ac…
May 6, 2026
fefe138
bug fix for RBAC
May 6, 2026
95b0097
test(controller): add envtest coverage for volume resize request vali…
May 7, 2026
246d585
update spec location
May 8, 2026
230ffdf
improvement based on copilot review
May 11, 2026
681351b
MLE-28304 : add-test-suites for Volume Resizing (#164)
rwinieski May 13, 2026
2cbd1b4
update contents in charts
May 13, 2026
c9e8568
feat(volume-resize): align pause semantics and harden retry/restart b…
May 19, 2026
1dac62e
Potential fix for pull request finding
pengzhouml May 20, 2026
f1208a5
Potential fix for pull request finding
pengzhouml May 20, 2026
8ccb467
Potential fix for pull request finding
pengzhouml May 20, 2026
a8cb1ca
Potential fix for pull request finding
pengzhouml May 20, 2026
fdcf6bb
Use nondeterministic resize retry jitter
Copilot May 20, 2026
2d586f4
Potential fix for pull request finding
pengzhouml May 20, 2026
b072ff9
Potential fix for pull request finding
pengzhouml May 20, 2026
9a6dd20
Revert persistence.enabled to false in quick-start.yaml
Copilot May 20, 2026
29d1100
Fix test summary tracker to correctly handle skipped tests
Copilot May 20, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 26 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ ifeq ($(VERIFY_HUGE_PAGES), true)
minikube start

@echo "=====Running e2e test including hugepages test"
go test -v -count=1 -timeout 30m ./test/e2e -verifyHugePages
IMG=$(IMG) go test -v -count=1 -timeout 30m ./test/e2e -verifyHugePages

@echo "=====Resetting hugepages value to 0"
sudo sysctl -w vm.nr_hugepages=0
Expand All @@ -170,13 +170,13 @@ ifeq ($(VERIFY_HUGE_PAGES), true)
minikube start
else
@echo "=====Running e2e test without hugepages test"
go test -v -count=1 -timeout 30m ./test/e2e
IMG=$(IMG) go test -v -count=1 -timeout 30m ./test/e2e
endif

.PHONY: e2e-test-istio # Run Istio ambient mode e2e tests
e2e-test-istio:
@echo "=====Running Istio ambient mode e2e tests"
E2E_ISTIO_AMBIENT=true go test -v -count=1 -timeout 30m ./test/e2e -run "Test(Istio|NonIstio)"
IMG=$(IMG) E2E_ISTIO_AMBIENT=true go test -v -count=1 -timeout 30m ./test/e2e -run "Test(Istio|NonIstio)"

# NOTE: There is intentionally no `e2e-test-namespace` target here.
# The `test/e2e` suite always deploys the operator via `make deploy`
Expand All @@ -189,13 +189,23 @@ e2e-test-istio:
.PHONY: e2e-test-cluster ## Run e2e tests against a cluster-scoped operator install (alias for `e2e-test`)
e2e-test-cluster:
@echo "=====Running e2e tests in cluster-scoped mode"
go test -v -count=1 -timeout 30m ./test/e2e
IMG=$(IMG) go test -v -count=1 -timeout 30m ./test/e2e

.PHONY: e2e-test-helm-namespace ## Run namespace-scoped e2e tests via Helm chart install (validates Role/RoleBinding, no ClusterRole, insecure metrics on :8080)
e2e-test-helm-namespace:
@echo "=====Running namespace-scoped e2e tests via Helm chart====="
E2E_DOCKER_IMAGE=$(IMG) go test -v -count=1 -timeout 45m ./test/e2e-helm

# Focused e2e run: invokes `go test` on ./test/e2e with -run restricted to
# TestVolumeResizeClusterScoped (30m timeout, -count=1 so results are not cached).
# IMG is passed through to the test process environment.
.PHONY: e2e-test-volume-resize ## Run ONLY the cluster-scoped volume resize test (two namespaces in parallel)
e2e-test-volume-resize:
@echo "=====Running cluster-scoped volume-resize e2e test (parallel, 2 namespaces)====="
IMG=$(IMG) go test -v -count=1 -timeout 30m ./test/e2e -run TestVolumeResizeClusterScoped

# Focused Helm e2e run: invokes `go test` on ./test/e2e-helm with -run restricted to
# TestVolumeResizeNamespaceScoped (30m timeout, -count=1 so results are not cached).
# The operator image is handed to the suite as E2E_DOCKER_IMAGE (set from IMG).
.PHONY: e2e-test-helm-volume-resize ## Run ONLY the namespace-scoped volume resize test via Helm (two watched namespaces in parallel)
e2e-test-helm-volume-resize:
@echo "=====Running namespace-scoped volume-resize e2e test via Helm (parallel, 2 watched namespaces)====="
E2E_DOCKER_IMAGE=$(IMG) go test -v -count=1 -timeout 30m ./test/e2e-helm -run TestVolumeResizeNamespaceScoped

.PHONY: e2e-setup-minikube
e2e-setup-minikube: kustomize controller-gen build docker-build
minikube version
Expand All @@ -204,6 +214,18 @@ e2e-setup-minikube: kustomize controller-gen build docker-build
minikube addons enable ingress
minikube addons enable storage-provisioner
minikube addons enable default-storageclass
@echo "=====Enabling CSI hostpath driver (required for PVC volume expansion tests)"
minikube addons enable volumesnapshots
minikube addons enable csi-hostpath-driver
@echo "=====Making csi-hostpath-sc the default StorageClass (allowVolumeExpansion=true)"
kubectl patch storageclass standard -p '{"metadata":{"annotations":{"storageclass.kubernetes.io/is-default-class":"false"}}}' || true
kubectl patch storageclass csi-hostpath-sc -p '{"metadata":{"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}'
@echo "=====Ensuring csi-hostpath-sc has allowVolumeExpansion=true (some minikube versions ship it disabled)"
kubectl patch storageclass csi-hostpath-sc -p '{"allowVolumeExpansion":true}'
@echo "=====Verifying allowVolumeExpansion is set on csi-hostpath-sc"
@test "$$(kubectl get storageclass csi-hostpath-sc -o jsonpath='{.allowVolumeExpansion}')" = "true" \
|| (echo "ERROR: csi-hostpath-sc.allowVolumeExpansion is not true after patch" && exit 1)
kubectl get storageclass
minikube image load $(IMG)
minikube image load $(E2E_MARKLOGIC_IMAGE_VERSION)
minikube image load "docker.io/haproxytech/haproxy-alpine:3.2"
Expand Down
15 changes: 13 additions & 2 deletions api/v1/common_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,23 @@ type ContainerProbe struct {
FailureThreshold int32 `json:"failureThreshold,omitempty"`
}

// VolumeResizeStrategy defines how PVC resize requests are submitted.
// It is a string-backed type; the values accepted by the Enum validation on
// the fields that use it are the VolumeResizeStrategy* constants.
type VolumeResizeStrategy string

const (
// VolumeResizeStrategyParallel is the "parallel" strategy value.
// NOTE(review): presumably resize requests for all PVCs are submitted together — confirm against the controller.
VolumeResizeStrategyParallel VolumeResizeStrategy = "parallel"
// VolumeResizeStrategySequential is the "sequential" strategy value.
// NOTE(review): presumably PVCs are resized one at a time — confirm against the controller.
VolumeResizeStrategySequential VolumeResizeStrategy = "sequential"
)

// Persistence is the configuration used to add PVC and PV support in MarkLogic.
type Persistence struct {
Enabled bool `json:"enabled,omitempty"`
// +kubebuilder:validation:Required
Size string `json:"size,omitempty"`
StorageClassName string `json:"storageClassName,omitempty"`
Size string `json:"size,omitempty"`
// +kubebuilder:validation:Enum=parallel;sequential
// +kubebuilder:default:=parallel
ResizeStrategy VolumeResizeStrategy `json:"resizeStrategy,omitempty"`
StorageClassName string `json:"storageClassName,omitempty"`
// +kubebuilder:default:={ReadWriteOnce}
AccessModes []corev1.PersistentVolumeAccessMode `json:"accessModes,omitempty"`
Annotations map[string]string `json:"annotations,omitempty"`
Expand Down
114 changes: 111 additions & 3 deletions api/v1/marklogicgroup_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,13 +84,121 @@ type MarklogicGroupSpec struct {
// InternalState defines the observed state of MarklogicGroup
type InternalState string

// VolumeResizePhase is the string-backed phase value reported in
// VolumeResizeStatus.Phase.
type VolumeResizePhase string

// Phase values. This list matches the Enum validation marker on
// VolumeResizeStatus.Phase; keep the two in sync when adding a phase.
// NOTE(review): the declaration order looks like the intended workflow order,
// with Completed, Stalled and Failed as outcomes — confirm against the controller.
const (
VolumeResizePhaseValidating VolumeResizePhase = "Validating"
VolumeResizePhaseResizingPVCs VolumeResizePhase = "ResizingPVCs"
VolumeResizePhaseWaitingForPVCResize VolumeResizePhase = "WaitingForPVCResize"
VolumeResizePhaseSynchronizingStatefulSet VolumeResizePhase = "SynchronizingStatefulSet"
VolumeResizePhaseRestartingPods VolumeResizePhase = "RestartingPods"
VolumeResizePhaseWaitingForPodsReady VolumeResizePhase = "WaitingForPodsReady"
VolumeResizePhaseVerifyingResizeOutcome VolumeResizePhase = "VerifyingResizeOutcome"
VolumeResizePhaseCompleted VolumeResizePhase = "Completed"
VolumeResizePhaseStalled VolumeResizePhase = "Stalled"
VolumeResizePhaseFailed VolumeResizePhase = "Failed"
)

// VolumeResizeReason is the string-backed, machine-readable reason reported in
// VolumeResizeStatus.Reason.
type VolumeResizeReason string

// Reason values. This list matches the Enum validation marker on
// VolumeResizeStatus.Reason; keep the two in sync when adding a reason.
// Note that PVCResizeStatus.LastReason and FailedPVCStatus.Reason are plain
// strings and are not constrained to this set by the schema.
const (
VolumeResizeReasonResizeFailed VolumeResizeReason = "ResizeFailed"
VolumeResizeReasonPartialResizeFailure VolumeResizeReason = "PartialResizeFailure"
VolumeResizeReasonResizeRateLimited VolumeResizeReason = "ResizeRateLimited"
VolumeResizeReasonStorageQuotaExceeded VolumeResizeReason = "StorageQuotaExceeded"
VolumeResizeReasonResizeForbidden VolumeResizeReason = "ResizeForbidden"
VolumeResizeReasonInvalidResizeRequest VolumeResizeReason = "InvalidResizeRequest"
VolumeResizeReasonStorageClassNotExpandable VolumeResizeReason = "StorageClassNotExpandable"
VolumeResizeReasonShrinkNotSupported VolumeResizeReason = "ShrinkNotSupported"
VolumeResizeReasonPVCNotBound VolumeResizeReason = "PVCNotBound"
VolumeResizeReasonConcurrentResize VolumeResizeReason = "ConcurrentResize"
VolumeResizeReasonStatefulSetSyncFailed VolumeResizeReason = "StatefulSetSyncFailed"
VolumeResizeReasonPodRecoveryFailed VolumeResizeReason = "PodRecoveryFailed"
VolumeResizeReasonTemplateUpdateInterrupted VolumeResizeReason = "TemplateUpdateInterrupted"
VolumeResizeReasonMarkLogicHealthCheckFailed VolumeResizeReason = "MarkLogicHealthCheckFailed"
VolumeResizeReasonPaused VolumeResizeReason = "Paused"
VolumeResizeReasonMaxRetriesExceeded VolumeResizeReason = "MaxRetriesExceeded"
VolumeResizeReasonMaxOperationTimeExceeded VolumeResizeReason = "MaxOperationTimeExceeded"
)

// PVCResizeState is the string-backed per-PVC state reported in
// PVCResizeStatus.State.
type PVCResizeState string

// State values. This list matches the Enum validation marker on
// PVCResizeStatus.State; keep the two in sync when adding a state.
const (
PVCResizeStatePending PVCResizeState = "Pending"
PVCResizeStateResizeSubmitted PVCResizeState = "ResizeSubmitted"
PVCResizeStateWaitingForCheckpoint PVCResizeState = "WaitingForCheckpoint"
PVCResizeStateCheckpointed PVCResizeState = "Checkpointed"
PVCResizeStateRestartPending PVCResizeState = "RestartPending"
PVCResizeStateRestarted PVCResizeState = "Restarted"
PVCResizeStateFailed PVCResizeState = "Failed"
)

// PVCResizeCheckpointType is the string-backed checkpoint classification
// reported in PVCResizeStatus.CheckpointType.
type PVCResizeCheckpointType string

// Checkpoint type values. This list matches the Enum validation marker on
// PVCResizeStatus.CheckpointType.
// NOTE(review): "Online"/"Offline" presumably distinguish expansions that
// finish without a pod restart from those that need one — confirm.
const (
PVCResizeCheckpointTypeOnlineComplete PVCResizeCheckpointType = "OnlineComplete"
PVCResizeCheckpointTypeOfflinePending PVCResizeCheckpointType = "OfflinePending"
PVCResizeCheckpointTypeOfflineComplete PVCResizeCheckpointType = "OfflineComplete"
)

// PVCResizeStatus is one entry of VolumeResizeStatus.PVCStatuses and carries
// the resize bookkeeping for a single PVC. Every field is optional in the
// serialized form (omitempty).
type PVCResizeStatus struct {
Name string `json:"name,omitempty"`
PodName string `json:"podName,omitempty"`
// RequestedSize and ObservedCapacity are carried as strings; the unit test
// for this type uses quantity-style values such as "50Gi" and "20Gi".
RequestedSize string `json:"requestedSize,omitempty"`
ObservedCapacity string `json:"observedCapacity,omitempty"`
// +kubebuilder:validation:Enum=Pending;ResizeSubmitted;WaitingForCheckpoint;Checkpointed;RestartPending;Restarted;Failed
State PVCResizeState `json:"state,omitempty"`
// +kubebuilder:validation:Enum=OnlineComplete;OfflinePending;OfflineComplete
CheckpointType PVCResizeCheckpointType `json:"checkpointType,omitempty"`
RestartRequired bool `json:"restartRequired,omitempty"`
// LastReason and LastMessage are free-form strings; unlike
// VolumeResizeStatus.Reason they carry no Enum validation.
LastReason string `json:"lastReason,omitempty"`
LastMessage string `json:"lastMessage,omitempty"`
// LastTransitionTime is a pointer so that an unset value is omitted from
// the serialized status.
LastTransitionTime *metav1.Time `json:"lastTransitionTime,omitempty"`
}

// FailedPVCStatus is one entry of VolumeResizeStatus.FailedPVCs. It pairs a
// name with a free-form reason and message; none of the fields carry schema
// validation and all are omitted from the serialized form when empty.
type FailedPVCStatus struct {
Name string `json:"name,omitempty"`
Reason string `json:"reason,omitempty"`
Message string `json:"message,omitempty"`
}

// VolumeResizeStatus is the volume-resize portion of the observed state,
// exposed as MarklogicGroupStatus.VolumeResizeStatus. Phase, Reason and
// ResizeStrategy are restricted by Enum validation to the VolumeResizePhase*,
// VolumeResizeReason* and VolumeResizeStrategy* constants respectively; every
// field is omitted from the serialized form when empty.
type VolumeResizeStatus struct {
OperationID string `json:"operationID,omitempty"`
ObservedGeneration int64 `json:"observedGeneration,omitempty"`
// +kubebuilder:validation:Enum=Validating;ResizingPVCs;WaitingForPVCResize;SynchronizingStatefulSet;RestartingPods;WaitingForPodsReady;VerifyingResizeOutcome;Completed;Stalled;Failed
Phase VolumeResizePhase `json:"phase,omitempty"`
Message string `json:"message,omitempty"`
// +kubebuilder:validation:Enum=ResizeFailed;PartialResizeFailure;ResizeRateLimited;StorageQuotaExceeded;ResizeForbidden;InvalidResizeRequest;StorageClassNotExpandable;ShrinkNotSupported;PVCNotBound;ConcurrentResize;StatefulSetSyncFailed;PodRecoveryFailed;TemplateUpdateInterrupted;MarkLogicHealthCheckFailed;Paused;MaxRetriesExceeded;MaxOperationTimeExceeded
Reason VolumeResizeReason `json:"reason,omitempty"`
// Sizes are carried as strings rather than parsed quantities.
CurrentSize string `json:"currentSize,omitempty"`
TargetSize string `json:"targetSize,omitempty"`
// NOTE(review): the Deferred* pair presumably records a newer resize request
// held back while another operation is in flight — confirm against the controller.
DeferredTargetSize string `json:"deferredTargetSize,omitempty"`
DeferredObservedGeneration int64 `json:"deferredObservedGeneration,omitempty"`
// +kubebuilder:validation:Enum=parallel;sequential
ResizeStrategy VolumeResizeStrategy `json:"resizeStrategy,omitempty"`
TotalPVCs int32 `json:"totalPvcs,omitempty"`
PVCsCheckpointed int32 `json:"pvcsCheckpointed,omitempty"`
ActivePVC string `json:"activePVC,omitempty"`
// PVCStatuses holds one PVCResizeStatus entry per tracked PVC; FailedPVCs
// holds the name/reason/message triples for entries that failed.
PVCStatuses []PVCResizeStatus `json:"pvcStatuses,omitempty"`
FailedPVCs []FailedPVCStatus `json:"failedPVCs,omitempty"`
// Internal crash-recovery workflow markers for resize reconciliation.
Markers []string `json:"markers,omitempty"`
Warnings []string `json:"warnings,omitempty"`
RetryCount int32 `json:"retryCount,omitempty"`
// The timestamps below are pointers so that unset values are omitted from
// the serialized status.
NextRetryTime *metav1.Time `json:"nextRetryTime,omitempty"`
LastTransitionTime *metav1.Time `json:"lastTransitionTime,omitempty"`
FirstStartedTime *metav1.Time `json:"firstStartedTime,omitempty"`
CompletionTime *metav1.Time `json:"completionTime,omitempty"`
}

// MarklogicGroupStatus defines the observed state of MarklogicGroup
type MarklogicGroupStatus struct {
// INSERT ADDITIONAL STATUS FIELD - define observed state of cluster
// Important: Run "make" to regenerate code after modifying this file
Conditions []metav1.Condition `json:"conditions,omitempty"`
Stage string `json:"stage,omitempty"`
MarkLogicPods []corev1.ObjectReference `json:"active,omitempty"`
Conditions []metav1.Condition `json:"conditions,omitempty"`
Stage string `json:"stage,omitempty"`
MarkLogicPods []corev1.ObjectReference `json:"active,omitempty"`
VolumeResizeStatus *VolumeResizeStatus `json:"volumeResizeStatus,omitempty"`

// +optional
MarklogicGroupStatus InternalState `json:"markLogicGroupStatus,omitempty"`
Expand Down
98 changes: 98 additions & 0 deletions api/v1/marklogicgroup_types_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
package v1

import (
"testing"
"time"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// TestMarklogicGroupDeepCopyVolumeResizeStatus checks that DeepCopy on a
// MarklogicGroup returns a copy that shares no pointers, slices, or timestamps
// with the source for the persistence spec and the volume resize status, so
// edits made to the source afterwards are not visible through the copy.
func TestMarklogicGroupDeepCopyVolumeResizeStatus(t *testing.T) {
	stamp := metav1.NewTime(time.Now())

	// Build the fixture bottom-up so the pieces can be mutated directly later.
	persistence := &Persistence{
		Enabled:        true,
		Size:           "20Gi",
		ResizeStrategy: VolumeResizeStrategySequential,
	}
	pvcEntry := PVCResizeStatus{
		Name:               "data-0",
		PodName:            "pod-0",
		RequestedSize:      "50Gi",
		ObservedCapacity:   "20Gi",
		State:              PVCResizeStateWaitingForCheckpoint,
		CheckpointType:     PVCResizeCheckpointTypeOfflinePending,
		RestartRequired:    true,
		LastReason:         "PVCNotBound",
		LastMessage:        "waiting for pvc to bind",
		LastTransitionTime: &stamp,
	}
	failedEntry := FailedPVCStatus{Name: "data-1", Reason: "ResizeFailed", Message: "api rejected resize"}
	resize := &VolumeResizeStatus{
		OperationID:        "op-1",
		ObservedGeneration: 7,
		Phase:              VolumeResizePhaseWaitingForPVCResize,
		Reason:             VolumeResizeReasonPVCNotBound,
		CurrentSize:        "20Gi",
		TargetSize:         "50Gi",
		ResizeStrategy:     VolumeResizeStrategySequential,
		TotalPVCs:          2,
		PVCsCheckpointed:   1,
		PVCStatuses:        []PVCResizeStatus{pvcEntry},
		FailedPVCs:         []FailedPVCStatus{failedEntry},
		Markers:            []string{"pr4.sync.started"},
		Warnings:           []string{"storage provider delay"},
		LastTransitionTime: &stamp,
	}
	source := &MarklogicGroup{
		Spec:   MarklogicGroupSpec{Persistence: persistence},
		Status: MarklogicGroupStatus{VolumeResizeStatus: resize},
	}

	clone := source.DeepCopy()

	// Pointer identity: nothing reachable from the clone may alias the source.
	switch {
	case clone == source:
		t.Fatalf("expected DeepCopy to return a new instance")
	case clone.Spec.Persistence == source.Spec.Persistence:
		t.Fatalf("expected persistence to be deeply copied")
	case clone.Status.VolumeResizeStatus == source.Status.VolumeResizeStatus:
		t.Fatalf("expected volume resize status to be deeply copied")
	case clone.Status.VolumeResizeStatus.LastTransitionTime == source.Status.VolumeResizeStatus.LastTransitionTime:
		t.Fatalf("expected resize timestamps to be deeply copied")
	}

	// Mutate the source through every nested reference; the clone must not move.
	persistence.ResizeStrategy = VolumeResizeStrategyParallel
	resize.PVCStatuses[0].Name = "data-modified"
	resize.FailedPVCs[0].Reason = "updated"
	resize.Markers[0] = "updated marker"
	resize.Warnings[0] = "updated warning"

	// expect fails the test with failFormat (which takes the observed value)
	// when the clone no longer holds the value it was created with.
	expect := func(got, want, failFormat string) {
		t.Helper()
		if got != want {
			t.Fatalf(failFormat, got)
		}
	}

	cloned := clone.Status.VolumeResizeStatus
	expect(string(clone.Spec.Persistence.ResizeStrategy), string(VolumeResizeStrategySequential), "unexpected copied resize strategy: %s")
	expect(cloned.PVCStatuses[0].Name, "data-0", "unexpected copied pvc status name: %s")
	expect(cloned.FailedPVCs[0].Reason, "ResizeFailed", "unexpected copied failed pvc reason: %s")
	expect(cloned.Markers[0], "pr4.sync.started", "unexpected copied marker: %s")
	expect(cloned.Warnings[0], "storage provider delay", "unexpected copied warning: %s")
}
Loading
Loading