Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bug 2254547: object: improve the error handling for multisite objs #561

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 41 additions & 25 deletions pkg/operator/ceph/object/objectstore.go
Original file line number Diff line number Diff line change
Expand Up @@ -392,29 +392,55 @@ func getZoneEndpoints(objContext *Context, serviceEndpoint string) ([]string, bo
return zoneEndpointsList, isEndpointAlreadyExists, nil
}

func createMultisiteConfigurations(objContext *Context, configType, configTypeArg string, args ...string) error {
args = append([]string{configType}, args...)
args = append(args, configTypeArg)
// get the multisite config before creating
output, getConfigErr := RunAdminCommandNoMultisite(objContext, true, configType, "get", configTypeArg)
if getConfigErr == nil {
return nil
}

if kerrors.IsNotFound(getConfigErr) {
// the pod used to exec command (act as a proxy) is not found/ready yet
// caller can nicely handle error and not overflow logs with misleading error messages
return getConfigErr
}

code, err := exec.ExtractExitCode(getConfigErr)
if err != nil {
return errorOrIsNotFound(getConfigErr, "'radosgw-admin %q get' failed with code %q, for reason %q, error: (%v)", configType, strconv.Itoa(code), output, string(kerrors.ReasonForError(err)))
}
// ENOENT means "No such file or directory"
if code != int(syscall.ENOENT) {
code := strconv.Itoa(code)
return errors.Wrapf(getConfigErr, "'radosgw-admin %q get' failed with code %q, for reason %q", configType, code, output)
}

// create the object if it doesn't exist yet
output, err = RunAdminCommandNoMultisite(objContext, false, args...)
if err != nil {
return errorOrIsNotFound(err, "failed to create ceph %q %q, for reason %q", configType, configTypeArg, output)
}
logger.Debugf("created %q %q", configType, configTypeArg)

return nil
}

func createMultisite(objContext *Context, endpointArg string) error {
logger.Debugf("creating realm, zone group, zone for object-store %v", objContext.Name)

realmArg := fmt.Sprintf("--rgw-realm=%s", objContext.Realm)
zoneGroupArg := fmt.Sprintf("--rgw-zonegroup=%s", objContext.ZoneGroup)
zoneArg := fmt.Sprintf("--rgw-zone=%s", objContext.Zone)

// create the realm if it doesn't exist yet
output, err := RunAdminCommandNoMultisite(objContext, true, "realm", "get", realmArg)
err := createMultisiteConfigurations(objContext, "realm", realmArg, "create")
if err != nil {
// ENOENT means "No such file or directory"
if code, err := exec.ExtractExitCode(err); err == nil && code == int(syscall.ENOENT) {
output, err = RunAdminCommandNoMultisite(objContext, false, "realm", "create", realmArg)
if err != nil {
return errorOrIsNotFound(err, "failed to create ceph realm %q, for reason %q", objContext.ZoneGroup, output)
}
logger.Debugf("created realm %q", objContext.Realm)
} else {
return errorOrIsNotFound(err, "'radosgw-admin realm get' failed with code %d, for reason %q. %v", strconv.Itoa(code), output, string(kerrors.ReasonForError(err)))
}
return err
}

// create the zonegroup if it doesn't exist yet
output, err = RunAdminCommandNoMultisite(objContext, true, "zonegroup", "get", realmArg, zoneGroupArg)
output, err := RunAdminCommandNoMultisite(objContext, true, "zonegroup", "get", realmArg, zoneGroupArg)
if err != nil {
// ENOENT means "No such file or directory"
if code, err := exec.ExtractExitCode(err); err == nil && code == int(syscall.ENOENT) {
Expand All @@ -428,19 +454,9 @@ func createMultisite(objContext *Context, endpointArg string) error {
}
}

// create the zone if it doesn't exist yet
output, err = runAdminCommand(objContext, true, "zone", "get")
err = createMultisiteConfigurations(objContext, "zone", zoneArg, "create", "--master", endpointArg, realmArg, zoneGroupArg)
if err != nil {
// ENOENT means "No such file or directory"
if code, err := exec.ExtractExitCode(err); err == nil && code == int(syscall.ENOENT) {
output, err = runAdminCommand(objContext, false, "zone", "create", "--master", endpointArg)
if err != nil {
return errorOrIsNotFound(err, "failed to create ceph zone %q, for reason %q", objContext.Zone, output)
}
logger.Debugf("created zone %q", objContext.Zone)
} else {
return errorOrIsNotFound(err, "'radosgw-admin zone get' failed with code %d, for reason %q", strconv.Itoa(code), output)
}
return err
}

if err := commitConfigChanges(objContext); err != nil {
Expand Down
73 changes: 73 additions & 0 deletions pkg/operator/ceph/object/objectstore_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,14 @@ import (
"github.com/rook/rook/pkg/daemon/ceph/client"
cephver "github.com/rook/rook/pkg/operator/ceph/version"
"github.com/rook/rook/pkg/operator/k8sutil"
"github.com/rook/rook/pkg/util/exec"
exectest "github.com/rook/rook/pkg/util/exec/test"
"github.com/stretchr/testify/assert"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
k8sfake "k8s.io/client-go/kubernetes/fake"
kexec "k8s.io/utils/exec"
)

const (
Expand Down Expand Up @@ -669,6 +671,77 @@ func Test_createMultisite(t *testing.T) {
}
}

func getExecutor() []exec.Executor {
executor := []exec.Executor{&exectest.MockExecutor{
MockExecuteCommandWithTimeout: func(timeout time.Duration, command string, args ...string) (string, error) {
if args[0] == "realm" {
return `{
"id": "237e6250-5f7d-4b85-9359-8cb2b1848507",
"name": "realm-a",
"current_period": "df665ecb-1762-47a9-9c66-f938d251c02a",
"epoch": 2
}`, nil
}
return "", nil
},
},
&exectest.MockExecutor{
MockExecuteCommandWithTimeout: func(timeout time.Duration, command string, args ...string) (string, error) {
if args[0] == "realm" {
return `{}`, errors.Errorf("Error from server (NotFound): pods not found")
}
return "", nil
},
},
&exectest.MockExecutor{
MockExecuteCommandWithTimeout: func(timeout time.Duration, command string, args ...string) (string, error) {
if args[0] == "realm" {
return `{}`, &kexec.CodeExitError{Err: errors.New("some error"), Code: 4}
}
return "", nil
},
},
&exectest.MockExecutor{
MockExecuteCommandWithTimeout: func(timeout time.Duration, command string, args ...string) (string, error) {
if args[0] == "realm" {
return `{}`, &kexec.CodeExitError{Err: errors.New("some other error"), Code: 2}
}
return "", nil
},
},
}
return executor
}

func getreturnErrString() []string {
returnErr := []string{
"",
"'radosgw-admin [\"realm\" \"-1\" \"{}. \" \"\"] get' failed with code %!q(MISSING), for reason %!q(MISSING), error: (%!v(MISSING)): Error from server (NotFound): pods not found",
"'radosgw-admin \"realm\" get' failed with code \"4\", for reason \"{}. \": some error",
"failed to create ceph [\"realm\" \"--rgw-realm=\" \"{}. \"] %!q(MISSING), for reason %!q(MISSING): some other error",
}
return returnErr
}

func Test_createMultisiteConfigurations(t *testing.T) {
executor := getExecutor()
returnErrString := getreturnErrString()
for i := 0; i < 4; i++ {
ctx := &clusterd.Context{
Executor: executor[i],
}
objContext := NewContext(ctx, &client.ClusterInfo{Namespace: "my-cluster"}, "my-store")
realmArg := fmt.Sprintf("--rgw-realm=%s", objContext.Realm)

err := createMultisiteConfigurations(objContext, "realm", realmArg, "create")
if i == 0 {
assert.NoError(t, err)
} else {
assert.Contains(t, err.Error(), returnErrString[i])
}
}
}

func TestGetRealmKeySecret(t *testing.T) {
ns := "my-ns"
realmName := "my-realm"
Expand Down
2 changes: 1 addition & 1 deletion tests/framework/clients/object.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ func (o *ObjectOperation) Create(namespace, storeName string, replicaCount int32
}

// Starting an object store takes longer than the average operation, so add more retries
err := o.k8sh.WaitForLabeledPodsToRunWithRetries(fmt.Sprintf("rook_object_store=%s", storeName), namespace, 40)
err := o.k8sh.WaitForLabeledPodsToRunWithRetries(fmt.Sprintf("rook_object_store=%s", storeName), namespace, 80)
if err != nil {
return fmt.Errorf("rgw did not start via crd. %+v", err)
}
Expand Down
2 changes: 1 addition & 1 deletion tests/integration/ceph_base_object_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ func createCephObjectStore(t *testing.T, helper *clients.TestClient, k8sh *utils

// Check object store status
t.Run("verify object store status", func(t *testing.T) {
retryCount := 30
retryCount := 40
i := 0
for i = 0; i < retryCount; i++ {
objectStore, err := k8sh.RookClientset.CephV1().CephObjectStores(namespace).Get(ctx, storeName, metav1.GetOptions{})
Expand Down
Loading