Skip to content

Commit

Permalink
pb-4669: Adding support for partial success backups
Browse files Browse the repository at this point in the history
- For a backup involving multiple PVCs, if the backup of one PVC
  fails, that PVC is marked as Failed and the backup proceeds to
  the next PVC rather than failing the entire backup.
- For a failed PVC, the corresponding resources are not backed up.
- A new status called PartialSuccess is introduced to signify
  this outcome.
  • Loading branch information
prashanthpx committed Feb 1, 2024
1 parent c041411 commit d1f1c8f
Show file tree
Hide file tree
Showing 4 changed files with 152 additions and 38 deletions.
42 changes: 35 additions & 7 deletions drivers/volume/portworx/portworx.go
Original file line number Diff line number Diff line change
Expand Up @@ -3343,14 +3343,20 @@ func (p *portworx) StartBackup(backup *storkapi.ApplicationBackup,
}
return true, nil
})

if err != nil || cloudBackupCreateErr != nil {
if isCloudBackupServerBusyError(cloudBackupCreateErr) {
return volumeInfos, &storkvolume.ErrStorageProviderBusy{Reason: cloudBackupCreateErr.Error()}
volumeInfo.Status = storkapi.ApplicationBackupStatusFailed
volumeInfo.Reason = cloudBackupCreateErr.Error()
volumeInfos = append(volumeInfos, volumeInfo)
continue
}
if _, ok := cloudBackupCreateErr.(*ost_errors.ErrExists); !ok {
return nil, fmt.Errorf("failed to start backup for %v (%v/%v): %v",
volumeInfo.Status = storkapi.ApplicationBackupStatusFailed
volumeInfo.Reason = fmt.Sprintf("%v", cloudBackupCreateErr)
volumeInfos = append(volumeInfos, volumeInfo)
logrus.Infof("failed to start backup for %v (%v/%v): %v",
volume, pvc.Namespace, pvc.Name, cloudBackupCreateErr)
continue
}
} else if err == nil {
// Only add volumeInfos if this was a successful backup
Expand All @@ -3371,31 +3377,53 @@ func (p *portworx) GetBackupStatus(backup *storkapi.ApplicationBackup) ([]*stork
volumeInfos := make([]*storkapi.ApplicationBackupVolumeInfo, 0)
for _, vInfo := range backup.Status.Volumes {
if vInfo.DriverName != storkvolume.PortworxDriverName {
volumeInfos = append(volumeInfos, vInfo)
continue
}
// Skip volumes that are already in a failed state: there is nothing further
// to do for them, so return the original volInfo back to the caller as-is.
if vInfo.Status == storkapi.ApplicationBackupStatusFailed {
volumeInfos = append(volumeInfos, vInfo)
continue
}
token, err := p.getUserToken(vInfo.Options, vInfo.Namespace)
if err != nil {
return nil, fmt.Errorf("failed to fetch portworx user token: %v", err)
logrus.Errorf("failed to fetch portworx user token: %v", err)
vInfo.Reason = fmt.Sprintf("failed to fetch portworx user token: %v", err)
vInfo.Status = storkapi.ApplicationBackupStatusFailed
volumeInfos = append(volumeInfos, vInfo)
continue
}
volDriver, ok := driverMap[token]
if !ok {
volDriver, _, err = p.getUserVolDriverFromToken(token)
if err != nil {
return nil, err
vInfo.Status = storkapi.ApplicationBackupStatusFailed
vInfo.Reason = fmt.Sprintf("%v", err)
logrus.Errorf("%v", err)
volumeInfos = append(volumeInfos, vInfo)
continue
}
driverMap[token] = volDriver
}

cloudBackupClient, err := p.getCloudBackupClient()
if err != nil {
return nil, err
vInfo.Status = storkapi.ApplicationBackupStatusFailed
vInfo.Reason = fmt.Sprintf("%v", err)
volumeInfos = append(volumeInfos, vInfo)
logrus.Errorf("%v", err)
}
ctx, cancel := context.WithTimeout(context.Background(), cloudBackupTimeout)
defer cancel()
if len(token) > 0 {
ctx, err = p.addTokenToContext(ctx, token)
if err != nil {
return nil, err
vInfo.Status = storkapi.ApplicationBackupStatusFailed
vInfo.Reason = fmt.Sprintf("%v", err)
volumeInfos = append(volumeInfos, vInfo)
logrus.Errorf("%v", err)

}
}

Expand Down
5 changes: 5 additions & 0 deletions pkg/apis/stork/v1alpha1/applicationbackup.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ type ApplicationBackupStatus struct {
TotalSize uint64 `json:"totalSize"`
ResourceCount int `json:"resourceCount"`
LargeResourceEnabled bool `json:"largeResourceEnabled"`
FailedVolCount int `json:"failedVolCount"`
}

// ObjectInfo contains info about an object being backed up or restored
Expand All @@ -83,6 +84,8 @@ type ObjectInfo struct {
// ApplicationBackupResourceInfo is the info for the backup of a resource.
// It embeds ObjectInfo and adds a per-resource status and reason.
type ApplicationBackupResourceInfo struct {
// ObjectInfo identifies the object this entry refers to.
ObjectInfo `json:",inline"`
// Status is the backup status recorded for this resource, serialized as "status".
Status ApplicationBackupStatusType `json:"status"`
// Reason is a free-form string serialized as "reason".
// NOTE(review): presumably explains Status (e.g. the failure cause) — confirm against the code that sets it.
Reason string `json:"reason"`
}

// ApplicationBackupVolumeInfo is the info for the backup of a volume
Expand Down Expand Up @@ -120,6 +123,8 @@ const (
ApplicationBackupStatusPartialSuccess ApplicationBackupStatusType = "PartialSuccess"
// ApplicationBackupStatusSuccessful for when backup has completed successfully
ApplicationBackupStatusSuccessful ApplicationBackupStatusType = "Successful"
// ApplicationBackupStatusSkip for when backup has been skipped
ApplicationBackupStatusSkip ApplicationBackupStatusType = "Skipped"
)

// ApplicationBackupStageType is the stage of the backup
Expand Down
Loading

0 comments on commit d1f1c8f

Please sign in to comment.