in oracle/controllers/instancecontroller/instance_controller_restore.go [50:307]
// restoreStateMachine drives an instance restore one step per reconciliation,
// keyed on the Reason of the instance's Ready condition.
//
// The steps visible in this function are:
//
//	CreateComplete | RestoreComplete | RestoreFailed
//	  -> RestorePreparationInProgress
//	  -> RestorePreparationComplete
//	  -> RestoreInProgress
//	  -> PostRestoreBootstrapInProgress
//	  -> PostRestoreBootstrapComplete
//	  -> RestoreComplete, or PostRestoreDatabasePatchingInProgress -> RestoreComplete
//
// Parameters:
//   - req: the reconcile request; its Namespace is used to look up the backup.
//   - instanceReadyCond: the instance Ready condition; nil or CreateInProgress
//     means the instance is not provisioned yet and nothing is done here.
//   - dbInstanceCond: the database instance condition; a restore is only
//     attempted when its reason is RestorePending or CreateComplete.
//   - inst: the instance being restored; its Status is mutated in place.
//   - ctx: context passed to every API and agent call.
//   - stsParams: StatefulSet parameters used by the snapshot restore path.
//   - log: logger for progress and error messages.
//
// Returns an empty Result and a nil error when there is nothing to do, so the
// caller can proceed with the main reconciliation. Otherwise it returns a
// Result asking for a requeue (immediate or delayed) and/or the error that
// stopped the current step.
func (r *InstanceReconciler) restoreStateMachine(req ctrl.Request, instanceReadyCond *v1.Condition, dbInstanceCond *v1.Condition, inst *v1alpha1.Instance, ctx context.Context, stsParams controllers.StsParams, log logr.Logger) (ctrl.Result, error) {
	log.Info("restoreStateMachine start")

	// Check instance is provisioned
	if instanceReadyCond == nil || k8s.ConditionReasonEquals(instanceReadyCond, k8s.CreateInProgress) {
		log.Info("restoreStateMachine: instance not ready yet, proceed with main reconciliation")
		return ctrl.Result{}, nil
	}

	// Check database instance is ready for restore
	if dbInstanceCond == nil || (!k8s.ConditionReasonEquals(dbInstanceCond, k8s.RestorePending) && !k8s.ConditionReasonEquals(dbInstanceCond, k8s.CreateComplete)) {
		log.Info("restoreStateMachine: database instance is not ready for restore, proceed with main reconciliation")
		return ctrl.Result{}, nil
	}

	// Check the Force flag
	if !inst.Spec.Restore.Force {
		log.Info("instance is up and running. To replace (restore from a backup), set force=true")
		return ctrl.Result{}, nil
	}

	// Find the requested backup resource
	backup, err := r.findBackupForRestore(ctx, *inst, req.Namespace, log)
	if err != nil {
		log.Error(err, "findBackupForRestore failed")
		e := r.setRestoreFailed(ctx, inst, fmt.Sprintf(
			"Could not find a matching backup for BackupID: %v, BackupRef: %v, BackupType: %v, PITRRestore: %v. Error message: %v",
			inst.Spec.Restore.BackupID, inst.Spec.Restore.BackupRef, inst.Spec.Restore.BackupType, inst.Spec.Restore.PITRRestore, err), log)
		return ctrl.Result{}, e
	}

	// Check if the Backup object is in Ready status
	backupReadyCond := k8s.FindCondition(backup.Status.Conditions, k8s.Ready)
	if !k8s.ConditionStatusEquals(backupReadyCond, v1.ConditionTrue) {
		if k8s.ConditionReasonEquals(backupReadyCond, k8s.BackupFailed) {
			e := r.setRestoreFailed(ctx, inst, "Backup is in failed state", log)
			return ctrl.Result{}, e
		}
		log.Info("Backup is in progress, waiting")
		return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
	}

	log.Info("Found backup object for restore", "backup", backup)

	switch instanceReadyCond.Reason {
	// Entry points for restore process
	case k8s.RestoreComplete, k8s.CreateComplete, k8s.RestoreFailed:
		if inst.Spec.Restore.BackupType != "Snapshot" && inst.Spec.Restore.BackupType != "Physical" {
			// Not playing games here. A restore (especially the in-place restore)
			// is destructive. It's not about being user-friendly. A user is to
			// be specific as to what kind of backup they want to restore from.
			log.Error(fmt.Errorf("a BackupType is a mandatory parameter for a restore"), "stopping")
			return ctrl.Result{}, nil
		}

		// Check the request time: a request is only honored when it is strictly
		// newer than the last restore recorded in the status.
		requestTime := inst.Spec.Restore.RequestTime.Rfc3339Copy()
		if inst.Status.LastRestoreTime != nil && !requestTime.After(inst.Status.LastRestoreTime.Time) {
			log.Info(fmt.Sprintf("skipping the restore request as requestTime=%v is not later than the last restore time %v",
				requestTime, inst.Status.LastRestoreTime.Time))
			return ctrl.Result{}, nil
		}

		// Acquire maintenance lock
		if e := AcquireInstanceMaintenanceLock(ctx, r.Client, inst, "instancecontroller"); e != nil {
			log.Error(e, "AcquireInstanceMaintenanceLock failed")
			return ctrl.Result{RequeueAfter: 5 * time.Second}, e
		}

		inst.Status.LastRestoreTime = inst.Spec.Restore.RequestTime.DeepCopy()
		inst.Status.BackupID = ""
		k8s.InstanceUpsertCondition(&inst.Status, k8s.Ready, v1.ConditionFalse, k8s.RestorePreparationInProgress, "")
		if err := r.Status().Update(ctx, inst); err != nil {
			return ctrl.Result{}, err
		}
		log.Info(fmt.Sprintf("restoreStateMachine: %s->RestorePreparationInProgress", instanceReadyCond.Reason))
		// Reconcile again
		return ctrl.Result{Requeue: true}, nil

	case k8s.RestorePreparationInProgress:
		switch inst.Spec.Restore.BackupType {
		case "Snapshot":
			// Cleanup STS and PVCs.
			done, err := r.deleteOldSTSandPVCs(ctx, *inst, stsParams, log)
			if err != nil {
				if e := r.setRestoreFailed(ctx, inst, err.Error(), log); e != nil {
					return ctrl.Result{}, e
				}
				return ctrl.Result{}, err
			}
			if !done {
				log.Info("STS/PVC removal in progress, waiting")
				// err is known to be nil here; return a literal nil for clarity.
				return ctrl.Result{RequeueAfter: 5 * time.Second}, nil
			}
		case "Physical":
			// Do nothing in this step.
		}
		// NOTE(review): unlike the other transitions, this one does not call
		// r.Status().Update here; presumably the caller persists inst.Status
		// after this function returns — confirm.
		k8s.InstanceUpsertCondition(&inst.Status, k8s.Ready, v1.ConditionFalse, k8s.RestorePreparationComplete, "")
		log.Info("restoreStateMachine: RestorePreparationInProgress->RestorePreparationComplete")
		// Reconcile again
		return ctrl.Result{Requeue: true}, nil

	case k8s.RestorePreparationComplete:
		// Update status and commit it to k8s before we proceed.
		// This will protect us from a case where we start a restore job but fail to update our status.
		k8s.InstanceUpsertCondition(&inst.Status, k8s.Ready, v1.ConditionFalse, k8s.RestoreInProgress, "")
		if err := r.Status().Update(ctx, inst); err != nil {
			return ctrl.Result{}, err
		}
		log.Info("restoreStateMachine: RestorePreparationComplete->RestoreInProgress")

		switch inst.Spec.Restore.BackupType {
		case "Snapshot":
			// Launch the restore process
			if err := r.restoreSnapshot(ctx, *inst, stsParams, log); err != nil {
				return ctrl.Result{}, err
			}
			log.Info("restore from a storage snapshot: started")
		case "Physical":
			// Launch the LRO
			operation, err := r.restorePhysical(ctx, *inst, backup, req, log)
			if err != nil {
				// An "already exists" error is tolerated and falls through to
				// the requeue below; any other error aborts this step.
				if !controllers.IsAlreadyExistsError(err) {
					log.Error(err, "PhysicalRestore failed")
					return ctrl.Result{}, err
				}
			} else {
				if operation.Done {
					// we're dealing with non LRO version of restore
					log.Info("encountered synchronous version of PhysicalRestore")
					log.Info("PhysicalRestore DONE")
					log.Info("restoreStateMachine: CreateComplete->RestoreComplete")
					message := fmt.Sprintf("Physical restore done. Elapsed Time: %v",
						k8s.ElapsedTimeFromLastTransitionTime(k8s.FindCondition(inst.Status.Conditions, k8s.Ready), time.Second))
					if e := r.setRestoreSucceeded(ctx, inst, message, log); e != nil {
						return ctrl.Result{}, e
					}
				} else {
					log.Info("PhysicalRestore started")
				}
			}
		}
		// Reconcile again
		return ctrl.Result{Requeue: true}, nil

	case k8s.RestoreInProgress:
		done, err := false, error(nil)
		switch inst.Spec.Restore.BackupType {
		case "Snapshot":
			done, err = r.isSnapshotRestoreDone(ctx, *inst, log)
		case "Physical":
			id := lroRestoreOperationID(physicalRestore, *inst)
			done, err = controllers.IsLROOperationDone(ctx, r.DatabaseClientFactory, r.Client, id, inst.GetNamespace(), inst.GetName())
			// Clean up LRO after we are done.
			// The job will remain available for `ttlAfterDelete`.
			if done {
				// Best-effort cleanup: a failed delete must not mask the restore outcome.
				_ = controllers.DeleteLROOperation(ctx, r.DatabaseClientFactory, r.Client, id, inst.Namespace, inst.Name)
				if err != nil {
					backupID := inst.Spec.Restore.BackupID
					backupType := inst.Spec.Restore.BackupType
					err = fmt.Errorf("Failed to restore on %s-%d from backup %s (type %s): %v.", time.Now().Format(dateFormat),
						time.Now().Nanosecond(), backupID, backupType, err.Error())
				}
			}
		default:
			e := r.setRestoreFailed(ctx, inst, "Unknown restore type", log)
			return ctrl.Result{}, e
		}

		if !done {
			if err != nil {
				// let the controller retry
				return ctrl.Result{}, err
			}
			log.Info("restore still in progress, waiting")
			return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
		}

		// if done and the error is not nil
		if err != nil {
			if e := r.setRestoreFailed(ctx, inst, err.Error(), log); e != nil {
				return ctrl.Result{}, e
			}
			return ctrl.Result{}, err
		}

		log.Info("restoreStateMachine: RestoreInProgress->PostRestoreBootstrapInProgress")
		k8s.InstanceUpsertCondition(&inst.Status, k8s.Ready, v1.ConditionFalse, k8s.PostRestoreBootstrapInProgress, "")
		// Reconcile again
		return ctrl.Result{Requeue: true}, r.Status().Update(ctx, inst)

	case k8s.PostRestoreBootstrapInProgress:
		switch inst.Spec.Restore.BackupType {
		case "Snapshot":
			oracleRunning, err := r.isOracleUpAndRunning(ctx, inst, req.Namespace, log)
			if err != nil {
				log.Error(err, "failed to check the database instance status")
				return ctrl.Result{}, err
			}
			if !oracleRunning {
				log.Info("post restore bootstrap still in progress, waiting")
				return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
			}
		case "Physical":
			// Named bootstrapReq so it does not shadow the reconcile request parameter.
			bootstrapReq := &controllers.BootstrapDatabaseRequest{
				CdbName:      inst.Spec.CDBName,
				DbUniqueName: inst.Spec.DBUniqueName,
				Dbdomain:     controllers.GetDBDomain(inst),
				Mode:         controllers.BootstrapDatabaseRequest_Restore,
			}
			if _, err := controllers.BootstrapDatabase(ctx, r, r.DatabaseClientFactory, inst.Namespace, inst.Name, *bootstrapReq); err != nil {
				if e := r.setRestoreFailed(ctx, inst, fmt.Sprintf("Post restore bootstrap failed with %v", err), log); e != nil {
					return ctrl.Result{}, e
				}
				return ctrl.Result{}, nil
			}
		}
		log.Info("restoreStateMachine: PostRestoreBootstrapInProgress->PostRestoreBootstrapComplete")
		k8s.InstanceUpsertCondition(&inst.Status, k8s.Ready, v1.ConditionFalse, k8s.PostRestoreBootstrapComplete, "")
		// Reconcile again
		return ctrl.Result{Requeue: true}, r.Status().Update(ctx, inst)

	case k8s.PostRestoreBootstrapComplete:
		// When the backup was taken with the same service image that is active
		// now, no data patching is needed and the restore is finished.
		if backup.Annotations[controllers.DatabaseImageAnnotation] == inst.Status.ActiveImages["service"] {
			description := fmt.Sprintf("Restored on %s-%d from backup %s (type %s)", time.Now().Format(dateFormat),
				time.Now().Nanosecond(), inst.Spec.Restore.BackupID, inst.Spec.Restore.BackupType)
			log.Info("restoreStateMachine: PostRestoreBootstrapComplete->RestoreComplete")
			// Surface the error instead of dropping it, as the other
			// setRestoreSucceeded call sites in this function do.
			if e := r.setRestoreSucceeded(ctx, inst, description, log); e != nil {
				log.Error(e, "setRestoreSucceeded returned an error, retrying")
				return ctrl.Result{}, e
			}
			return ctrl.Result{}, r.Status().Update(ctx, inst)
		}

		if err := r.startDatabasePatching(req, ctx, *inst, log); err != nil {
			k8s.InstanceUpsertCondition(&inst.Status, k8s.Ready, v1.ConditionFalse, k8s.RestoreFailed, "Failed to start database patching")
			// Propagate a failure to record the failed state so the controller retries.
			if e := r.setRestoreFailed(ctx, inst, fmt.Sprintf("Post restore database patching failed with %v", err), log); e != nil {
				return ctrl.Result{}, e
			}
			return ctrl.Result{}, nil
		}
		log.Info("restoreStateMachine: PostRestoreBootstrapComplete->PostRestoreDatabasePatchingInProgress")
		k8s.InstanceUpsertCondition(&inst.Status, k8s.Ready, v1.ConditionFalse, k8s.PostRestoreDatabasePatchingInProgress, "Calling ApplyDataPatch()")
		return ctrl.Result{Requeue: true}, r.Status().Update(ctx, inst)

	case k8s.PostRestoreDatabasePatchingInProgress:
		// Monitor patching progress
		done, err := r.isDatabasePatchingDone(ctx, req, *inst, log)
		if err != nil {
			log.Info("restoreStateMachine: PostRestoreDatabasePatchingInProgress->RestoreFailed")
			k8s.InstanceUpsertCondition(&inst.Status, k8s.Ready, v1.ConditionFalse, k8s.RestoreFailed, "Failed to check datapatch status")
			// Propagate a failure to record the failed state so the controller retries.
			if e := r.setRestoreFailed(ctx, inst, fmt.Sprintf("Post restore database patching failed with %v", err), log); e != nil {
				return ctrl.Result{}, e
			}
			return ctrl.Result{}, nil
		}
		if !done {
			log.Info("datapatch still in progress, waiting")
			return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
		}
		description := fmt.Sprintf("Restored on %s-%d from backup %s (type %s)", time.Now().Format(dateFormat),
			time.Now().Nanosecond(), inst.Spec.Restore.BackupID, inst.Spec.Restore.BackupType)
		log.Info("restoreStateMachine: PostRestoreDatabasePatchingInProgress->RestoreComplete")
		if e := r.setRestoreSucceeded(ctx, inst, description, log); e != nil {
			log.Error(e, "setRestoreSucceeded returned an error, retrying")
			return ctrl.Result{}, e
		}
		return ctrl.Result{Requeue: true}, nil

	default:
		log.Info("restoreStateMachine: no action needed, proceed with main reconciliation")
	}

	return ctrl.Result{}, nil
}