func()

in oracle/controllers/instancecontroller/instance_controller_restore.go [50:307]


// restoreStateMachine drives a forced restore of inst from a backup. Each call
// inspects the Reason of instanceReadyCond (the current state) and performs at
// most one step: it waits (RequeueAfter), fails the restore through
// setRestoreFailed, or moves the Ready condition to the next reason:
//
//	RestoreComplete | CreateComplete | RestoreFailed -> RestorePreparationInProgress
//	RestorePreparationInProgress                     -> RestorePreparationComplete
//	RestorePreparationComplete                       -> RestoreInProgress
//	RestoreInProgress                                -> PostRestoreBootstrapInProgress
//	PostRestoreBootstrapInProgress                   -> PostRestoreBootstrapComplete
//	PostRestoreBootstrapComplete                     -> setRestoreSucceeded, or PostRestoreDatabasePatchingInProgress
//	PostRestoreDatabasePatchingInProgress            -> setRestoreSucceeded
//
// Before the state switch it returns a zero Result and a nil error (so the
// caller can continue with the main reconciliation) when instanceReadyCond is
// nil or CreateInProgress, when dbInstanceCond is nil or is neither
// RestorePending nor CreateComplete, or when inst.Spec.Restore.Force is false.
// It also resolves the backup to restore from and waits until that backup's
// Ready condition is true.
//
// The returned error is non-nil when a step failed in a way that should be
// retried by the controller, or when recording a failure/success itself failed.
func (r *InstanceReconciler) restoreStateMachine(req ctrl.Request, instanceReadyCond *v1.Condition, dbInstanceCond *v1.Condition, inst *v1alpha1.Instance, ctx context.Context, stsParams controllers.StsParams, log logr.Logger) (ctrl.Result, error) {
	log.Info("restoreStateMachine start")

	// Check instance is provisioned
	if instanceReadyCond == nil || k8s.ConditionReasonEquals(instanceReadyCond, k8s.CreateInProgress) {
		log.Info("restoreStateMachine: instance not ready yet, proceed with main reconciliation")
		return ctrl.Result{}, nil
	}

	// Check database instance is ready for restore
	if dbInstanceCond == nil || (!k8s.ConditionReasonEquals(dbInstanceCond, k8s.RestorePending) && !k8s.ConditionReasonEquals(dbInstanceCond, k8s.CreateComplete)) {
		log.Info("restoreStateMachine: database instance is not ready for restore, proceed with main reconciliation")
		return ctrl.Result{}, nil
	}

	// Check the Force flag
	if !inst.Spec.Restore.Force {
		log.Info("instance is up and running. To replace (restore from a backup), set force=true")
		return ctrl.Result{}, nil
	}

	// Find the requested backup resource
	backup, err := r.findBackupForRestore(ctx, *inst, req.Namespace, log)
	if err != nil {
		log.Error(err, "findBackupForRestore failed")
		e := r.setRestoreFailed(ctx, inst, fmt.Sprintf(
			"Could not find a matching backup for BackupID: %v, BackupRef: %v, BackupType: %v, PITRRestore: %v. Error message: %v",
			inst.Spec.Restore.BackupID, inst.Spec.Restore.BackupRef, inst.Spec.Restore.BackupType, inst.Spec.Restore.PITRRestore, err), log)
		return ctrl.Result{}, e
	}

	// Check if the Backup object is in Ready status
	backupReadyCond := k8s.FindCondition(backup.Status.Conditions, k8s.Ready)
	if !k8s.ConditionStatusEquals(backupReadyCond, v1.ConditionTrue) {
		if k8s.ConditionReasonEquals(backupReadyCond, k8s.BackupFailed) {
			e := r.setRestoreFailed(ctx, inst, "Backup is in failed state", log)
			return ctrl.Result{}, e
		}
		log.Info("Backup is in progress, waiting")
		return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
	}

	log.Info("Found backup object for restore", "backup", backup)
	switch instanceReadyCond.Reason {
	// Entry points for restore process
	case k8s.RestoreComplete, k8s.CreateComplete, k8s.RestoreFailed:
		if inst.Spec.Restore.BackupType != "Snapshot" && inst.Spec.Restore.BackupType != "Physical" {
			// Not playing games here. A restore (especially the in-place restore)
			// is destructive. It's not about being user-friendly. A user is to
			// be specific as to what kind of backup they want to restore from.
			log.Error(fmt.Errorf("a BackupType is a mandatory parameter for a restore"), "stopping")
			return ctrl.Result{}, nil
		}
		// Check the request time: a request is only honoured when it is strictly
		// newer than the last restore that was started.
		requestTime := inst.Spec.Restore.RequestTime.Rfc3339Copy()
		if inst.Status.LastRestoreTime != nil && !requestTime.After(inst.Status.LastRestoreTime.Time) {
			log.Info(fmt.Sprintf("skipping the restore request as requestTime=%v is not later than the last restore time %v",
				requestTime, inst.Status.LastRestoreTime.Time))
			return ctrl.Result{}, nil
		}
		// Acquire maintenance lock
		if e := AcquireInstanceMaintenanceLock(ctx, r.Client, inst, "instancecontroller"); e != nil {
			log.Error(e, "AcquireInstanceMaintenanceLock failed")
			return ctrl.Result{RequeueAfter: 5 * time.Second}, e
		}
		// Record the request time so the same request is not started twice, and
		// commit the new state before doing any work.
		inst.Status.LastRestoreTime = inst.Spec.Restore.RequestTime.DeepCopy()
		inst.Status.BackupID = ""
		k8s.InstanceUpsertCondition(&inst.Status, k8s.Ready, v1.ConditionFalse, k8s.RestorePreparationInProgress, "")
		if err := r.Status().Update(ctx, inst); err != nil {
			return ctrl.Result{}, err
		}
		log.Info(fmt.Sprintf("restoreStateMachine: %s->RestorePreparationInProgress", instanceReadyCond.Reason))
		// Reconcile again
		return ctrl.Result{Requeue: true}, nil
	case k8s.RestorePreparationInProgress:
		switch inst.Spec.Restore.BackupType {
		case "Snapshot":
			// Cleanup STS and PVCs.
			done, err := r.deleteOldSTSandPVCs(ctx, *inst, stsParams, log)
			if err != nil {
				if e := r.setRestoreFailed(ctx, inst, err.Error(), log); e != nil {
					return ctrl.Result{}, e
				}
				return ctrl.Result{}, err
			}
			if !done {
				log.Info("STS/PVC removal in progress, waiting")
				// err is known to be nil here, so return nil explicitly.
				return ctrl.Result{RequeueAfter: 5 * time.Second}, nil
			}
		case "Physical":
			// Do nothing in this step.
		}

		// NOTE(review): this transition only changes the in-memory inst.Status;
		// no Status().Update is issued here. Presumably the caller persists the
		// status after this function returns — confirm.
		k8s.InstanceUpsertCondition(&inst.Status, k8s.Ready, v1.ConditionFalse, k8s.RestorePreparationComplete, "")
		log.Info("restoreStateMachine: RestorePreparationInProgress->RestorePreparationComplete")
		// Reconcile again
		return ctrl.Result{Requeue: true}, nil
	case k8s.RestorePreparationComplete:
		// Update status and commit it to k8s before we proceed.
		// This will protect us from a case where we start a restore job but fail to update our status.
		k8s.InstanceUpsertCondition(&inst.Status, k8s.Ready, v1.ConditionFalse, k8s.RestoreInProgress, "")
		if err := r.Status().Update(ctx, inst); err != nil {
			return ctrl.Result{}, err
		}
		log.Info("restoreStateMachine: RestorePreparationComplete->RestoreInProgress")
		switch inst.Spec.Restore.BackupType {
		case "Snapshot":
			// Launch the restore process
			if err := r.restoreSnapshot(ctx, *inst, stsParams, log); err != nil {
				return ctrl.Result{}, err
			}
			log.Info("restore from a storage snapshot: started")
		case "Physical":
			// Launch the LRO
			operation, err := r.restorePhysical(ctx, *inst, backup, req, log)
			switch {
			case err != nil:
				// An "already exists" error is not treated as a failure: fall
				// through and requeue so the RestoreInProgress state polls it.
				if !controllers.IsAlreadyExistsError(err) {
					log.Error(err, "PhysicalRestore failed")
					return ctrl.Result{}, err
				}
			case operation.Done:
				// we're dealing with non LRO version of restore
				log.Info("encountered synchronous version of PhysicalRestore")
				log.Info("PhysicalRestore DONE")
				log.Info("restoreStateMachine: CreateComplete->RestoreComplete")
				message := fmt.Sprintf("Physical restore done. Elapsed Time: %v",
					k8s.ElapsedTimeFromLastTransitionTime(k8s.FindCondition(inst.Status.Conditions, k8s.Ready), time.Second))
				if e := r.setRestoreSucceeded(ctx, inst, message, log); e != nil {
					return ctrl.Result{}, e
				}
			default:
				log.Info("PhysicalRestore started")
			}
		}
		// Reconcile again
		return ctrl.Result{Requeue: true}, nil
	case k8s.RestoreInProgress:
		// done reports whether the restore finished; restoreErr carries either a
		// polling error (done == false) or the restore's own failure (done == true).
		var (
			done       bool
			restoreErr error
		)
		switch inst.Spec.Restore.BackupType {
		case "Snapshot":
			done, restoreErr = r.isSnapshotRestoreDone(ctx, *inst, log)
		case "Physical":
			id := lroRestoreOperationID(physicalRestore, *inst)
			done, restoreErr = controllers.IsLROOperationDone(ctx, r.DatabaseClientFactory, r.Client, id, inst.GetNamespace(), inst.GetName())
			// Clean up LRO after we are done.
			// The job will remain available for `ttlAfterDelete`.
			if done {
				// The cleanup result is intentionally discarded; it does not
				// affect the outcome of the restore.
				_ = controllers.DeleteLROOperation(ctx, r.DatabaseClientFactory, r.Client, id, inst.Namespace, inst.Name)
				if restoreErr != nil {
					backupID := inst.Spec.Restore.BackupID
					backupType := inst.Spec.Restore.BackupType

					restoreErr = fmt.Errorf("Failed to restore on %s-%d from backup %s (type %s): %v.", time.Now().Format(dateFormat),
						time.Now().Nanosecond(), backupID, backupType, restoreErr.Error())
				}
			}
		default:
			e := r.setRestoreFailed(ctx, inst, "Unknown restore type", log)
			return ctrl.Result{}, e
		}

		if !done {
			if restoreErr != nil {
				// let the controller retry
				return ctrl.Result{}, restoreErr
			}
			log.Info("restore still in progress, waiting")
			return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
		}

		// if done and the error is not nil
		if restoreErr != nil {
			if e := r.setRestoreFailed(ctx, inst, restoreErr.Error(), log); e != nil {
				return ctrl.Result{}, e
			}
			return ctrl.Result{}, restoreErr
		}
		log.Info("restoreStateMachine: RestoreInProgress->PostRestoreBootstrapInProgress")
		k8s.InstanceUpsertCondition(&inst.Status, k8s.Ready, v1.ConditionFalse, k8s.PostRestoreBootstrapInProgress, "")
		// Reconcile again
		return ctrl.Result{Requeue: true}, r.Status().Update(ctx, inst)
	case k8s.PostRestoreBootstrapInProgress:
		switch inst.Spec.Restore.BackupType {
		case "Snapshot":
			oracleRunning, err := r.isOracleUpAndRunning(ctx, inst, req.Namespace, log)
			if err != nil {
				log.Error(err, "failed to check the database instance status")
				return ctrl.Result{}, err
			}
			if !oracleRunning {
				log.Info("post restore bootstrap still in progress, waiting")
				return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
			}
		case "Physical":
			// Named bootstrapReq so it does not shadow the reconcile request req.
			bootstrapReq := controllers.BootstrapDatabaseRequest{
				CdbName:      inst.Spec.CDBName,
				DbUniqueName: inst.Spec.DBUniqueName,
				Dbdomain:     controllers.GetDBDomain(inst),
				Mode:         controllers.BootstrapDatabaseRequest_Restore,
			}

			if _, err := controllers.BootstrapDatabase(ctx, r, r.DatabaseClientFactory, inst.Namespace, inst.Name, bootstrapReq); err != nil {
				if e := r.setRestoreFailed(ctx, inst, fmt.Sprintf("Post restore bootstrap failed with %v", err), log); e != nil {
					return ctrl.Result{}, e
				}
				return ctrl.Result{}, nil
			}
		}

		log.Info("restoreStateMachine: PostRestoreBootstrapInProgress->PostRestoreBootstrapComplete")
		k8s.InstanceUpsertCondition(&inst.Status, k8s.Ready, v1.ConditionFalse, k8s.PostRestoreBootstrapComplete, "")
		// Reconcile again
		return ctrl.Result{Requeue: true}, r.Status().Update(ctx, inst)
	case k8s.PostRestoreBootstrapComplete:
		// When the backup was taken with the same database image that is
		// currently active, no patching is needed and the restore is complete.
		if backup.Annotations[controllers.DatabaseImageAnnotation] == inst.Status.ActiveImages["service"] {
			description := fmt.Sprintf("Restored on %s-%d from backup %s (type %s)", time.Now().Format(dateFormat),
				time.Now().Nanosecond(), inst.Spec.Restore.BackupID, inst.Spec.Restore.BackupType)
			log.Info("restoreStateMachine: PostRestoreBootstrapComplete->RestoreComplete")
			// Surface a failure to record success so the controller retries,
			// matching the handling in PostRestoreDatabasePatchingInProgress.
			if e := r.setRestoreSucceeded(ctx, inst, description, log); e != nil {
				log.Error(e, "setRestoreSucceeded returned an error, retrying")
				return ctrl.Result{}, e
			}
			return ctrl.Result{}, r.Status().Update(ctx, inst)
		}
		if err := r.startDatabasePatching(req, ctx, *inst, log); err != nil {
			k8s.InstanceUpsertCondition(&inst.Status, k8s.Ready, v1.ConditionFalse, k8s.RestoreFailed, "Failed to start database patching")
			// Do not drop a failure to record the failed state.
			if e := r.setRestoreFailed(ctx, inst, fmt.Sprintf("Post restore database patching failed with %v", err), log); e != nil {
				return ctrl.Result{}, e
			}
			return ctrl.Result{}, nil
		}
		log.Info("restoreStateMachine: PostRestoreBootstrapComplete->PostRestoreDatabasePatchingInProgress")
		k8s.InstanceUpsertCondition(&inst.Status, k8s.Ready, v1.ConditionFalse, k8s.PostRestoreDatabasePatchingInProgress, "Calling ApplyDataPatch()")
		return ctrl.Result{Requeue: true}, r.Status().Update(ctx, inst)
	case k8s.PostRestoreDatabasePatchingInProgress:
		// Monitor patching progress
		done, err := r.isDatabasePatchingDone(ctx, req, *inst, log)
		if err != nil {
			log.Info("restoreStateMachine: PostRestoreDatabasePatchingInProgress->RestoreFailed")
			k8s.InstanceUpsertCondition(&inst.Status, k8s.Ready, v1.ConditionFalse, k8s.RestoreFailed, "Failed to check datapatch status")
			// Do not drop a failure to record the failed state.
			if e := r.setRestoreFailed(ctx, inst, fmt.Sprintf("Post restore database patching failed with %v", err), log); e != nil {
				return ctrl.Result{}, e
			}
			return ctrl.Result{}, nil
		}
		if !done {
			log.Info("datapatch still in progress, waiting")
			return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
		}
		description := fmt.Sprintf("Restored on %s-%d from backup %s (type %s)", time.Now().Format(dateFormat),
			time.Now().Nanosecond(), inst.Spec.Restore.BackupID, inst.Spec.Restore.BackupType)
		log.Info("restoreStateMachine: PostRestoreDatabasePatchingInProgress->RestoreComplete")
		if e := r.setRestoreSucceeded(ctx, inst, description, log); e != nil {
			log.Error(e, "setRestoreSucceeded returned an error, retrying")
			return ctrl.Result{}, e
		}
		return ctrl.Result{Requeue: true}, nil
	default:
		log.Info("restoreStateMachine: no action needed, proceed with main reconciliation")
	}
	return ctrl.Result{}, nil
}