func()

in controllers/hyperparametertuningjob/hyperparametertuningjob_controller.go [139:223]


// reconcileTuningJob runs one reconciliation pass for ctx.TuningJob.
//
// The pass proceeds in this order:
//  1. If the resource has no status yet, set it to InitializingJobStatus.
//  2. Initialize the request context.
//  3. Add the SageMaker finalizer unless the resource is marked for deletion.
//  4. Describe the job in SageMaker. A nil description is treated as "the job
//     does not exist": the finalizer is removed if the resource is being
//     deleted, otherwise the job is created and described again.
//  5. Spawn missing training jobs and add the best training job to the status.
//  6. Handle deletion according to the job status reported by SageMaker.
//  7. Write the reported status (and failure reason, if any) to the resource.
//
// A nil description or nil status pointer returned by SageMaker is never
// dereferenced; those cases are surfaced as errors (or, when the job vanished
// while being deleted, resolved by removing the finalizer).
//
// It returns nil when the pass completes, otherwise the error from the step
// that failed.
func (r *Reconciler) reconcileTuningJob(ctx reconcileRequestContext) error {
	var err error

	if ctx.TuningJob.Status.HyperParameterTuningJobStatus == "" {
		if err = r.updateStatus(ctx, controllers.InitializingJobStatus); err != nil {
			return err
		}
	}

	if err = r.initializeContext(&ctx); err != nil {
		return r.updateStatusAndReturnError(ctx, string(sagemaker.HyperParameterTuningJobStatusFailed), errors.Wrap(err, "Unable to initialize operator"))
	}

	// Add finalizer if it's not marked for deletion.
	if !controllers.HasDeletionTimestamp(ctx.TuningJob.ObjectMeta) {
		if !controllers.ContainsString(ctx.TuningJob.ObjectMeta.GetFinalizers(), controllers.SageMakerResourceFinalizerName) {
			ctx.TuningJob.ObjectMeta.Finalizers = append(ctx.TuningJob.ObjectMeta.Finalizers, controllers.SageMakerResourceFinalizerName)
			if err = r.Update(ctx, ctx.TuningJob); err != nil {
				return errors.Wrap(err, "Failed to add finalizer")
			}
			ctx.Log.Info("Finalizer added")
		}
	}

	// Get the HyperParameterTuningJob from SageMaker
	if ctx.TuningJobDescription, err = ctx.SageMakerClient.DescribeHyperParameterTuningJob(ctx, ctx.TuningJobName); err != nil {
		return r.updateStatusAndReturnError(ctx, ReconcilingTuningJobStatus, errors.Wrap(err, "Unable to describe SageMaker hyperparameter tuning job"))
	}

	// The resource does not exist within SageMaker yet.
	if ctx.TuningJobDescription == nil {
		if controllers.HasDeletionTimestamp(ctx.TuningJob.ObjectMeta) {
			// Don't attempt to clean up resources as none should exist yet
			return r.removeFinalizer(ctx)
		}

		if err = r.createHyperParameterTuningJob(ctx); err != nil {
			return r.updateStatusAndReturnError(ctx, ReconcilingTuningJobStatus, errors.Wrap(err, "Unable to create hyperparameter tuning job"))
		}

		if ctx.TuningJobDescription, err = ctx.SageMakerClient.DescribeHyperParameterTuningJob(ctx, ctx.TuningJobName); err != nil {
			return r.updateStatusAndReturnError(ctx, ReconcilingTuningJobStatus, errors.Wrap(err, "Unable to describe SageMaker hyperparameter tuning job"))
		}

		// Everything below dereferences the description, so a job that is
		// still not visible after creation must be reported, not dereferenced.
		if ctx.TuningJobDescription == nil {
			return r.updateStatusAndReturnError(ctx, ReconcilingTuningJobStatus, fmt.Errorf("Unable to find SageMaker hyperparameter tuning job after creating it"))
		}
	}

	// Spawn training jobs regardless of the status
	ctx.HPOTrainingJobSpawner.SpawnMissingTrainingJobs(ctx, *ctx.TuningJob)
	if err = r.addBestTrainingJobToStatus(ctx); err != nil {
		return r.updateStatusAndReturnError(ctx, ReconcilingTuningJobStatus, errors.Wrap(err, "Unable to add best training job to status"))
	}

	// The status is a pointer; refuse to dereference it when SageMaker omitted it.
	if ctx.TuningJobDescription.HyperParameterTuningJobStatus == nil {
		return r.updateStatusAndReturnError(ctx, ReconcilingTuningJobStatus, fmt.Errorf("SageMaker hyperparameter tuning job description has no status"))
	}

	switch *ctx.TuningJobDescription.HyperParameterTuningJobStatus {
	case sagemaker.HyperParameterTuningJobStatusInProgress:
		if controllers.HasDeletionTimestamp(ctx.TuningJob.ObjectMeta) {
			// Request to stop the job. If SageMaker returns a 404 then the job has already been deleted.
			if _, stopErr := ctx.SageMakerClient.StopHyperParameterTuningJob(ctx, ctx.TuningJobName); stopErr != nil && !clientwrapper.IsStopHyperParameterTuningJob404Error(stopErr) {
				return r.updateStatusAndReturnError(ctx, ReconcilingTuningJobStatus, errors.Wrap(stopErr, "Unable to delete hyperparameter tuning job"))
			}
			// Describe the new state of the job
			if ctx.TuningJobDescription, err = ctx.SageMakerClient.DescribeHyperParameterTuningJob(ctx, ctx.TuningJobName); err != nil {
				return r.updateStatusAndReturnError(ctx, ReconcilingTuningJobStatus, errors.Wrap(err, "Unable to describe SageMaker hyperparameter tuning job"))
			}
			// The job no longer exists in SageMaker and the resource is being
			// deleted: handle it the same way as a missing job above instead
			// of dereferencing a nil description.
			if ctx.TuningJobDescription == nil {
				return r.removeFinalizer(ctx)
			}
			if ctx.TuningJobDescription.HyperParameterTuningJobStatus == nil {
				return r.updateStatusAndReturnError(ctx, ReconcilingTuningJobStatus, fmt.Errorf("SageMaker hyperparameter tuning job description has no status"))
			}
		}

	case sagemaker.HyperParameterTuningJobStatusStopped, sagemaker.HyperParameterTuningJobStatusFailed, sagemaker.HyperParameterTuningJobStatusCompleted:
		if controllers.HasDeletionTimestamp(ctx.TuningJob.ObjectMeta) {
			return r.cleanupAndRemoveFinalizer(ctx)
		}

	case sagemaker.HyperParameterTuningJobStatusStopping:
		// Nothing to do while the job is stopping; only the status is recorded below.

	default:
		return r.updateStatusAndReturnError(ctx, ReconcilingTuningJobStatus, fmt.Errorf("Unknown Tuning Job Status: %s", *ctx.TuningJobDescription.HyperParameterTuningJobStatus))
	}

	// Both pointers were checked above, including after the post-stop describe.
	status := *ctx.TuningJobDescription.HyperParameterTuningJobStatus
	additional := controllers.GetOrDefault(ctx.TuningJobDescription.FailureReason, "")

	return r.updateStatusWithAdditional(ctx, status, additional)
}