public String launchJob()

in genie-web/src/main/java/com/netflix/genie/web/services/impl/JobLaunchServiceImpl.java [114:223]


    /**
     * Persist a job submission, resolve it, mark it accepted and launch an agent for it.
     *
     * <p>The duration of every invocation is recorded in the {@code LAUNCH_JOB_TIMER} timer, tagged with the
     * outcome (success, or failure plus the exception) and, once a launcher has been selected, its class name.
     * Span annotations are emitted after each completed step.
     *
     * <p>Clean-up performed after a failure (archive status / job status updates) is best effort: if a clean-up
     * call itself fails, that secondary error is logged and attached to the original exception as suppressed so
     * that the caller always sees the root failure.
     *
     * @param jobSubmission The job submission to save and launch
     * @return The id returned by the persistence service when the submission was saved
     * @throws AgentLaunchException        If the job could not be moved from RESOLVED to ACCEPTED or the selected
     *                                     launcher failed to launch the agent
     * @throws GenieJobResolutionException Propagated from job resolution
     * @throws IdAlreadyExistsException    Propagated from the underlying services (presumably when saving the
     *                                     submission with an id already in use -- confirm against the
     *                                     persistence service contract)
     * @throws NotFoundException           Propagated from the underlying persistence calls
     */
    public String launchJob(
        @Valid final JobSubmission jobSubmission
    ) throws
        AgentLaunchException,
        GenieJobResolutionException,
        IdAlreadyExistsException,
        NotFoundException {
        final long start = System.nanoTime();
        final SpanCustomizer span = this.tracer.currentSpanCustomizer();
        span.annotate(BEGIN_LAUNCH_JOB_ANNOTATION);
        final Set<Tag> tags = Sets.newHashSet();
        try {
            /*
             * Steps:
             *
             * 1. Save the job information
             * 2. Attempt to resolve the job information (includes saving)
             * 3. Mark the job as accepted
             * 4. Launch the agent process given the implementation configured for this Genie instance
             * 5. If the agent launch fails mark the job failed else return
             */
            final String jobId = this.persistenceService.saveJobSubmission(jobSubmission);
            span.annotate(SAVED_JOB_SUBMISSION_ANNOTATION);

            final ResolvedJob resolvedJob;
            try {
                resolvedJob = this.jobResolverService.resolveJob(jobId);
            } catch (final Throwable t) {
                final String message = t instanceof GenieJobResolutionException
                    ? JobStatusMessages.FAILED_TO_RESOLVE_JOB
                    : JobStatusMessages.RESOLUTION_RUNTIME_ERROR;

                // Both clean-up steps are attempted independently; neither may replace the resolution failure.
                this.markArchiveStatusNoFilesQuietly(jobId, t);
                this.markJobFailedQuietly(jobId, JobStatus.RESERVED, message, t);
                throw t; // Caught below for metrics gathering
            }
            span.annotate(RESOLVED_JOB_ANNOTATION);

            // Job state should be RESOLVED now. Mark it ACCEPTED to avoid race condition with agent starting up
            // before we get return from launchAgent and trying to set it to CLAIMED
            try {
                final JobStatus updatedStatus = this.updateJobStatus(
                    jobId,
                    JobStatus.RESOLVED,
                    JobStatus.ACCEPTED,
                    ACCEPTED_MESSAGE,
                    INITIAL_ATTEMPT
                );
                if (updatedStatus != JobStatus.ACCEPTED) {
                    throw new AgentLaunchException("Unable to mark job accepted. Job state " + updatedStatus);
                }
            } catch (final Exception e) {
                this.markArchiveStatusNoFilesQuietly(jobId, e);
                // TODO: Failed to update the status to accepted. Try to set it to failed or rely on other cleanup
                //       mechanism? For now rely on janitor mechanisms
                throw e;
            }
            span.annotate(MARKED_JOB_ACCEPTED_ANNOTATION);

            // TODO: at the moment this is not populated, it's going to be a null node (not null)
            final JsonNode requestedLauncherExt = this.persistenceService.getRequestedLauncherExt(jobId);

            final Optional<JsonNode> launcherExt;
            try {
                final AgentLauncher launcher = this.selectLauncher(jobId, jobSubmission, resolvedJob);
                // getCanonicalName() returns null for anonymous/local classes; fall back to the binary name so
                // the tag value is never null.
                final Class<?> launcherClass = launcher.getClass();
                final String canonicalName = launcherClass.getCanonicalName();
                tags.add(
                    Tag.of(LAUNCHER_CLASS_TAG, canonicalName != null ? canonicalName : launcherClass.getName())
                );
                launcherExt = launcher.launchAgent(resolvedJob, requestedLauncherExt);
            } catch (final AgentLaunchException e) {
                // NOTE(review): only AgentLaunchException triggers the FAILED transition; any other exception
                //               leaves the job ACCEPTED -- presumably left to the janitor mechanisms, confirm.
                this.markArchiveStatusNoFilesQuietly(jobId, e);
                this.markJobFailedQuietly(jobId, JobStatus.ACCEPTED, e.getMessage(), e);
                // TODO: How will we get the ID back to the user? Should we add it to an exception?
                //       We don't get the ID until after saveJobSubmission so if that fails we'd still return nothing
                //       Probably need multiple exceptions to be thrown from this API (if we go with checked)
                throw e;
            }
            span.annotate(LAUNCHED_AGENT_ANNOTATION);

            if (launcherExt.isPresent()) {
                try {
                    this.persistenceService.updateLauncherExt(jobId, launcherExt.get());
                } catch (final Exception e) {
                    // Being unable to update the launcher ext is not optimal however
                    // it's not worth returning an error to the user at this point as
                    // the agent has launched and we have all the other pieces in place
                    log.error("Unable to update the launcher ext for job {}", jobId, e);
                }
            }
            span.annotate(SAVED_LAUNCHER_EXT_ANNOTATION);

            MetricsUtils.addSuccessTags(tags);
            return jobId;
        } catch (final Throwable t) {
            // Single place where failure tags are recorded for every error path above
            MetricsUtils.addFailureTagsWithException(tags, t);
            throw t;
        } finally {
            span.annotate(END_LAUNCH_JOB_ANNOTATION);
            this.registry
                .timer(LAUNCH_JOB_TIMER, tags)
                .record(System.nanoTime() - start, TimeUnit.NANOSECONDS);
        }
    }

    /**
     * Best-effort update of a job's archive status to NO_FILES after a launch-path failure. Any error raised by
     * the update is logged and attached to {@code cause} as suppressed instead of being thrown.
     *
     * @param jobId The id of the job to update
     * @param cause The failure that triggered the clean-up and that the caller is about to rethrow
     */
    private void markArchiveStatusNoFilesQuietly(final String jobId, final Throwable cause) {
        try {
            this.persistenceService.updateJobArchiveStatus(jobId, ArchiveStatus.NO_FILES);
        } catch (final Exception cleanupError) {
            log.error("Unable to update the archive status for job {}", jobId, cleanupError);
            cause.addSuppressed(cleanupError);
        }
    }

    /**
     * Best-effort transition of a job to FAILED after a launch-path failure. Any error raised by the update is
     * logged and attached to {@code cause} as suppressed instead of being thrown.
     *
     * @param jobId          The id of the job to update
     * @param expectedStatus The status the job is expected to be in before the transition
     * @param message        The status message to store with the FAILED status
     * @param cause          The failure that triggered the clean-up and that the caller is about to rethrow
     */
    private void markJobFailedQuietly(
        final String jobId,
        final JobStatus expectedStatus,
        final String message,
        final Throwable cause
    ) {
        try {
            final JobStatus finalStatus = this.updateJobStatus(
                jobId,
                expectedStatus,
                JobStatus.FAILED,
                message,
                INITIAL_ATTEMPT
            );
            if (finalStatus != JobStatus.FAILED) {
                log.error("Updating status to failed didn't succeed");
            }
        } catch (final Exception cleanupError) {
            log.error("Unable to mark job {} failed", jobId, cleanupError);
            cause.addSuppressed(cleanupError);
        }
    }