private void resolveCluster()

in genie-web/src/main/java/com/netflix/genie/web/services/impl/JobResolverServiceImpl.java [452:570]


    /**
     * Resolve the cluster a job should run on and record it in the resolution context.
     *
     * <p>The command previously stored in {@code context} is used to look up its candidate clusters. Each
     * configured {@link ClusterSelector} is then consulted in iteration order; the first selector that returns a
     * cluster wins. A selector that returns no preference, or that throws, is skipped and the next one is tried.
     * One increment of {@code CLUSTER_SELECTOR_COUNTER} is published per selector consulted, and the overall
     * duration is always published to {@code RESOLVE_CLUSTER_TIMER}, whether resolution succeeds or fails.
     *
     * @param context The job resolution context; must already hold the resolved command and the
     *                command-to-candidate-clusters map. On success the selected cluster is set on it.
     * @throws GenieJobResolutionException        If none of the selectors selected a cluster
     * @throws GenieJobResolutionRuntimeException Wrapping any other failure, including the
     *                                            {@link IllegalStateException}s raised when the context is missing
     *                                            the command, the candidate map, or candidates for the command
     */
    private void resolveCluster(final JobResolutionContext context) throws GenieJobResolutionException {
        final long start = System.nanoTime();
        // Tags for the overall timer; filled in as the outcome becomes known and published in the finally block.
        final Set<Tag> tags = new HashSet<>();

        final String jobId = context.getJobId();
        try {
            final Command command = context
                .getCommand()
                .orElseThrow(
                    () -> new IllegalStateException(
                        "Command not resolved before attempting to resolve a cluster for job " + jobId
                    )
                );
            final Set<Cluster> candidateClusters = context
                .getCommandClusters()
                .orElseThrow(
                    () -> new IllegalStateException("Command to candidate cluster map not available for job " + jobId)
                )
                .get(command);
            if (candidateClusters == null || candidateClusters.isEmpty()) {
                throw new IllegalStateException(
                    "Command " + command.getId() + " had no candidate clusters for job " + jobId
                );
            }

            Cluster cluster = null;
            for (final ClusterSelector clusterSelector : this.clusterSelectors) {
                // Create subset of tags just for this selector. Copy existing tags if any.
                final Set<Tag> selectorTags = new HashSet<>(tags);
                // Note: The class name tag is added before the selection because if the selector throws an
                //       exception the tag would otherwise be missing from the metrics.
                final String clusterSelectorClass = this.getProxyObjectClassName(clusterSelector);
                selectorTags.add(Tag.of(MetricsConstants.TagKeys.CLASS_NAME, clusterSelectorClass));

                try {
                    final ResourceSelectionResult<Cluster> result = clusterSelector.select(
                        new ClusterSelectionContext(
                            jobId,
                            context.getJobRequest(),
                            context.isApiJob(),
                            command,
                            candidateClusters
                        )
                    );

                    final Optional<Cluster> selectedClusterOptional = result.getSelectedResource();
                    if (selectedClusterOptional.isPresent()) {
                        cluster = selectedClusterOptional.get();
                        LOG.debug(
                            "Successfully selected cluster {} using selector {} for job {} with rationale: {}",
                            cluster.getId(),
                            clusterSelectorClass,
                            jobId,
                            result.getSelectionRationale().orElse(NO_RATIONALE)
                        );
                        selectorTags.add(Tag.of(MetricsConstants.TagKeys.STATUS, CLUSTER_SELECTOR_STATUS_SUCCESS));
                        selectorTags.add(Tag.of(MetricsConstants.TagKeys.CLUSTER_ID, cluster.getId()));
                        selectorTags.add(
                            Tag.of(MetricsConstants.TagKeys.CLUSTER_NAME, cluster.getMetadata().getName())
                        );
                        // First selector to pick a cluster wins; the finally block still publishes its counter.
                        break;
                    } else {
                        selectorTags.add(
                            Tag.of(MetricsConstants.TagKeys.STATUS, CLUSTER_SELECTOR_STATUS_NO_PREFERENCE)
                        );
                        selectorTags.add(NO_CLUSTER_RESOLVED_ID);
                        selectorTags.add(NO_CLUSTER_RESOLVED_NAME);
                        LOG.debug(
                            "Selector {} returned no preference with rationale: {}",
                            clusterSelectorClass,
                            result.getSelectionRationale().orElse(NO_RATIONALE)
                        );
                    }
                } catch (final Exception e) {
                    // Swallow the exception and proceed to the next selector.
                    // This is a deliberate choice to provide "best-service": select a cluster as long as it
                    // matches criteria, even if one of the selectors encountered an error and cannot choose the
                    // best candidate.
                    MetricsUtils.addFailureTagsWithException(selectorTags, e);
                    LOG.warn(
                        "Cluster selector {} evaluation threw exception for job {}",
                        clusterSelectorClass,
                        jobId,
                        e
                    );
                } finally {
                    // Exactly one counter increment per selector consulted, whatever the outcome.
                    this.registry.counter(CLUSTER_SELECTOR_COUNTER, selectorTags).increment();
                }
            }

            if (cluster == null) {
                throw new GenieJobResolutionException("No cluster resolved for job " + jobId);
            }

            LOG.debug("Resolved cluster {} for job {}", cluster.getId(), jobId);

            context.setCluster(cluster);
            final String clusterId = cluster.getId();
            final String clusterName = cluster.getMetadata().getName();
            tags.add(Tag.of(MetricsConstants.TagKeys.CLUSTER_ID, clusterId));
            tags.add(Tag.of(MetricsConstants.TagKeys.CLUSTER_NAME, clusterName));
            final SpanCustomizer spanCustomizer = context.getSpanCustomizer();
            this.tagAdapter.tag(spanCustomizer, TracingConstants.JOB_CLUSTER_ID_TAG, clusterId);
            this.tagAdapter.tag(spanCustomizer, TracingConstants.JOB_CLUSTER_NAME_TAG, clusterName);
            // Mark success only after every fallible step above has completed. If any of them throws, the catch
            // blocks below add failure tags to this same set, and the timer must not be published with both.
            // NOTE(review): presumes addSuccessTags/addFailureTagsWithException add differing status tags --
            // confirm against MetricsUtils.
            MetricsUtils.addSuccessTags(tags);
        } catch (final GenieJobResolutionException e) {
            tags.add(NO_CLUSTER_RESOLVED_ID);
            tags.add(NO_CLUSTER_RESOLVED_NAME);
            MetricsUtils.addFailureTagsWithException(tags, e);
            throw e;
        } catch (final Throwable t) {
            MetricsUtils.addFailureTagsWithException(tags, t);
            throw new GenieJobResolutionRuntimeException(t);
        } finally {
            // Always publish the elapsed time, tagged with whatever outcome was recorded above.
            this.registry
                .timer(RESOLVE_CLUSTER_TIMER, tags)
                .record(System.nanoTime() - start, TimeUnit.NANOSECONDS);
        }
    }