public JobModel calculateJobModel()

in samza-core/src/main/java/org/apache/samza/coordinator/JobModelCalculator.java [79:155]


  /**
   * Builds the {@link JobModel}: groups the input system stream partitions into tasks, gives every task a changelog
   * partition, and then groups the tasks into containers.
   *
   * @param originalConfig job config; it is first passed through {@code refreshConfigByRegexTopicRewriter} and the
   *                       resulting config is what the rest of this method (and the returned model) uses
   * @param changeLogPartitionMapping changelog partition ids that were already assigned, keyed by task name; tasks
   *                                  found in this map keep their id, all other tasks receive the next ids above the
   *                                  current maximum
   * @param streamMetadataCache handed to {@code getMatchedInputStreamPartitions} when resolving the input partitions
   * @param grouperMetadata source of the processor locality (its keys form the processor list) and input to the
   *                        groupers
   * @return job model made of the refreshed config and the container models keyed by container id
   */
  public JobModel calculateJobModel(Config originalConfig, Map<TaskName, Integer> changeLogPartitionMapping,
      StreamMetadataCache streamMetadataCache, GrouperMetadata grouperMetadata) {
    // Apply the regex topic rewriter (if enabled); only the refreshed config is used from here on.
    Config refreshedConfig = refreshConfigByRegexTopicRewriter(originalConfig);
    TaskConfig taskConfig = new TaskConfig(refreshedConfig);

    // The input partitions that need to be spread across tasks.
    Set<SystemStreamPartition> inputPartitions =
        getMatchedInputStreamPartitions(refreshedConfig, streamMetadataCache);

    // Some groupers need the processor list, so it is added to a copy of the config that is given to the grouper.
    // TODO: It is non-ideal to have config as a medium to transmit the locality information; especially, if the
    // locality information evolves. Evaluate options on using context objects to pass dependent components.
    Map<String, String> grouperConfigMap = new HashMap<>(refreshedConfig);
    String processorList = String.join(",", grouperMetadata.getProcessorLocality().keySet());
    grouperConfigMap.put(JobConfig.PROCESSOR_LIST, processorList);
    SystemStreamPartitionGrouper sspGrouper = getSystemStreamPartitionGrouper(new MapConfig(grouperConfigMap));

    JobConfig jobConfig = new JobConfig(refreshedConfig);

    // TaskName -> partitions, computed either by the bare grouper or through the SSPGrouperProxy.
    Map<TaskName, Set<SystemStreamPartition>> taskToPartitions;
    if (!jobConfig.isSSPGrouperProxyEnabled()) {
      String proxyDisabledWarning = String.format(
          "SSPGrouperProxy is disabled (%s = false). Stateful jobs may produce erroneous results if this is not enabled.",
          JobConfig.SSP_INPUT_EXPANSION_ENABLED);
      LOG.warn(proxyDisabledWarning);
      taskToPartitions = sspGrouper.group(inputPartitions);
    } else {
      SSPGrouperProxy proxiedGrouper = new SSPGrouperProxy(refreshedConfig, sspGrouper);
      taskToPartitions = proxiedGrouper.group(inputPartitions, grouperMetadata);
    }
    String groupingSummary = String.format(
        "SystemStreamPartitionGrouper %s has grouped the SystemStreamPartitions into %d tasks with the following taskNames: %s",
        sspGrouper, taskToPartitions.size(), taskToPartitions);
    LOG.info(groupingSummary);

    // Highest changelog partition id handed out so far. With an empty mapping (first run of the job) this is -1,
    // which makes 0 the first id that gets assigned.
    int highestChangelogId =
        changeLogPartitionMapping.values().stream().max(Comparator.naturalOrder()).orElse(-1);

    // Walk the tasks in sorted order so that newly assigned changelog partitions are reproducible and intuitive.
    Map<TaskName, Set<SystemStreamPartition>> orderedGroups = new TreeMap<>(taskToPartitions);
    Set<TaskModel> taskModels = new HashSet<>();
    for (Map.Entry<TaskName, Set<SystemStreamPartition>> entry : orderedGroups.entrySet()) {
      TaskName taskName = entry.getKey();
      Optional<Integer> knownChangelogId = Optional.ofNullable(changeLogPartitionMapping.get(taskName));
      int changelogId;
      if (!knownChangelogId.isPresent()) {
        // First time this TaskName shows up: it gets the next unused changelog partition.
        highestChangelogId++;
        LOG.info(
            String.format("New task %s is being assigned changelog partition %s.", taskName, highestChangelogId));
        changelogId = highestChangelogId;
      } else {
        changelogId = knownChangelogId.get();
      }
      taskModels.add(new TaskModel(taskName, entry.getValue(), new Partition(changelogId)));
    }

    // Here is where we should put in a pluggable option for the SSPTaskNameGrouper for locality, load-balancing, etc.
    TaskNameGrouperFactory taskNameGrouperFactory =
        ReflectionUtil.getObj(taskConfig.getTaskNameGrouperFactory(), TaskNameGrouperFactory.class);
    boolean standbyEnabled = jobConfig.getStandbyTasksEnabled();
    int standbyReplicationFactor = jobConfig.getStandbyTaskReplicationFactor();
    TaskNameGrouperProxy containerGrouper = new TaskNameGrouperProxy(
        taskNameGrouperFactory.build(refreshedConfig), standbyEnabled, standbyReplicationFactor);

    // With host affinity the grouper gets the full grouper metadata; otherwise only the processor ids.
    boolean hostAffinityEnabled = new ClusterManagerConfig(refreshedConfig).getHostAffinityEnabled();
    Set<ContainerModel> containerModels = hostAffinityEnabled
        ? containerGrouper.group(taskModels, grouperMetadata)
        : containerGrouper.group(taskModels, new ArrayList<>(grouperMetadata.getProcessorLocality().keySet()));

    // Index the container models by their id for the job model.
    Map<String, ContainerModel> containersById =
        containerModels.stream().collect(Collectors.toMap(ContainerModel::getId, Function.identity()));
    return new JobModel(refreshedConfig, containersById);
  }