void FFMapper::map_task()

in src/mapper/mapper.cc [448:621]


void FFMapper::map_task(const MapperContext ctx,
                        const Task& task,
                        const MapTaskInput& input,
                        MapTaskOutput& output)
{
  std::vector<VariantID> variant_ids;
  runtime->find_valid_variants(ctx, task.task_id, variant_ids, task.target_proc.kind());
  // Currently assume there is exactly one variant
  assert(variant_ids.size() == 1);
  output.chosen_variant = variant_ids[0];
  // TODO: assign priorities
  output.task_priority = 0;
  output.postmap_task = false;
  if (task.target_proc.address_space() != node_id) {
    output.target_procs.push_back(task.target_proc);
  } else if (task.target_proc.kind() == Processor::TOC_PROC) {
    output.target_procs.push_back(task.target_proc);
  } else if (task.target_proc.kind() == Processor::LOC_PROC) {
    // Put any of our CPU procs here
    // If we're part of a must epoch launch, our
    // target proc will be sufficient
    if (!task.must_epoch_task)
      output.target_procs.insert(output.target_procs.end(),
          local_cpus.begin(), local_cpus.end());
    else
      output.target_procs.push_back(task.target_proc);
  } else if (task.target_proc.kind() == Processor::PY_PROC) {
    // Put any of our Python procs here
    // If we're part of a must epoch launch, our
    // target proc will be sufficient
    if (!task.must_epoch_task)
      output.target_procs.insert(output.target_procs.end(),
          local_pys.begin(), local_pys.end());
    else
      output.target_procs.push_back(task.target_proc);
  } else {
    // Unsupported proc kind
    assert(false);
  }
  // In control replication, each mapper should only map tasks
  // assigned to local proccessors
  // Violation of this assertion may result in severe runtime
  // overheads to Legion
  if (enable_control_replication) {
    for (size_t i = 0; i < output.target_procs.size(); i++)
      assert(output.target_procs[i].address_space() == node_id);
  }
  // Find instances that still need to be mapped
  std::vector<std::set<FieldID> > missing_fields(task.regions.size());
  runtime->filter_instances(ctx, task, output.chosen_variant,
      output.chosen_instances, missing_fields);
  // Track which regions have already been mapped
  std::vector<bool> done_regions(task.regions.size(), false);
  if (!input.premapped_regions.empty())
    for (std::vector<unsigned>::const_iterator it =
          input.premapped_regions.begin(); it !=
          input.premapped_regions.end(); it++)
      done_regions[*it] = true;
  // Now we need to go through and make instances for any of our
  // regions which do not have space for certain fields
  for (unsigned idx = 0; idx < task.regions.size(); idx++) {
    if (done_regions[idx])
      continue;
    // Skip any empty regions
    if ((task.regions[idx].privilege == LEGION_NO_ACCESS) ||
        (task.regions[idx].privilege_fields.empty()) ||
        missing_fields[idx].empty())
      continue;
    // Select a memory for the req
    Memory target_mem = default_select_target_memory(ctx,
        task.target_proc, task.regions[idx]);
    // Assert no virtual mapping for now
    assert((task.regions[idx].tag & DefaultMapper::VIRTUAL_MAP) == 0);
    // Check to see if any of the valid instances satisfy the requirement
    {
      std::vector<PhysicalInstance> valid_instances;
      for (std::vector<PhysicalInstance>::const_iterator
             it = input.valid_instances[idx].begin(),
             ie = input.valid_instances[idx].end(); it != ie; ++it)
      {
        if (it->get_location() == target_mem) {
          // Only select instances with exact same index domain
          Domain instance_domain = it->get_instance_domain();
          Domain region_domain = runtime->get_index_space_domain(
              ctx, task.regions[idx].region.get_index_space());
          if (instance_domain.get_volume() == region_domain.get_volume()) 
            valid_instances.push_back(*it);
        }
      }

      std::set<FieldID> valid_missing_fields;
      runtime->filter_instances(ctx, task, idx, output.chosen_variant,
                                valid_instances, valid_missing_fields);
      runtime->acquire_and_filter_instances(ctx, valid_instances);
      output.chosen_instances[idx] = valid_instances;
      missing_fields[idx] = valid_missing_fields;
      if (missing_fields[idx].empty())
        continue;
    }
    // Otherwise make nromal instances for the given region
    LayoutConstraintID layout_id = default_select_layout_constraints(
        ctx, target_mem, task.regions[idx], false/*needs constraint check*/);
    const LayoutConstraintSet &constraint_set =
        runtime->find_layout_constraints(ctx, layout_id);
    size_t footprint;
    PhysicalInstance result;
    bool created;
    if (!default_make_instance(ctx, target_mem, constraint_set,
        result, true/*meet_constraints*/,
        task.regions[idx], created, &footprint))
    {
      if (log_instance_creation) {
        for (size_t idx = 0; idx < created_instances.size(); idx++) {
          log_ff_mapper.print("Instance[%zu]: memory:" IDFMT
              "	proc:" IDFMT "	size:%zu	task:%s", idx,
              created_instances[idx].memory.id,
              created_instances[idx].processor.id,
              created_instances[idx].size,
              created_instances[idx].task_name.c_str());
        }
      }
      // Report failed to creation
      log_ff_mapper.error("FlexFlow failed allocation of size %zd bytes for "
          "region requirement %d of task %s (UID %lld) in memory "
          IDFMT " with kind %d for processor " IDFMT ".", footprint, idx,
          task.get_task_name(), task.get_unique_id(),
          target_mem.id, target_mem.kind(), task.target_proc.id);
      assert(false);
    } else {
      output.chosen_instances[idx].push_back(result);
    }
    if (log_instance_creation && created) {
      //Log instance creation
      InstanceCreationLog clog;
      clog.task_name = task.get_task_name();
      clog.size = footprint;
      clog.memory = target_mem;
      clog.processor = task.target_proc;
      created_instances.push_back(clog);
    }
  } //for idx
#ifdef DEADCODE
  if ((task.task_id == CONV2D_INIT_TASK_ID)
     || (task.task_id == CONV2D_FWD_TASK_ID)
     || (task.task_id == CONV2D_BWD_TASK_ID))
  {
    VariantInfo chosen = default_find_preferred_variant(task, ctx,
                             true/*needs tight bound*/, true/*cache*/,
                             task.target_proc.kind());
    output.chosen_variant = chosen.variant;
    output.task_priority = 0;
    output.postmap_task = false;
    output.target_procs.push_back(task.target_proc);
    assert(task.target_proc.kind() == Processor::TOC_PROC);
    Memory fbmem = proc_fbmems[task.target_proc];
    for (unsigned idx = 0; idx < task.regions.size(); idx++)
    {
      if ((task.regions[idx].privilege == NO_ACCESS) ||
          (task.regions[idx].privilege_fields.empty())) continue;
      const TaskLayoutConstraintSet &layout_constraints =
        runtime->find_task_layout_constraints(ctx, task.task_id,
                                              output.chosen_variant);
      std::set<FieldID> fields(task.regions[idx].privilege_fields);
      if (!default_create_custom_instances(ctx, task.target_proc,
             fbmem, task.regions[idx], idx, fields,
             layout_constraints, true, output.chosen_instances[idx]))
      {
        default_report_failed_instance_creation(task, idx, task.target_proc,
                                                fbmem);
      }
    }
  } else
#endif
}