virtual Status Execute()

in horovod/common/ops/collective_operations.h [198:256]


  virtual Status Execute(std::vector<TensorTableEntry>& entries,
                         const Response& response) = 0;

  virtual bool Enabled(const ParameterManager& param_manager,
                       const std::vector<TensorTableEntry>& entries,
                       const Response& response) const = 0;

protected:
  template <typename T>
  Status PrepareOutputAndParams(TensorTableEntry& e,
                                std::vector<T>& sdispls,
                                std::vector<T>& rdispls,
                                std::vector<T>& sendcounts,
                                std::vector<T>& recvcounts) {
    auto world_size = global_state_->controller->GetSize();

    const auto& splits = e.splits;
    std::vector<int32_t> recvsplits;
    // Perform alltoall of splits to get expeceted receive splits
    global_state_->controller->AlltoallGetRecvSplits(splits, recvsplits);

    // Every tensor participating in Alltoall operation may have different
    // first dimension size, but the rest of dimensions are same for all
    // tensors.  Here we get shape of tensor sliced by first dimension.
    TensorShape slice_shape;
    for (int i = 1; i < e.tensor->shape().dims(); ++i) {
      slice_shape.AddDim(e.tensor->shape().dim_size(i));
    }
    int64_t slice_num_elements = slice_shape.num_elements();

    // Prepare send/recvcounts and displacements for Alltoallv
    sdispls.resize(world_size);
    rdispls.resize(world_size);
    sendcounts.resize(world_size);
    recvcounts.resize(world_size);

    size_t output_first_dim = 0;
    for (int i = 0; i < world_size; ++i) {
      sendcounts[i] = splits[i] * slice_num_elements;
      recvcounts[i] = recvsplits[i] * slice_num_elements;
      output_first_dim += recvsplits[i];
    }

    for (int i = 1; i < world_size; ++i) {
      sdispls[i] = sdispls[i-1] + sendcounts[i-1];
      rdispls[i] = rdispls[i-1] + recvcounts[i-1];
    }

    // Allocate output
    TensorShape output_shape;
    output_shape.AddDim(output_first_dim);
    output_shape.AppendShape(slice_shape);
    Status status = e.context->AllocateOutput(output_shape, &e.output);
    if (!status.ok()) {
      return status;
    }

    return Status::OK();
  }