void NGraphEncapsulateOp::Compute()

in ngraph_bridge/kernels/ngraph_encapsulate_op.cc [197:358]


// Executes the encapsulated nGraph cluster for one op invocation:
// wraps the TF inputs as zero-copy nGraph tensors, allocates TF outputs
// for statically-shaped results, runs the compiled executable, then
// publishes dynamic-shaped results after execution fixes their shapes.
// Errors are reported through ctx (OP_REQUIRES*); nothing is returned.
void NGraphEncapsulateOp::Compute(OpKernelContext* ctx) {
  NGRAPH_VLOG(1) << "Compute using executor " << name();
  NGRAPH_VLOG(4) << "NGraphEncapsulateOp::Compute starting for cluster "
                 << m_cluster_id;

  Timer compute_time;
  // Serialize concurrent Compute() calls on this kernel instance.
  std::lock_guard<std::mutex> lock(m_compute_lock_);
  int time_func_create_or_lookup;
  Timer function_lookup_or_create;

  // Collect the TF input tensors for this step.
  std::vector<Tensor> tf_input_tensors;
  std::shared_ptr<Executable> ng_exec;
  tf_input_tensors.reserve(ctx->num_inputs());
  for (int i = 0; i < ctx->num_inputs(); i++) {
    tf_input_tensors.push_back(ctx->input(i));
  }
  const int step_id = ctx->step_id();

  // Get (or compile) the nGraph executable matching these inputs.
  OP_REQUIRES_OK(ctx, GetExecutable(tf_input_tensors, ng_exec));

  NGRAPH_VLOG(1) << " Step_ID: " << step_id;
  NGRAPH_VLOG(4)
      << "NGraphEncapsulateOp::Compute got ngraph executable for cluster "
      << m_cluster_id;

  time_func_create_or_lookup = function_lookup_or_create.ElapsedInMS();

  NGRAPH_VLOG(4) << "NGraphEncapsulateOp::Compute got graph for cluster "
                 << m_cluster_id;

  Timer create_or_lookup_tensors;

  // Touch the backend once; the original fetched it on every loop
  // iteration without using the result. Kept (not deleted) in case
  // GetBackend() lazily initializes the backend.
  BackendManager::GetBackend();

  // Wrap each TF input buffer in a zero-copy nGraph (IE) tensor.
  vector<shared_ptr<ngraph::runtime::Tensor>> ng_inputs;
  ng_inputs.reserve(tf_input_tensors.size());
  // Fix: this counter was previously never updated, so the
  // NGRAPH_TF_MEM_PROFILE log below always reported 0 MB.
  size_t ng_input_tensor_size_in_bytes = 0;
  for (size_t i = 0; i < tf_input_tensors.size(); i++) {
    const Tensor& tf_tensor = tf_input_tensors[i];
    ngraph::Shape ng_shape(tf_tensor.shape().dims());
    for (int j = 0; j < tf_tensor.shape().dims(); ++j) {
      ng_shape[j] = tf_tensor.shape().dim_size(j);
    }
    ngraph::element::Type ng_element_type;
    OP_REQUIRES_OK(ctx, tf_utils::TFDataTypeToNGraphElementType(
                            tf_tensor.dtype(), &ng_element_type));
    ng_input_tensor_size_in_bytes += tf_tensor.TotalBytes();
    // The IETensor aliases the TF tensor's buffer (no copy).
    ng_inputs.push_back(
        make_shared<IETensor>(ng_element_type, ng_shape, tf_tensor.data()));
  }

  NGRAPH_VLOG(4) << "NGraphEncapsulateOp::Compute allocated argument tensors "
                    "for cluster "
                 << m_cluster_id;

  // Allocate TF output tensors for all results whose shapes are known
  // statically; dynamic-shaped results are deferred until after the call.
  auto results = ng_exec->GetResults();
  std::vector<shared_ptr<ngraph::runtime::Tensor>> ng_outputs(results.size(),
                                                              nullptr);
  std::vector<int> dyn_shape_tensors;
  for (int i = 0; i < static_cast<int>(results.size()); i++) {
    auto ng_element = results[i];
    if (ng_element->get_output_partial_shape(0).is_dynamic()) {
      NGRAPH_VLOG(4)
          << "NGraphEncapsulateOp::Compute skipping output allocation for "
             "dynamic tensor at index "
          << i;
      dyn_shape_tensors.push_back(i);
      continue;
    }

    // Create the TF output tensor with the statically known shape.
    auto ng_shape = ng_element->get_shape();
    TensorShape tf_shape;
    for (auto dim : ng_shape) {
      tf_shape.AddDim(dim);
    }
    Tensor* output_tensor = nullptr;
    OP_REQUIRES_OK(ctx, ctx->allocate_output(i, tf_shape, &output_tensor));

    // Make sure the nGraph-inferred element type agrees with what
    // TensorFlow expected.
    ngraph::element::Type expected_elem_type;
    auto ng_element_type = ng_element->get_element_type();
    OP_REQUIRES_OK(ctx,
                   tf_utils::TFDataTypeToNGraphElementType(
                       ctx->expected_output_dtype(i), &expected_elem_type));
    OP_REQUIRES(
        ctx, ng_element_type == expected_elem_type,
        errors::Internal("Element type inferred by nGraph does not match "
                         "the element type expected by TensorFlow"));
    // Zero-copy: the nGraph result writes straight into the TF buffer.
    ng_outputs[i] =
        make_shared<IETensor>(ng_element_type, ng_shape, output_tensor->data());
  }
  NGRAPH_VLOG(4)
      << "NGraphEncapsulateOp::Compute allocated result tensors for cluster "
      << m_cluster_id;

  int time_create_or_lookup_tensors = create_or_lookup_tensors.ElapsedInMS();

  // Execute the nGraph function, converting any thrown exception into a
  // TF Internal error so the session fails cleanly instead of crashing.
  int time_execute_function;
  {
    Timer execute_function;
    NGRAPH_VLOG(4) << "NGraphEncapsulateOp::Compute call starting for cluster "
                   << m_cluster_id;
    try {
      ng_exec->Call(ng_inputs, ng_outputs);
    } catch (const std::exception& exp) {
      string status_string = "Caught exception while executing cluster " +
                             to_string(m_cluster_id) + ": " +
                             string(exp.what());
      OP_REQUIRES(ctx, false, errors::Internal(status_string));
    } catch (...) {
      string status_string =
          "Caught exception while executing cluster " + to_string(m_cluster_id);
      OP_REQUIRES(ctx, false, errors::Internal(status_string));
    }
    time_execute_function = execute_function.ElapsedInMS();
  }

  // Execution has now fixed the dynamic shapes: publish those outputs by
  // wrapping the IE result buffers as TF tensors (zero-copy).
  for (int i : dyn_shape_tensors) {
    auto ng_output = ng_outputs[i];
    auto ng_shape = ng_output->get_shape();
    TensorShape tf_shape;
    for (auto dim : ng_shape) {
      tf_shape.AddDim(dim);
    }

    // IETensorBuffer takes ownership of the IE tensor; TF's Tensor
    // refcounts the buffer, so no explicit delete is needed here.
    IETensorBuffer* tf_buffer =
        new IETensorBuffer(static_pointer_cast<IETensor>(ng_output));
    Tensor tf_tensor(ctx->expected_output_dtype(i), tf_shape, tf_buffer);
    ctx->set_output(i, tf_tensor);
  }

  long vm, rss;
  utils::MemoryProfile(vm, rss);
  // NOTE(review): the "GB" label assumes MemoryProfile reports rss in KB
  // (so /(1024*1024) yields GB) — confirm against utils::MemoryProfile.
  NGRAPH_VLOG(1) << "NGRAPH_TF_MEM_PROFILE:  OP_ID: " << m_cluster_id
                 << " Step_ID: " << step_id << " Cluster: " << name()
                 << " Input Tensors created: "
                 << ng_input_tensor_size_in_bytes / (1024 * 1024) << " MB"
                 << " Total process memory: " << rss / (1024 * 1024) << " GB";

  NGRAPH_VLOG(4) << "NGraphEncapsulateOp::Compute call done for cluster "
                 << m_cluster_id;

  NGRAPH_VLOG(4)
      << "NGraphEncapsulateOp::Compute done marking fresh for cluster "
      << m_cluster_id;
  NGRAPH_VLOG(1) << "NGRAPH_TF_TIMING_PROFILE: OP_ID: " << m_cluster_id
                 << " Step_ID: " << step_id << " Cluster: " << name()
                 << " Time-Compute: " << compute_time.ElapsedInMS()
                 << " Function-Create-or-Lookup: " << time_func_create_or_lookup
                 << " Create-and-copy-tensors: "
                 << time_create_or_lookup_tensors
                 << " Execute: " << time_execute_function;
}  // end compute