in ngraph_bridge/kernels/ngraph_encapsulate_op.cc [197:358]
// Executes the encapsulated nGraph cluster for one op invocation:
//   1. collects the TF input tensors,
//   2. resolves (or compiles) the Executable matching their shapes/types,
//   3. wraps input/static-shape output buffers as IETensors (zero-copy),
//   4. runs the executable,
//   5. publishes dynamic-shaped outputs back to TF after execution fixes
//      their shapes.
// Errors are reported through ctx via OP_REQUIRES*; concurrent calls on the
// same kernel instance are serialized by m_compute_lock_.
void NGraphEncapsulateOp::Compute(OpKernelContext* ctx) {
  NGRAPH_VLOG(1) << "Compute using executor " << name();
  NGRAPH_VLOG(4) << "NGraphEncapsulateOp::Compute starting for cluster "
                 << m_cluster_id;
  Timer compute_time;
  // Only one Compute() may run at a time for this kernel instance.
  std::lock_guard<std::mutex> lock(m_compute_lock_);
  int time_func_create_or_lookup;
  Timer function_lookup_or_create;

  // Gather the TF input tensors for this step.
  std::vector<Tensor> tf_input_tensors;
  tf_input_tensors.reserve(ctx->num_inputs());
  for (int i = 0; i < ctx->num_inputs(); i++) {
    tf_input_tensors.push_back(ctx->input(i));
  }
  const int step_id = ctx->step_id();

  // Get ngraph executable and inputs information.
  std::shared_ptr<Executable> ng_exec;
  OP_REQUIRES_OK(ctx, GetExecutable(tf_input_tensors, ng_exec));
  NGRAPH_VLOG(1) << " Step_ID: " << step_id;
  NGRAPH_VLOG(4)
      << "NGraphEncapsulateOp::Compute got ngraph executable for cluster "
      << m_cluster_id;
  time_func_create_or_lookup = function_lookup_or_create.ElapsedInMS();
  NGRAPH_VLOG(4) << "NGraphEncapsulateOp::Compute got graph for cluster "
                 << m_cluster_id;

  Timer create_or_lookup_tensors;

  // Wrap each TF input buffer in an IETensor. No copy is made: the IETensor
  // aliases Tensor::data().
  vector<shared_ptr<ngraph::runtime::Tensor>> ng_inputs;
  ng_inputs.reserve(tf_input_tensors.size());
  // BUGFIX: this counter was previously never updated, so the
  // NGRAPH_TF_MEM_PROFILE log line always reported 0 MB of input tensors.
  size_t ng_input_tensor_size_in_bytes = 0;
  for (int i = 0; i < static_cast<int>(tf_input_tensors.size()); i++) {
    ngraph::Shape ng_shape(tf_input_tensors[i].shape().dims());
    for (int j = 0; j < tf_input_tensors[i].shape().dims(); ++j) {
      ng_shape[j] = tf_input_tensors[i].shape().dim_size(j);
    }
    ngraph::element::Type ng_element_type;
    OP_REQUIRES_OK(ctx, tf_utils::TFDataTypeToNGraphElementType(
                            tf_input_tensors[i].dtype(), &ng_element_type));
    ng_input_tensor_size_in_bytes += tf_input_tensors[i].TotalBytes();
    // (An unused per-iteration BackendManager::GetBackend() call was removed
    // here; the backend handle was never referenced.)
    ng_inputs.push_back(make_shared<IETensor>(ng_element_type, ng_shape,
                                              tf_input_tensors[i].data()));
  }
  NGRAPH_VLOG(4) << "NGraphEncapsulateOp::Compute allocated argument tensors "
                    "for cluster "
                 << m_cluster_id;

  // Allocate TF output tensors for results whose shapes are statically known;
  // dynamic-shaped results are deferred until after execution.
  auto results = ng_exec->GetResults();
  std::vector<shared_ptr<ngraph::runtime::Tensor>> ng_outputs(results.size(),
                                                              nullptr);
  std::vector<int> dyn_shape_tensors;
  for (int i = 0; i < static_cast<int>(results.size()); i++) {
    auto ng_element = results[i];
    if (ng_element->get_output_partial_shape(0).is_dynamic()) {
      NGRAPH_VLOG(4)
          << "NGraphEncapsulateOp::Compute skipping output allocation for "
             "dynamic tensor at index"
          << i;
      dyn_shape_tensors.push_back(i);
      continue;
    }
    // Create the TF output tensor.
    auto ng_shape = ng_element->get_shape();
    TensorShape tf_shape;
    for (auto dim : ng_shape) {
      tf_shape.AddDim(dim);
    }
    Tensor* output_tensor = nullptr;
    OP_REQUIRES_OK(ctx, ctx->allocate_output(i, tf_shape, &output_tensor));
    // Make sure the nGraph-inferred element type agrees with what TensorFlow
    // expected.
    ngraph::element::Type expected_elem_type;
    auto ng_element_type = ng_element->get_element_type();
    OP_REQUIRES_OK(ctx,
                   tf_utils::TFDataTypeToNGraphElementType(
                       ctx->expected_output_dtype(i), &expected_elem_type));
    OP_REQUIRES(
        ctx, ng_element_type == expected_elem_type,
        errors::Internal("Element type inferred by nGraph does not match "
                         "the element type expected by TensorFlow"));
    // Zero-copy: the IETensor aliases the freshly allocated TF output buffer.
    ng_outputs[i] =
        make_shared<IETensor>(ng_element_type, ng_shape, output_tensor->data());
  }
  NGRAPH_VLOG(4)
      << "NGraphEncapsulateOp::Compute allocated result tensors for cluster "
      << m_cluster_id;
  int time_create_or_lookup_tensors = create_or_lookup_tensors.ElapsedInMS();

  // Execute the nGraph function, translating any exception into a TF status
  // (OP_REQUIRES with a false condition sets the status and returns).
  int time_execute_function;
  {
    Timer execute_function;
    NGRAPH_VLOG(4) << "NGraphEncapsulateOp::Compute call starting for cluster "
                   << m_cluster_id;
    try {
      ng_exec->Call(ng_inputs, ng_outputs);
    } catch (const std::exception& exp) {
      string status_string = "Caught exception while executing cluster " +
                             to_string(m_cluster_id) + ": " +
                             string(exp.what());
      OP_REQUIRES(ctx, false, errors::Internal(status_string));
    } catch (...) {
      string status_string =
          "Caught exception while executing cluster " + to_string(m_cluster_id);
      OP_REQUIRES(ctx, false, errors::Internal(status_string));
    }
    time_execute_function = execute_function.ElapsedInMS();
  }

  // Execution has fixed the shapes of dynamic outputs; publish them now.
  for (auto i : dyn_shape_tensors) {
    auto ng_output = ng_outputs[i];
    // Create the TF output tensor from the now-concrete shape.
    auto ng_shape = ng_output->get_shape();
    TensorShape tf_shape;
    for (auto dim : ng_shape) {
      tf_shape.AddDim(dim);
    }
    // Zero-copy IE tensor to TF: the refcounted buffer takes ownership of the
    // IETensor, so the Tensor ctor adopting tf_buffer does not leak.
    IETensorBuffer* tf_buffer =
        new IETensorBuffer(static_pointer_cast<IETensor>(ng_output));
    Tensor tf_tensor(ctx->expected_output_dtype(i), tf_shape, tf_buffer);
    ctx->set_output(i, tf_tensor);
  }

  long vm, rss;
  utils::MemoryProfile(vm, rss);
  // NOTE(review): rss/(1024*1024) is labeled "GB"; that is correct only if
  // MemoryProfile reports rss in KB — confirm against its implementation.
  NGRAPH_VLOG(1) << "NGRAPH_TF_MEM_PROFILE: OP_ID: " << m_cluster_id
                 << " Step_ID: " << step_id << " Cluster: " << name()
                 << " Input Tensors created: "
                 << ng_input_tensor_size_in_bytes / (1024 * 1024) << " MB"
                 << " Total process memory: " << rss / (1024 * 1024) << " GB";
  NGRAPH_VLOG(4) << "NGraphEncapsulateOp::Compute call done for cluster "
                 << m_cluster_id;
  NGRAPH_VLOG(4)
      << "NGraphEncapsulateOp::Compute done marking fresh for cluster "
      << m_cluster_id;
  NGRAPH_VLOG(1) << "NGRAPH_TF_TIMING_PROFILE: OP_ID: " << m_cluster_id
                 << " Step_ID: " << step_id << " Cluster: " << name()
                 << " Time-Compute: " << compute_time.ElapsedInMS()
                 << " Function-Create-or-Lookup: " << time_func_create_or_lookup
                 << " Create-and-copy-tensors: "
                 << time_create_or_lookup_tensors
                 << " Execute: " << time_execute_function;
}  // end compute