std::string gen_node()

in src/backends/cuda/cuda.cpp [190:255]


// Generates the source text for the single IR node found at `ref` in the
// loop tree and returns it as a string.
//
// Dispatch is on the node's operation:
//   - add / multiply: indentation followed by the output of gen_compute
//     called with "+" / "*" respectively.
//   - read / write / view: one indented assignment line of the form
//     `<destination access> = <source access>;\n`, where both sides come
//     from gen_access.
//   - anything else: ASSERT failure that includes a dump of the node.
//
// For reads and writes an `external_memory` index is handed to gen_access:
// a read uses the node's position in lt.ir.inputs(); a write uses
// lt.ir.inputs().size() plus the node's position in lt.ir.outputs().
// NOTE(review): presumably this index selects the kernel argument backing
// the access -- confirm against gen_access.
//
// Allocations are looked up inside the branches that need them, so an
// unsupported node reaches the descriptive ASSERT instead of failing in an
// unrelated aux.allocs lookup first.
std::string gen_node(const LoopTree &lt, const Auxiliary &aux,
                     UnrollMap &unroll, LoopTree::TreeRef ref) {
  std::stringstream ss;
  auto depth = lt.tree_node(ref).depth;
  auto node_ref = lt.node(ref);
  const auto &node = lt.ir.node(node_ref);

  if (node.op() == Operation::add) {
    ss << indent(depth);
    ss << gen_compute(lt, aux, unroll, ref, "+");
  } else if (node.op() == Operation::multiply) {
    ss << indent(depth);
    ss << gen_compute(lt, aux, unroll, ref, "*");
  } else if (node.op() == Operation::read) {
    // Locate this node among the IR inputs. There is deliberately no early
    // exit: if the node were listed more than once, the last entry wins.
    int external_memory = -1;
    const auto &ir_inputs = lt.ir.inputs();
    int input_index = 0;
    for (const auto &input : ir_inputs) {
      if (input == node_ref) {
        external_memory = input_index;
      }
      ++input_index;
    }
    ASSERT(external_memory > -1) << "No input found!";

    auto out_alloc = aux.allocs.at(node_ref);
    // The source side reuses the destination allocation with its lca reset.
    auto inp_alloc = out_alloc;
    inp_alloc.lca = -1;  // TODO clean up read hacks

    ss << indent(depth);
    ss << gen_access(lt, aux, out_alloc, ref, unroll);
    ss << " = ";
    ss << gen_access(lt, aux, inp_alloc, ref, unroll, external_memory);
    ss << ";\n";
  } else if (node.op() == Operation::write) {
    // Output indices are numbered after all inputs, so counting starts at
    // the number of IR inputs. As above, the last matching entry wins.
    int external_memory = -1;
    const auto &ir_outputs = lt.ir.outputs();
    int output_index = static_cast<int>(lt.ir.inputs().size());
    for (const auto &output : ir_outputs) {
      if (output == node_ref) {
        external_memory = output_index;
      }
      ++output_index;
    }
    ASSERT(external_memory > -1) << "No output found!";

    auto out_alloc = aux.allocs.at(node_ref);
    ASSERT(node.inputs().size() == 1)
        << "Cuda backend expects a write to have exactly one input";
    auto inp_alloc = aux.allocs.at(node.inputs().at(0));

    ss << indent(depth);
    ss << gen_access(lt, aux, out_alloc, ref, unroll, external_memory);
    ss << " = ";
    ss << gen_access(lt, aux, inp_alloc, ref, unroll);
    ss << ";\n";
  } else if (node.op() == Operation::view) {
    auto out_alloc = aux.allocs.at(node_ref);
    ASSERT(node.inputs().size() == 1)
        << "Cuda backend can only emit simple views";
    auto dep_ref = node.inputs().at(0);
    auto inp_alloc = aux.allocs.at(dep_ref);

    // A view is emitted as a plain copy from its single input.
    ss << indent(depth);
    ss << gen_access(lt, aux, out_alloc, ref, unroll);
    ss << " = ";
    ss << gen_access(lt, aux, inp_alloc, ref, unroll);
    ss << ";\n";
  } else {
    ASSERT(0) << "node in IR not yet supported in CUDA "
              << lt.ir.dump(node_ref);
  }

  return ss.str();
}