void IRGenVisitor::post()

in lib/IR/IRGen.cpp [95:572]
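Post-order IRGen hook: called for node N after all of its inputs have been
visited, so every operand can be resolved through valueForNode().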


void IRGenVisitor::post(Node *parent, Node *N) {
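  // Record N as handled; the visitor consults visited_ to avoid generating
  // IR for the same node twice.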
  visited_.insert(N);

  // Allow the backend to generate its own custom instruction IR.
  if (B_.generateInst(N, *this)) {
    return;
  }

  switch (N->getKind()) {
  default:
    llvm_unreachable("Unhandled node; perhaps the node should have been "
                     "lowered, or the backend should have provided an IRGen "
                     "case mapping this node to a backend-specific Instr.");
    break;

    // Include all automatically generated cases:
#include "glow/AutoGenIRGen.h"

  case glow::Kinded::Kind::ReshapeNodeKind: {
    auto *RN = cast<ReshapeNode>(N);

    auto *inVal = valueForNode(RN->getInput());
    std::vector<dim_t> offsets(inVal->getType()->dims().size(), 0);
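    // A reshape is a zero-offset tensor view of the input with the result
    // type; copying the view into a fresh activation materializes the new
    // shape.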
    auto *TVI = builder_.createTensorViewInst(
        DECORATE_NODE_NAME(N, "tensorview"), inVal, RN->getResult().getType(),
        offsets);
    auto *dest = builder_.createAllocActivationInst(
        DECORATE_NODE_NAME(N, "res"), RN->getResult().getType());
    builder_.createCopyInst(DECORATE_NODE_NAME(N, "copy"), dest, TVI);
    registerIR(N, dest);
    break;
  }
  case glow::Kinded::Kind::ConvolutionGradNodeKind: {
    auto *CG = cast<ConvolutionGradNode>(N);

    auto *input = valueForNode(CG->getInput());
    auto *filter = valueForNode(CG->getFilter());
    auto *bias = valueForNode(CG->getBias());

    auto *outGrad = valueForNode(CG->getGradOfOriginalOutputNamedResult());

    auto *inG = builder_.createAllocActivationInst(
        DECORATE_NODE_NAME(N, "input", "grad"), input->getType());
    auto *biasG = builder_.createAllocActivationInst(
        DECORATE_NODE_NAME(N, "bias", "grad"), bias->getType());
    auto *filterG = builder_.createAllocActivationInst(
        DECORATE_NODE_NAME(N, "filter", "grad"), filter->getType());

    builder_.createConvolutionGradInst(
        N->getName(), input, filter, outGrad, inG, filterG, biasG,
        CG->getKernels(), CG->getStrides(), CG->getPads(), CG->getGroup(),
        CG->getDilation(), CG->getLayout(), CG->getFusedActivation(),
        CG->getFusedActivationArgs());

    registerIR(CG->getGradOfInputNamedInput(), inG);
    registerIR(CG->getGradOfInputNamedFilter(), filterG);
    registerIR(CG->getGradOfInputNamedBias(), biasG);
    break;
  }
  case glow::Kinded::Kind::MaxPoolNodeKind: {
    auto *P = cast<MaxPoolNode>(N);
    auto *in = valueForNode(P->getInput());
    auto argMax = P->getArgmax();
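    // Always emit the argmax-producing pool variant and remember the
    // instruction in nodeToInstr_ so that a later MaxPoolGradNode can reuse
    // the argmax buffer.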
    auto *V = builder_.createMaxPoolWithArgmaxOp(
        N->getName(), in, P->getKernels(), P->getStrides(), P->getPads(),
        P->getLayout(), argMax.getElementType());
    Value *dest = V->getDest();
    Value *argmax = V->getArgmax();
    nodeToInstr_[N] = V;
    registerIR(P->getResult(), dest);
    registerIR(P->getArgmax(), argmax);
    break;
  }
  case glow::Kinded::Kind::MaxPoolGradNodeKind: {
    auto *PG = cast<MaxPoolGradNode>(N);

    auto poolIn = PG->getInput();
    auto poolOut = PG->getOriginalOutputForResult();
    auto *inW = valueForNode(poolIn);
    auto *outW = valueForNode(poolOut);
    auto *outG = valueForNode(PG->getGradOfOriginalOutputNamedResult());

    auto *inG = builder_.createAllocActivationInst(
        DECORATE_NODE_NAME(N, "inG"), PG->getInput().getType());

    // Find the original pool instruction so we can reuse its argmax buffer.
    assert(nodeToInstr_.count(poolOut.getNode()) &&
           "Pool IRgen did not register itself");
    auto *PI = cast<MaxPoolWithArgmaxInst>(nodeToInstr_[poolOut.getNode()]);

    builder_.createMaxPoolWithArgmaxGradInst(
        N->getName(), outW, inW, PI->getArgmax(), outG, inG, PG->getKernels(),
        PG->getStrides(), PG->getPads(), PG->getLayout());
    registerIR(PG->getGradOfInputNamedInput(), inG);
    break;
  }
  case glow::Kinded::Kind::AvgPoolGradNodeKind: {
    auto *PG = cast<AvgPoolGradNode>(N);

    auto poolIn = PG->getInput();
    auto poolOut = PG->getOriginalOutputForResult();
    auto *inW = valueForNode(poolIn);
    auto *outW = valueForNode(poolOut);
    auto *outG = valueForNode(PG->getGradOfOriginalOutputNamedResult());

    auto *inG = builder_.createAllocActivationInst(
        DECORATE_NODE_NAME(N, "inG"), PG->getInput().getType());

    builder_.createAvgPoolGradInst(
        N->getName(), outW, inW, outG, inG, PG->getKernels(), PG->getStrides(),
        PG->getPads(), PG->getLayout(), PG->getCountIncludePads());
    registerIR(PG->getGradOfInputNamedInput(), inG);
    break;
  }
  case glow::Kinded::Kind::AdaptiveAvgPoolGradNodeKind: {
    auto *PG = cast<AdaptiveAvgPoolGradNode>(N);

    auto poolOut = PG->getOriginalOutputForResult();
    auto *outW = valueForNode(poolOut);
    auto *outG = valueForNode(PG->getGradOfOriginalOutputNamedResult());

    auto *inG = builder_.createAllocActivationInst(
        DECORATE_NODE_NAME(N, "inG"), PG->getInput().getType());

    builder_.createAdaptiveAvgPoolGradInst(N->getName(), outW, outG, inG);
    registerIR(PG->getGradOfInputNamedInput(), inG);
    break;
  }
  case glow::Kinded::Kind::SoftMaxGradNodeKind: {
    auto *SMG = cast<SoftMaxGradNode>(N);
    // Original inputs:
    auto *origIn = valueForNode(SMG->getInput());
    auto *origSelect = valueForNode(SMG->getSelected());
    // Values related to the output of the node.
    auto *outGrad = valueForNode(SMG->getGradOfOriginalOutputNamedResult());
    auto originalNodeResult = SMG->getOriginalOutputForResult();
    assert(nodeToInstr_.count(originalNodeResult.getNode()) &&
           "Unknown original node");
    auto *origOut = valueForNode(originalNodeResult);
    auto *srcGrad = builder_.createAllocActivationInst(
        DECORATE_NODE_NAME(N, "res"), outGrad->getType());
    auto *SMGI = builder_.createSoftMaxGradInst(N->getName(), origOut, origIn,
                                                origSelect, srcGrad);
    registerIR(SMG->getGradOfInputNamedInput(), SMGI->getSrcGrad());
    break;
  }
  case glow::Kinded::Kind::CrossEntropyLossNodeKind: {
    auto *CELoss = cast<CrossEntropyLossNode>(N);
    auto *P = valueForNode(CELoss->getP());
    auto *Labels = valueForNode(CELoss->getLabels());
    auto *V = builder_.createCrossEntropyLossOp(N->getName(), P, Labels);
    registerIR(N, V->getCE());
    nodeToInstr_[N] = V;
    break;
  }
  case glow::Kinded::Kind::CrossEntropyLossGradNodeKind: {
    auto *CELossG = cast<CrossEntropyLossGradNode>(N);
    // Forward pass inputs.
    auto *P = valueForNode(CELossG->getP());
    auto *Y = valueForNode(CELossG->getLabels());
    // Backward pass gradient dL/dY.
    auto *dY = valueForNode(CELossG->getGradOfOriginalOutputNamedCE());
    auto *pGrad = builder_.createAllocActivationInst(
        DECORATE_NODE_NAME(N, "p", "grad"), P->getType());
    auto *yGrad = builder_.createAllocActivationInst(
        DECORATE_NODE_NAME(N, "labels", "grad"), Y->getType());
    auto *CELossGI = builder_.createCrossEntropyLossGradInst(
        N->getName(), dY, P, Y, pGrad, yGrad);
    registerIR(CELossG->getGradOfInputNamedP(), CELossGI->getPgrad());
    registerIR(CELossG->getGradOfInputNamedLabels(), CELossGI->getLabelsgrad());
    break;
  }
  case glow::Kinded::Kind::ConcatNodeKind: {
    auto *CC = cast<ConcatNode>(N);

    auto *dest = builder_.createAllocActivationInst(CC->getName(),
                                                    CC->getResult().getType());
    // Mark the buffer as initialized; this is safe because the InsertTensor
    // instructions below fully overwrite the buffer.
    builder_.createTouchInst(CC->getName(), dest);
    auto inputs = CC->getInputs();

    // We start inserting into the result tensor at offset (0, 0, ...).
    std::vector<dim_t> offsets(CC->getResult().dims().size(), 0);
    unsigned dim = CC->getDim();
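    // Example: concat(A, A, A, B) along dim lowers to two InsertTensorInsts:
    // A inserted with count 3, then B with count 1.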

    for (size_t i = 0, e = inputs.size(); i < e;) {
      // Look for a run of the same node being concatenated consecutively many
      // times; n such consecutive repeats can be wrapped into a single insert
      // with count n along the dim axis.
      const size_t consecutiveCount = getConsecutiveSameNodeCount(inputs, i);

      // Create the new InsertTensor instruction given the input node, along
      // with the number of times to insert the node and the axis (dim) we are
      // inserting in.
      builder_.createInsertTensorInst(
          DECORATE_NODE_NAME(CC, inputs[i].getNode()->getName()), dest,
          valueForNode(inputs[i]), offsets, consecutiveCount, dim);

      // We are stacking the tensors along a specific dimension, so advance
      // the insertion offset along that dimension by the input's size times
      // the repeat count.
      offsets[dim] += inputs[i].dims()[dim] * consecutiveCount;

      // Skip past the run of identical nodes that was just wrapped into a
      // single InsertTensorInst.
      i += consecutiveCount;
    }
    registerIR(N, dest);
    break;
  }
  case glow::Kinded::Kind::CollectRpnProposalsNodeKind: {
    auto *CRPN = llvm::cast<CollectRpnProposalsNode>(N);

    std::string allocName = std::string(CRPN->getName()) + ".res";
    auto *dest = builder_.createAllocActivationInst(
        allocName, CRPN->getResult().getType());

    auto *inst = builder_.createCollectRpnProposalsInst(
        CRPN->getName(), dest, CRPN->getRpnMaxLevel(), CRPN->getRpnMinLevel(),
        CRPN->getRpnPostNmsTopN());

    // The instruction takes a variable number of inputs: push the ROIs and
    // then their scores as extra In operands.
    for (auto &in : CRPN->getRoisIn()) {
      inst->pushOperand({valueForNode(in), OperandKind::In});
    }

    for (auto &in : CRPN->getRoisProbsIn()) {
      inst->pushOperand({valueForNode(in), OperandKind::In});
    }

    registerIR(CRPN->getResult(), dest);
    break;
  }
  case glow::Kinded::Kind::SliceNodeKind: {
    auto *SL = cast<SliceNode>(N);
    auto start = SL->getStart();
    auto *in = valueForNode(SL->getInput());
    auto *dest = builder_.createAllocActivationInst(SL->getName(),
                                                    SL->getResult().getType());
    builder_.createExtractTensorInst(SL->getName(), dest, in, start);
    registerIR(N, dest);
    break;
  }
  case glow::Kinded::Kind::InsertTensorNodeKind: {
    auto *IT = cast<InsertTensorNode>(N);
    auto start = IT->getStart();
    auto count = IT->getCount();
    auto axis = IT->getAxis();
    auto *big = valueForNode(IT->getBig());
    auto *small = valueForNode(IT->getSmall());
    auto *dest = builder_.createAllocActivationInst(IT->getName(),
                                                    IT->getResult().getType());
    if (small->getSizeInBytes() * count < big->getSizeInBytes()) {
      // The repeated small tensor covers only part of the destination, so
      // seed it with the contents of the big tensor first.
      builder_.createCopyInst(DECORATE_NODE_NAME(N, "copy"), dest, big);
    } else {
      // The small tensor completely fills the big tensor, so there is no need
      // to initialize the destination.
      builder_.createTouchInst(DECORATE_NODE_NAME(N, "init"), dest);
    }
    builder_.createInsertTensorInst(IT->getName(), dest, small, start, count,
                                    axis);

    registerIR(N, dest);
    break;
  }
  case glow::Kinded::Kind::ScatterDataNodeKind: {
    auto *SDI = cast<ScatterDataNode>(N);
    auto *dataTensor = valueForNode(SDI->getData());
    auto *indicesTensor = valueForNode(SDI->getIndices());
    auto *slicesTensor = valueForNode(SDI->getSlices());
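    // The scatter updates its destination in place, so work on a private
    // copy of the data tensor.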
    auto *dest = builder_.createAllocActivationInst(SDI->getName(),
                                                    SDI->getResult().getType());
    builder_.createCopyInst(DECORATE_NODE_NAME(N, "copy"), dest, dataTensor);
    builder_.createScatterDataInst(SDI->getName(), dest, indicesTensor,
                                   slicesTensor, SDI->getCumulative());
    registerIR(N, dest);
    break;
  }
  case glow::Kinded::Kind::LocalResponseNormalizationNodeKind: {
    auto *LR = cast<LocalResponseNormalizationNode>(N);
    auto *in = valueForNode(LR->getInput());
    auto *V = builder_.createLocalResponseNormalizationOp(
        N->getName(), in, LR->getHalfWindowSize(), LR->getAlpha(),
        LR->getBeta(), LR->getK());
    nodeToInstr_[N] = V;
    registerIR(N, V->getDest());
    break;
  }

  case glow::Kinded::Kind::LocalResponseNormalizationGradNodeKind: {
    auto *LRG = cast<LocalResponseNormalizationGradNode>(N);
    auto *origIn = valueForNode(LRG->getInput());

    auto originalNodeResult = LRG->getOriginalOutputForResult();
    assert(nodeToInstr_.count(originalNodeResult.getNode()) &&
           "Unknown original node");
    auto *LRI = cast<LocalResponseNormalizationInst>(
        nodeToInstr_[originalNodeResult.getNode()]);

    auto *srcGrad = builder_.createAllocActivationInst(
        DECORATE_NODE_NAME(N, "res", "grad"), origIn->getType());

    builder_.createLocalResponseNormalizationGradInst(
        N->getName(), valueForNode(originalNodeResult), origIn,
        LRI->getScale(),
        valueForNode(LRG->getGradOfOriginalOutputNamedResult()), srcGrad,
        LRG->getHalfWindowSize(), LRG->getAlpha(), LRG->getBeta(), LRG->getK());

    registerIR(LRG->getGradOfInputNamedInput(), srcGrad);
    break;
  }
  case glow::Kinded::Kind::SaveNodeKind: {
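    // A save is a plain copy from the source activation into the output
    // placeholder's weight buffer.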
    auto *R = cast<SaveNode>(N);
    auto *src = valueForNode(R->getInput());
    auto *dest = valueForNode(R->getOutput());
    builder_.createCopyInst(N->getName(), dest, src);
    break;
  }
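  // Storage nodes map directly onto WeightVars: Constants become immutable
  // weights and Placeholders mutable ones.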
  case glow::Kinded::Kind::ConstantKind: {
    auto *V = cast<Constant>(N);
    auto *W = builder_.createWeightVar(V->getType(), V->getName(),
                                       WeightVar::MutabilityKind::Constant);
    registerIR(N, W);
    break;
  }
  case glow::Kinded::Kind::PlaceholderKind: {
    auto *P = cast<Placeholder>(N);
    auto *W = builder_.createWeightVar(P->getType(), P->getName(),
                                       WeightVar::MutabilityKind::Mutable);
    registerIR(N, W);
    break;
  }
  case glow::Kinded::Kind::QuantizationProfileNodeKind: {
    auto *QPN = cast<QuantizationProfileNode>(N);
    auto *inputTensor = valueForNode(QPN->getInput());
    auto *histogram = valueForNode(QPN->getHistogramPlaceholder());
    auto *computationInfo = valueForNode(QPN->getComputationInfoPlaceholder());
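    // Profiling only records statistics into the histogram and computation
    // info placeholders; the node produces no activation, so nothing is
    // registered.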
    builder_.createQuantizationProfileInst(QPN->getName(), inputTensor,
                                           histogram, computationInfo);
    break;
  }
  case glow::Kinded::Kind::TopKNodeKind: {
    auto *TKN = cast<TopKNode>(N);
    auto *inputTensor = valueForNode(TKN->getInput());
    auto k = TKN->getK();
    auto *V = builder_.createTopKOp(N->getName(), inputTensor, k,
                                    TKN->getIndices().getElementType());
    registerIR(TKN->getValues(), V->getValues());
    registerIR(TKN->getIndices(), V->getIndices());
    break;
  }
  case glow::Kinded::Kind::TraceEventNodeKind: {
    auto *TEN = cast<TraceEventNode>(N);
    auto *dataTensor = valueForNode(TEN->getData());
    builder_.createTraceEventInst(TEN->getName(), dataTensor, TEN->getIndex());
    break;
  }
  case glow::Kinded::Kind::SparseLengthsSumGradNodeKind: {
    auto *SLSG = cast<SparseLengthsSumGradNode>(N);

    auto *data = valueForNode(SLSG->getData());
    auto *indices = valueForNode(SLSG->getIndices());
    auto *lengths = valueForNode(SLSG->getLengths());

    auto *destGrad = valueForNode(SLSG->getGradOfOriginalOutputNamedResult());
    auto *dataGrad = builder_.createAllocActivationInst(
        DECORATE_NODE_NAME(N, "dataG"),
        SLSG->getGradOfInputNamedData().getType());

    builder_.createSparseLengthsSumGradInst(
        N->getName(), data, indices, lengths, destGrad, dataGrad,
        SLSG->getLengthsMode(), SLSG->getAvgLength());

    registerIR(SLSG->getGradOfInputNamedData(), dataGrad);
    break;
  }
  case glow::Kinded::Kind::SparseLengthsWeightedSumGradNodeKind: {
    auto *SLWSG = cast<SparseLengthsWeightedSumGradNode>(N);

    auto *data = valueForNode(SLWSG->getData());
    auto *weights = valueForNode(SLWSG->getWeights());
    auto *indices = valueForNode(SLWSG->getIndices());
    auto *lengths = valueForNode(SLWSG->getLengths());

    auto *destGrad = valueForNode(SLWSG->getGradOfOriginalOutputNamedResult());
    auto *dataGrad = builder_.createAllocActivationInst(
        DECORATE_NODE_NAME(N, "dataG"),
        SLWSG->getGradOfInputNamedData().getType());
    auto *weightsGrad = builder_.createAllocActivationInst(
        DECORATE_NODE_NAME(N, "weightsG"),
        SLWSG->getGradOfInputNamedWeights().getType());

    builder_.createSparseLengthsWeightedSumGradInst(
        N->getName(), data, weights, indices, lengths, destGrad, dataGrad,
        weightsGrad, SLWSG->getLengthsMode(), SLWSG->getAvgLength());

    registerIR(SLWSG->getGradOfInputNamedData(), dataGrad);
    registerIR(SLWSG->getGradOfInputNamedWeights(), weightsGrad);
    break;
  }
  case glow::Kinded::Kind::BatchedPairwiseDotProductNodeKind: {
    auto *BPDPN = llvm::cast<BatchedPairwiseDotProductNode>(N);
    auto firstInput = BPDPN->getInputs()[0];
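    // All inputs are assumed to share the first input's [batch, vectorSize]
    // shape, so dims()[1] below is the per-pair vector length.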

    std::string allocName = std::string(BPDPN->getName()) + ".res";
    auto *dest = builder_.createAllocActivationInst(
        allocName, BPDPN->getResult().getType());

    auto *inst = builder_.createBatchedPairwiseDotProductInst(
        BPDPN->getName(), dest, BPDPN->getInputs().size(),
        firstInput.getType()->dims()[1]);

    // The first instruction operand is the buffer that receives the dot
    // products; the rest are all inputs.
    for (auto &in : BPDPN->getInputs()) {
      inst->pushOperand({valueForNode(in), OperandKind::In});
    }

    registerIR(BPDPN->getResult(), dest);
    break;
  }

  case glow::Kinded::Kind::BatchedPairwiseDotProductGradNodeKind: {
    auto *BPDPGN = llvm::cast<BatchedPairwiseDotProductGradNode>(N);

    auto *in0 = valueForNode(BPDPGN->getOriginalInputs()[0]);
    auto *outputGrad = valueForNode(BPDPGN->getOutputGrad());

    // First, create alloc instructions for all of the gradients. This needs to
    // be done first so that these instructions precede the first use of the
    // buffers they create.
    std::vector<Value *> dests;
    for (unsigned i = 0, e = BPDPGN->getNumResults(); i < e; ++i) {
      NodeValue res = BPDPGN->getNthResult(i);
      std::string allocName =
          std::string(BPDPGN->getName()) + ".res." + std::to_string(i);
      auto *dest = builder_.createAllocActivationInst(allocName, res.getType());
      dests.emplace_back(dest);
    }

    auto *inst = builder_.createBatchedPairwiseDotProductGradInst(
        BPDPGN->getName(), outputGrad, BPDPGN->getOriginalInputs().size(),
        in0->dims()[1]);

    // Operand 0 is the output gradient; operands 1 -> numInputs are the
    // input gradients.
    for (unsigned i = 0, e = BPDPGN->getNumResults(); i < e; ++i) {
      NodeValue res = BPDPGN->getNthResult(i);
      inst->pushOperand({dests[i], OperandKind::Out});
      registerIR(res, dests[i]);
    }

    // Operands numInputs + 1 -> 2 * numInputs are original inputs.
    for (auto &in : BPDPGN->getOriginalInputs()) {
      inst->pushOperand({valueForNode(in), OperandKind::In});
    }
    break;
  }
  case glow::Kinded::Kind::ExternalFunctionCallNodeKind: {
    auto *EFCN = llvm::cast<ExternalFunctionCallNode>(N);
    std::string allocName = std::string(EFCN->getName()) + ".res";
    auto *dest = builder_.createAllocActivationInst(
        allocName, EFCN->getResult().getType());

    auto *inst = builder_.createExternalFunctionCallInst(
        EFCN->getName(), dest, EFCN->getFunctionName(), EFCN->getFunctionImpl(),
        EFCN->getFunctionKind());

    // The first instruction operand is the buffer for the result; the rest
    // are all inputs.
    for (auto &in : EFCN->getInputs()) {
      inst->pushOperand({valueForNode(in), OperandKind::In});
    }
    registerIR(EFCN->getResult(), dest);
    break;
  }
  }
}