in lib/LLVMIRCodeGen/LLVMIRGen.cpp [1913:3901]
void LLVMIRGen::generateLLVMIRForInstr(llvm::IRBuilder<> &builder,
const glow::Instruction *I) {
setCurrentDebugLocation(builder, I);
assert((!canBePartOfDataParallelKernel(I)) &&
"data parallel instructions are not handled here");
switch (I->getKind()) {
case Kinded::Kind::MatMulInstKind: {
auto *MM = cast<MatMulInst>(I);
auto *dest = MM->getDest();
auto *lhs = MM->getLHS();
auto *rhs = MM->getRHS();
auto *destPtr = emitValueAddress(builder, dest);
auto *lhsPtr = emitValueAddress(builder, lhs);
auto *rhsPtr = emitValueAddress(builder, rhs);
auto *destDims = emitValueDims(builder, dest);
auto *lhsDims = emitValueDims(builder, lhs);
auto *rhsDims = emitValueDims(builder, rhs);
auto *F = getFunction("matmul", dest->getElementType());
if (lhs->getType()->isQuantizedType()) {
auto *destTy = dest->getType();
auto *lhsTy = lhs->getType();
auto *rhsTy = rhs->getType();
auto *destOffset = emitConstI32(builder, destTy->getOffset());
auto *lhsOffset = emitConstI32(builder, lhsTy->getOffset());
auto *rhsOffset = emitConstI32(builder, rhsTy->getOffset());
auto outScaleParams = quantization::quantizeScaleOffset32To8(
lhsTy->getScale() * rhsTy->getScale() / destTy->getScale(), 0);
auto *outPre = emitConstI32(builder, outScaleParams.pre);
auto *outPost = emitConstI32(builder, outScaleParams.post);
auto *outScale = emitConstI32(builder, outScaleParams.scale);
createCall(builder, F,
{destPtr, lhsPtr, rhsPtr, destDims, lhsDims, rhsDims,
destOffset, lhsOffset, rhsOffset, outPre, outPost, outScale});
} else {
createCall(builder, F,
{destPtr, lhsPtr, rhsPtr, destDims, lhsDims, rhsDims});
}
break;
}
case Kinded::Kind::QuantizationProfileInstKind: {
auto *QP = cast<QuantizationProfileInst>(I);
auto *hist = QP->getHistogram();
auto *compInfo = QP->getComputationInfo();
auto *inputTensor = QP->getInputTensor();
auto *histPtr = emitValueAddress(builder, hist);
auto *compInfoPtr = emitValueAddress(builder, compInfo);
auto *inputTensorInfoPtr = emitValueAddress(builder, inputTensor);
auto *histDims = emitValueDims(builder, hist);
assert(inputTensor->getElementType() == ElemKind::FloatTy &&
"None float Tensor type for Quantization Profile Instruction.");
auto *tensorSize = emitConstDimT(builder, inputTensor->getType()->size());
auto *F = getFunction("quantization_profile");
createCall(
builder, F,
{inputTensorInfoPtr, tensorSize, compInfoPtr, histPtr, histDims});
break;
}
case Kinded::Kind::FullyConnectedInstKind: {
auto *FCI = cast<FullyConnectedInst>(I);
auto *dest = FCI->getDest();
auto *src = FCI->getSrc();
auto *weights = FCI->getWeights();
auto *bias = FCI->getBias();
auto *destPtr = emitValueAddress(builder, dest);
auto *srcPtr = emitValueAddress(builder, src);
auto *weightsPtr = emitValueAddress(builder, weights);
auto *biasPtr = emitValueAddress(builder, bias);
auto *destDims = emitValueDims(builder, dest);
auto *srcDims = emitValueDims(builder, src);
auto *weightsDims = emitValueDims(builder, weights);
auto *biasDims = emitValueDims(builder, bias);
if (src->getType()->isQuantizedType()) {
auto *destTy = dest->getType();
auto *srcTy = src->getType();
auto *weightsTy = weights->getType();
auto *biasTy = bias->getType();
auto *destOffset = emitConstI32(builder, destTy->getOffset());
auto *srcOffset = emitConstI32(builder, srcTy->getOffset());
auto *weightsOffset = emitConstI32(builder, weightsTy->getOffset());
auto *biasOffset = emitConstI32(builder, biasTy->getOffset());
// Calculate the scale of the values that come out of the matrix
// multiplication part of the calculation.
float matMulScale = srcTy->getScale() * weightsTy->getScale();
// Calculate the scaling parameters for the bias and output.
auto biasScaleParam = quantization::quantizeScaleOffset32To8(
biasTy->getScale() / matMulScale, 0);
auto outScaleParam = quantization::quantizeScaleOffset32To8(
matMulScale / destTy->getScale(), 0);
// Pass the pre-shift, post-shift and integer scale parameters for the
// bias and output calculation.
auto *biasPre = emitConstI32(builder, biasScaleParam.pre);
auto *biasPost = emitConstI32(builder, biasScaleParam.post);
auto *biasScale = emitConstI32(builder, biasScaleParam.scale);
auto *outPre = emitConstI32(builder, outScaleParam.pre);
auto *outPost = emitConstI32(builder, outScaleParam.post);
auto *outScale = emitConstI32(builder, outScaleParam.scale);
auto *F =
getFunction("fc", {dest->getElementType(), bias->getElementType()});
createCall(builder, F,
{destPtr, srcPtr, weightsPtr, biasPtr, destDims, srcDims,
weightsDims, biasDims, destOffset, srcOffset, weightsOffset,
biasOffset, biasPre, biasPost, biasScale, outPre, outPost,
outScale});
} else {
auto *F = getFunction("fc", dest->getElementType());
createCall(builder, F,
{destPtr, srcPtr, weightsPtr, biasPtr, destDims, srcDims,
weightsDims, biasDims});
}
break;
}
case Kinded::Kind::RowwiseQuantizedFullyConnectedInstKind: {
auto *RWQFC = cast<RowwiseQuantizedFullyConnectedInst>(I);
auto scalesT = getTensorForConstantValue(RWQFC->getScales());
auto scalesH = scalesT.getHandle();
size_t rowNum = scalesH.dims()[0];
float inputScale = RWQFC->getSrc()->getType()->getScale();
float bScale = RWQFC->getBias()->getType()->getScale();
int32_t bOffset = RWQFC->getBias()->getType()->getOffset();
float outputScale = RWQFC->getDest()->getType()->getScale();
std::vector<llvm::Constant *> biasPreV(rowNum);
std::vector<llvm::Constant *> biasPostV(rowNum);
std::vector<llvm::Constant *> biasScaleV(rowNum);
std::vector<llvm::Constant *> outputPreV(rowNum);
std::vector<llvm::Constant *> outputPostV(rowNum);
std::vector<llvm::Constant *> outputScaleV(rowNum);
for (size_t i = 0; i < rowNum; i++) {
// Calculate the scale of the values that come out of the matrix
// multiplication part of the calculation.
float matMulScale = inputScale * scalesH.raw(i);
// Calculate the scaling parameters for the bias and output.
auto biasScaleParam =
quantization::quantizeScaleOffset32To8(bScale / matMulScale, bOffset);
auto outScaleParam =
quantization::quantizeScaleOffset32To8(matMulScale / outputScale, 0);
// Pass the pre-shift, post-shift and integer scale parameters for the
// bias and output calculation.
biasPreV[i] = llvm::ConstantInt::get(builder.getInt32Ty(),
biasScaleParam.pre, true);
biasPostV[i] = llvm::ConstantInt::get(builder.getInt32Ty(),
biasScaleParam.post, true);
biasScaleV[i] = llvm::ConstantInt::get(builder.getInt32Ty(),
biasScaleParam.scale, true);
outputPreV[i] =
llvm::ConstantInt::get(builder.getInt32Ty(), outScaleParam.pre, true);
outputPostV[i] = llvm::ConstantInt::get(builder.getInt32Ty(),
outScaleParam.post, true);
outputScaleV[i] = llvm::ConstantInt::get(builder.getInt32Ty(),
outScaleParam.scale, true);
}
auto *dest = RWQFC->getDest();
auto *src = RWQFC->getSrc();
auto *weights = RWQFC->getWeights();
auto *bias = RWQFC->getBias();
auto *weightsOffsets = RWQFC->getOffsets();
auto *destPtr = emitValueAddress(builder, dest);
auto *srcPtr = emitValueAddress(builder, src);
auto *weightsPtr = emitValueAddress(builder, weights);
auto *biasPtr = emitValueAddress(builder, bias);
auto *weightsOffsetsPtr = emitValueAddress(builder, weightsOffsets);
auto *biasPrePtr = emitConstArray(builder, biasPreV, builder.getInt32Ty());
auto *biasPostPtr =
emitConstArray(builder, biasPostV, builder.getInt32Ty());
auto *biasScalePtr =
emitConstArray(builder, biasScaleV, builder.getInt32Ty());
auto *outputPrePtr =
emitConstArray(builder, outputPreV, builder.getInt32Ty());
auto *outputPostPtr =
emitConstArray(builder, outputPostV, builder.getInt32Ty());
auto *outputScalePtr =
emitConstArray(builder, outputScaleV, builder.getInt32Ty());
auto *srcDims = emitValueDims(builder, src);
auto *weightsDims = emitValueDims(builder, weights);
auto *destDims = emitValueDims(builder, dest);
auto *biasDims = emitValueDims(builder, bias);
auto *row = emitConstDimT(builder, weightsOffsets->dims()[0]);
auto *destOffset = emitConstI32(builder, dest->getType()->getOffset());
auto *srcOffset = emitConstI32(builder, src->getType()->getOffset());
auto *biasOffset = emitConstI32(builder, bOffset);
llvm::Function *F = nullptr;
if ((dest->getElementType() == ElemKind::Int8QTy) &&
(bias->getElementType() == ElemKind::Int8QTy)) {
F = getFunction("rowwise_quantized_fc_i8_i8");
} else if ((dest->getElementType() == ElemKind::Int8QTy) &&
(bias->getElementType() == ElemKind::Int32QTy)) {
F = getFunction("rowwise_quantized_fc_i8_i32");
} else {
LOG(FATAL) << "Unsupported element/bias type for "
"RowwiseQuantizedFullyConnectedInst";
}
createCall(builder, F,
{destPtr, srcPtr, weightsPtr, biasPtr, weightsOffsetsPtr,
biasPrePtr, biasPostPtr, biasScalePtr, outputPrePtr,
outputPostPtr, outputScalePtr, destDims, srcDims, weightsDims,
biasDims, row, destOffset, srcOffset, biasOffset});
break;
}
case Kinded::Kind::BatchedAddInstKind: {
auto *BA = cast<BatchedAddInst>(I);
auto *dest = BA->getDest();
auto *batch = BA->getBatch();
auto *slice = BA->getSlice();
auto *destPtr = emitValueAddress(builder, dest);
auto *batchPtr = emitValueAddress(builder, batch);
auto *slicePtr = emitValueAddress(builder, slice);
auto bdim = flattenCdr(batch->dims());
auto *numSlice = emitConstDimT(builder, bdim.first);
auto *sliceSize = emitConstDimT(builder, bdim.second);
if (batch->getType()->isQuantizedType()) {
auto *destTy = dest->getType();
auto *batchTy = batch->getType();
auto *sliceTy = slice->getType();
auto *destOffset = emitConstI32(builder, destTy->getOffset());
auto *batchOffset = emitConstI32(builder, batchTy->getOffset());
auto *sliceOffset = emitConstI32(builder, sliceTy->getOffset());
float destScale = destTy->getScale();
// Here, we select parameters for scaling both summands to the
// destination scale.
auto batchScaleParams = quantization::quantizeScaleOffset32To8(
batchTy->getScale() / destScale, batchTy->getOffset());
auto sliceScaleParams = quantization::quantizeScaleOffset32To8(
sliceTy->getScale() / destScale, sliceTy->getOffset());
auto *batchPre = emitConstI32(builder, batchScaleParams.pre);
auto *batchPost = emitConstI32(builder, batchScaleParams.post);
auto *batchScale = emitConstI32(builder, batchScaleParams.scale);
auto *slicePre = emitConstI32(builder, sliceScaleParams.pre);
auto *slicePost = emitConstI32(builder, sliceScaleParams.post);
auto *sliceScale = emitConstI32(builder, sliceScaleParams.scale);
llvm::Function *F = nullptr;
if (sliceTy->getElementType() == ElemKind::Int8QTy) {
F = getFunction("batchedadd", dest->getElementType());
} else if (sliceTy->getElementType() == ElemKind::Int32QTy) {
F = getFunction("batchedadd_i32", dest->getElementType());
} else {
LOG(FATAL) << "Type is not supported: "
<< Type::getElementName(sliceTy->getElementType()).str();
}
createCall(builder, F,
{destPtr, batchPtr, slicePtr, numSlice, sliceSize, destOffset,
batchOffset, sliceOffset, batchPre, batchPost, batchScale,
slicePre, slicePost, sliceScale});
} else {
auto *F = getFunction("batchedadd", dest->getElementType());
createCall(builder, F,
{destPtr, batchPtr, slicePtr, numSlice, sliceSize});
}
break;
}
case Kinded::Kind::BatchedReduceAddInstKind: {
auto *BR = cast<BatchedReduceAddInst>(I);
auto *dest = BR->getDest();
auto *batch = BR->getBatch();
auto *destPtr = emitValueAddress(builder, dest);
auto *batchPtr = emitValueAddress(builder, batch);
auto *axis = emitConstDimT(builder, BR->getAxis());
ShapeVector eBatchDims = expandDimsToMax(batch->dims());
ShapeVector eDestDims = eBatchDims;
eDestDims[BR->getAxis()] = 1;
auto *batchDims =
emitConstDimTArray(builder, llvm::makeArrayRef(eBatchDims));
auto *destDims = emitConstDimTArray(builder, llvm::makeArrayRef(eDestDims));
auto *F = getFunction("batchedreduceadd", dest->getElementType());
if (batch->getType()->isQuantizedType()) {
auto *destTy = dest->getType();
auto *batchTy = batch->getType();
auto *destOffset = emitConstI32(builder, destTy->getOffset());
auto *batchOffset = emitConstI32(builder, batchTy->getOffset());
// BatchedReduceAdd is an accumulation operation, with equations
// s_d * (i_d - o_d) = \sum s_b * (i_b - o_b)
// => i_d - o_d = \sum (s_b / s_d) * (i_b - o_b)
// => i_d = (s_b / s_d ) * [\sum (i_b - o_b)] + o_d
auto batchScaleParams = quantization::quantizeScaleOffset32To8(
batchTy->getScale() / destTy->getScale(), batchTy->getOffset());
auto *batchPre = emitConstI32(builder, batchScaleParams.pre);
auto *batchPost = emitConstI32(builder, batchScaleParams.post);
auto *batchScale = emitConstI32(builder, batchScaleParams.scale);
createCall(builder, F,
{destPtr, batchPtr, destDims, batchDims, destOffset,
batchOffset, batchPre, batchPost, batchScale, axis});
} else {
auto *destSize = emitConstDimT(builder, dest->size());
createCall(builder, F,
{destPtr, batchPtr, destSize, destDims, batchDims, axis});
}
break;
}
case Kinded::Kind::BatchedReduceProdInstKind: {
auto *BR = cast<BatchedReduceProdInst>(I);
auto *dest = BR->getDest();
auto *batch = BR->getBatch();
auto *destPtr = emitValueAddress(builder, dest);
auto *batchPtr = emitValueAddress(builder, batch);
auto *axis = emitConstDimT(builder, BR->getAxis());
ShapeVector eBatchDims = expandDimsToMax(batch->dims());
ShapeVector eDestDims = eBatchDims;
eDestDims[BR->getAxis()] = 1;
auto *batchDims =
emitConstDimTArray(builder, llvm::makeArrayRef(eBatchDims));
auto *destDims = emitConstDimTArray(builder, llvm::makeArrayRef(eDestDims));
auto *F = getFunction("batchedreduceprod", dest->getElementType());
assert(!batch->getType()->isQuantizedType() &&
"Quantized implementation for ReduceProd not supported yet.");
auto *destSize = emitConstDimT(builder, dest->size());
createCall(builder, F,
{destPtr, batchPtr, destSize, destDims, batchDims, axis});
break;
}
/// Emits a switch case handling a Batched<INST_NAME_>Inst (ReduceMin or
/// ReduceMax). \p INST_NAME_ is the instruction-name infix (e.g. ReduceMin)
/// and \p FUN_NAME_ is the libjit kernel base name (e.g. "reducemin").
/// The batch dims are expanded to the max tensor rank and each reduced axis
/// collapses to 1 in the destination dims. Only float/int32/int64 inputs
/// with matching dest element type are supported; anything else is fatal.
/// NOTE: comments stay outside the macro body — a // comment inside would
/// comment out the line-continuation backslash.
#define BATCHED_REDUCE_MINMAX_CASE(INST_NAME_, FUN_NAME_)                      \
  case Kinded::Kind::Batched##INST_NAME_##InstKind: {                          \
    auto *BR = cast<Batched##INST_NAME_##Inst>(I);                             \
    auto *dest = BR->getDest();                                                \
    auto *batch = BR->getBatch();                                              \
    auto axes = BR->getAxes();                                                 \
    auto *destPtr = emitValueAddress(builder, dest);                           \
    auto *batchPtr = emitValueAddress(builder, batch);                         \
                                                                               \
    ShapeVector eBatchDims = expandDimsToMax(batch->dims());                   \
    ShapeVector eDestDims = eBatchDims;                                        \
    for (dim_t i = 0; i < axes.size(); i++) {                                  \
      eDestDims[axes[i]] = 1;                                                  \
    }                                                                          \
                                                                               \
    auto *batchDims =                                                          \
        emitConstDimTArray(builder, llvm::makeArrayRef(eBatchDims));           \
    auto *destDims =                                                           \
        emitConstDimTArray(builder, llvm::makeArrayRef(eDestDims));            \
                                                                               \
    if (((batch->getElementType() != ElemKind::FloatTy) &&                     \
         (batch->getElementType() != ElemKind::Int32ITy) &&                    \
         (batch->getElementType() != ElemKind::Int64ITy)) ||                   \
        (batch->getElementType() != dest->getElementType())) {                 \
      std::string errStr = "Cannot get function for ";                         \
      std::string name = #INST_NAME_;                                          \
      errStr += name;                                                          \
      llvm_unreachable(errStr.c_str());                                        \
    }                                                                          \
                                                                               \
    llvm::Function *F = getFunction(FUN_NAME_, batch->getElementType());       \
    if (!batch->getType()->isQuantizedType()) {                                \
      auto *destSize = emitConstSizeT(builder, dest->size());                  \
                                                                               \
      createCall(builder, F,                                                   \
                 {destPtr, batchPtr, destSize, destDims, batchDims});          \
    }                                                                          \
    break;                                                                     \
  }
BATCHED_REDUCE_MINMAX_CASE(ReduceMin, "reducemin")
BATCHED_REDUCE_MINMAX_CASE(ReduceMax, "reducemax")
#undef BATCHED_REDUCE_MINMAX_CASE
case Kinded::Kind::ConvolutionInstKind: {
auto *CI = cast<ConvolutionInst>(I);
assert(CI->getLayout() == NHWC &&
"Glow CPU Backend supports only NHWC Convolutions");
auto *dest = CI->getDest();
auto *src = CI->getSrc();
auto *filter = CI->getFilter();
auto *bias = CI->getBias();
auto *destPtr = emitValueAddress(builder, dest);
auto *srcPtr = emitValueAddress(builder, src);
auto *filterPtr = emitValueAddress(builder, filter);
auto *biasPtr = emitValueAddress(builder, bias);
auto *destDims = emitValueDims(builder, dest);
auto *srcDims = emitValueDims(builder, src);
auto *filterDims = emitValueDims(builder, filter);
auto *biasDims = emitValueDims(builder, bias);
auto *kernels = emitConstDimTArray(builder, CI->getKernels());
auto *strides = emitConstDimTArray(builder, CI->getStrides());
auto *pads = emitConstDimTArray(builder, CI->getPads());
auto *group = emitConstDimT(builder, CI->getGroup());
auto *dilation = emitConstDimTArray(builder, CI->getDilation());
auto destDepth = dest->dims()[3];
// Try to 'block' the convolution on the 'depth' dimension. We will process
// this number output slices each iteration.
unsigned unrollDFactor = 1;
// In libjit_convolution_f function, 'unrollDFactor' output
// layers will be processed together. Therefore, the number of
// output layers in each group should be divisible by 'unrollDFactor'
bool groupDividedBy8 = ((destDepth / CI->getGroup()) % 8) == 0;
if (groupDividedBy8) {
unrollDFactor = 8;
}
auto *unrollD = emitConstI32(builder, unrollDFactor);
auto *actType = emitConstI32(builder, CI->getFusedActivation());
if (src->getType()->isQuantizedType()) {
auto *destTy = dest->getType();
auto *srcTy = src->getType();
auto *filterTy = filter->getType();
auto *biasTy = bias->getType();
auto *destOffset = emitConstI32(builder, destTy->getOffset());
auto *srcOffset = emitConstI32(builder, srcTy->getOffset());
auto *filterOffset = emitConstI32(builder, filterTy->getOffset());
auto *biasOffset = emitConstI32(builder, biasTy->getOffset());
// Calculate the scale of the values that come out of the matrix
// multiplication part of the calculation.
float matMulScale = srcTy->getScale() * filterTy->getScale();
// Calculate the scaling parameters for the bias and output.
auto biasScaleParam = quantization::quantizeScaleOffset32To8(
biasTy->getScale() / matMulScale, biasTy->getOffset());
auto outScaleParam = quantization::quantizeScaleOffset32To8(
matMulScale / destTy->getScale(), 0);
// Pass the pre-shift, post-shift and integer scale parameters for the
// bias and output calculation.
auto *biasPre = emitConstI32(builder, biasScaleParam.pre);
auto *biasPost = emitConstI32(builder, biasScaleParam.post);
auto *biasScale = emitConstI32(builder, biasScaleParam.scale);
auto *outPre = emitConstI32(builder, outScaleParam.pre);
auto *outPost = emitConstI32(builder, outScaleParam.post);
auto *outScale = emitConstI32(builder, outScaleParam.scale);
// Emit parameters for fused activation.
auto *actArgsQuant = emitConstQuantActivationArgs(builder, CI);
auto *F = getFunction("conv2d",
{dest->getElementType(), bias->getElementType()});
createCall(builder, F,
{destPtr, srcPtr, filterPtr, biasPtr, destDims,
srcDims, filterDims, biasDims, kernels, strides,
pads, group, destOffset, srcOffset, filterOffset,
biasOffset, biasPre, biasPost, biasScale, outPre,
outPost, outScale, unrollD, dilation, actType,
actArgsQuant});
} else {
// Emit parameters for fused activation.
auto *actArgsFloat = emitConstFloatActivationArgs(builder, CI);
auto *F = getFunction("conv2d", dest->getElementType());
createCall(builder, F,
{destPtr, srcPtr, filterPtr, biasPtr, destDims, srcDims,
filterDims, biasDims, kernels, strides, pads, group, unrollD,
dilation, actType, actArgsFloat});
}
break;
}
case Kinded::Kind::ConvolutionGradInstKind: {
auto *CG = cast<ConvolutionGradInst>(I);
auto *srcGrad = CG->getSrcGrad();
auto *destGrad = CG->getDestGrad();
auto *src = CG->getSrc();
auto *filterGrad = CG->getFilterGrad();
auto *srcGradPtr = emitValueAddress(builder, srcGrad);
auto *destGradPtr = emitValueAddress(builder, destGrad);
auto *srcPtr = emitValueAddress(builder, src);
auto *filterGradPtr = emitValueAddress(builder, filterGrad);
auto *biasGradPtr = emitValueAddress(builder, CG->getBiasGrad());
auto *filterPtr = emitValueAddress(builder, CG->getFilter());
auto *destGradDims = emitValueDims(builder, destGrad);
auto *srcDims = emitValueDims(builder, src);
auto *filterGradDims = emitValueDims(builder, filterGrad);
auto *kernels = emitConstDimTArray(builder, CG->getKernels());
auto *strides = emitConstDimTArray(builder, CG->getStrides());
auto *pads = emitConstDimTArray(builder, CG->getPads());
auto *group = emitConstDimT(builder, CG->getGroup());
auto *dilation = emitConstDimTArray(builder, CG->getDilation());
auto *F = getFunction("convolution_grad", srcGrad->getElementType());
createCall(builder, F,
{srcGradPtr, destGradPtr, srcPtr, filterGradPtr, biasGradPtr,
filterPtr, destGradDims, srcDims, filterGradDims, kernels,
strides, pads, group, dilation});
break;
}
case Kinded::Kind::ConvTransposeInstKind: {
auto *CI = cast<ConvTransposeInst>(I);
auto *dest = CI->getDest();
auto *src = CI->getSrc();
auto *filter = CI->getFilter();
auto *bias = CI->getBias();
auto *destPtr = emitValueAddress(builder, dest);
auto *srcPtr = emitValueAddress(builder, src);
auto *filterPtr = emitValueAddress(builder, filter);
auto *biasPtr = emitValueAddress(builder, bias);
auto *destDims = emitValueDims(builder, dest);
auto *srcDims = emitValueDims(builder, src);
auto *filterDims = emitValueDims(builder, filter);
auto *biasDims = emitValueDims(builder, bias);
auto *kernels = emitConstDimTArray(builder, CI->getKernels());
auto *strides = emitConstDimTArray(builder, CI->getStrides());
auto *pads = emitConstDimTArray(builder, CI->getPads());
auto *group = emitConstDimT(builder, CI->getGroup());
auto *dilation = emitConstDimTArray(builder, CI->getDilation());
const char *kernelName = "conv_transpose";
auto *F = getFunction(kernelName, dest->getElementType());
if (src->getType()->isQuantizedType()) {
auto *destTy = dest->getType();
auto *srcTy = src->getType();
auto *filterTy = filter->getType();
auto *destOffset = emitConstI32(builder, destTy->getOffset());
auto *srcOffset = emitConstI32(builder, srcTy->getOffset());
auto *filterOffset = emitConstI32(builder, filterTy->getOffset());
// Calculate the scale of the values that come out of the matrix
// multiplication part of the calculation.
float matMulScale = srcTy->getScale() * filterTy->getScale();
// Calculate the scaling parameters for the bias and output.
auto outScaleParam = quantization::quantizeScaleOffset32To8(
matMulScale / destTy->getScale(), 0);
// Pass the pre-shift, post-shift and integer scale parameters for the
// output calculation.
auto *outPre = emitConstI32(builder, outScaleParam.pre);
auto *outPost = emitConstI32(builder, outScaleParam.post);
auto *outScale = emitConstI32(builder, outScaleParam.scale);
createCall(builder, F,
{destPtr, srcPtr, filterPtr, biasPtr, destDims, srcDims,
filterDims, biasDims, kernels, strides, pads, group,
destOffset, srcOffset, filterOffset, outPre, outPost,
outScale, dilation});
} else {
createCall(builder, F,
{destPtr, srcPtr, filterPtr, biasPtr, destDims, srcDims,
filterDims, biasDims, kernels, strides, pads, group,
dilation});
}
break;
}
case Kinded::Kind::ChannelwiseQuantizedConvolutionInstKind: {
auto *CQCI = cast<ChannelwiseQuantizedConvolutionInst>(I);
auto *dest = CQCI->getDest();
auto *src = CQCI->getSrc();
auto *filter = CQCI->getFilter();
auto *bias = CQCI->getBias();
auto *filterScales = CQCI->getFilterScales();
auto *filterOffsets = CQCI->getFilterOffsets();
auto *biasScales = CQCI->getBiasScales();
auto *biasOffsets = CQCI->getBiasOffsets();
auto *destTy = dest->getType();
auto *srcTy = src->getType();
auto filterScalesT = getTensorForConstantValue(filterScales);
auto filterScalesH = filterScalesT.getHandle<float>();
auto biasScalesT = getTensorForConstantValue(biasScales);
auto biasScalesH = biasScalesT.getHandle<float>();
// Compute quantization parameters for each channel.
auto channelNum = dest->dims().back();
std::vector<llvm::Constant *> biasPreV(channelNum);
std::vector<llvm::Constant *> biasPostV(channelNum);
std::vector<llvm::Constant *> biasScaleV(channelNum);
std::vector<llvm::Constant *> outputPreV(channelNum);
std::vector<llvm::Constant *> outputPostV(channelNum);
std::vector<llvm::Constant *> outputScaleV(channelNum);
for (size_t i = 0; i < channelNum; i++) {
// Compute the scaling parameters for bias and output.
float matMulScale = srcTy->getScale() * filterScalesH.raw(i);
auto biasScaleParam = quantization::quantizeScaleOffset32To8(
biasScalesH.raw(i) / matMulScale, 0);
auto outScaleParam = quantization::quantizeScaleOffset32To8(
matMulScale / destTy->getScale(), 0);
// Pass the pre-shift, post-shift and integer scale parameters for the
// bias and output calculation.
biasPreV[i] = llvm::ConstantInt::get(builder.getInt32Ty(),
biasScaleParam.pre, true);
biasPostV[i] = llvm::ConstantInt::get(builder.getInt32Ty(),
biasScaleParam.post, true);
biasScaleV[i] = llvm::ConstantInt::get(builder.getInt32Ty(),
biasScaleParam.scale, true);
outputPreV[i] =
llvm::ConstantInt::get(builder.getInt32Ty(), outScaleParam.pre, true);
outputPostV[i] = llvm::ConstantInt::get(builder.getInt32Ty(),
outScaleParam.post, true);
outputScaleV[i] = llvm::ConstantInt::get(builder.getInt32Ty(),
outScaleParam.scale, true);
}
auto *destPtr = emitValueAddress(builder, dest);
auto *srcPtr = emitValueAddress(builder, src);
auto *filterPtr = emitValueAddress(builder, filter);
auto *biasPtr = emitValueAddress(builder, bias);
auto *destDims = emitValueDims(builder, dest);
auto *srcDims = emitValueDims(builder, src);
auto *filterDims = emitValueDims(builder, filter);
auto *biasDims = emitValueDims(builder, bias);
auto *kernels = emitConstDimTArray(builder, CQCI->getKernels());
auto *strides = emitConstDimTArray(builder, CQCI->getStrides());
auto *pads = emitConstDimTArray(builder, CQCI->getPads());
auto *group = emitConstDimT(builder, CQCI->getGroup());
auto *dilation = emitConstDimTArray(builder, CQCI->getDilation());
auto *destOffset = emitConstI32(builder, destTy->getOffset());
auto *srcOffset = emitConstI32(builder, srcTy->getOffset());
auto *filterOffsetsPtr = emitValueAddress(builder, filterOffsets);
auto *biasOffsetsPtr = emitValueAddress(builder, biasOffsets);
auto *biasPrePtr = emitConstArray(builder, biasPreV, builder.getInt32Ty());
auto *biasPostPtr =
emitConstArray(builder, biasPostV, builder.getInt32Ty());
auto *biasScalePtr =
emitConstArray(builder, biasScaleV, builder.getInt32Ty());
auto *outputPrePtr =
emitConstArray(builder, outputPreV, builder.getInt32Ty());
auto *outputPostPtr =
emitConstArray(builder, outputPostV, builder.getInt32Ty());
auto *outputScalePtr =
emitConstArray(builder, outputScaleV, builder.getInt32Ty());
bool isConv3D = (srcTy->dims().size() == 5);
auto *F = getFunction(isConv3D ? "channelwise_quantized_conv3d"
: "channelwise_quantized_conv2d",
{dest->getElementType(), bias->getElementType()});
auto *actType = emitConstI32(builder, CQCI->getFusedActivation());
auto *actArgsQuant = emitConstQuantActivationArgs(builder, CQCI);
createCall(builder, F,
{destPtr, srcPtr, filterPtr, biasPtr,
destDims, srcDims, filterDims, biasDims,
kernels, strides, pads, group,
dilation, destOffset, srcOffset, filterOffsetsPtr,
biasOffsetsPtr, biasPrePtr, biasPostPtr, biasScalePtr,
outputPrePtr, outputPostPtr, outputScalePtr, actType,
actArgsQuant});
break;
}
case Kinded::Kind::CrossEntropyLossInstKind: {
auto *CI = cast<CrossEntropyLossInst>(I);
auto *P = CI->getP();
auto *labels = CI->getLabels();
auto *CE = CI->getCE();
auto *CEPtr = emitValueAddress(builder, CE);
auto *PPtr = emitValueAddress(builder, P);
auto *labelsPtr = emitValueAddress(builder, labels);
auto *dims = emitValueDims(builder, P);
auto *F = getFunction("cross_entropy_loss",
{CE->getElementType(), labels->getElementType()});
createCall(builder, F, {CEPtr, PPtr, labelsPtr, dims});
break;
}
case Kinded::Kind::LengthsToRangesInstKind: {
auto *LTR = cast<LengthsToRangesInst>(I);
auto *dest = LTR->getDest();
auto *lengths = LTR->getLengths();
auto *destPtr = emitValueAddress(builder, dest);
auto *lengthsPtr = emitValueAddress(builder, lengths);
auto *size = emitConstDimT(builder, lengths->dims()[0]);
auto *F = getFunction("lengths_to_ranges", dest->getElementType());
createCall(builder, F, {destPtr, lengthsPtr, size});
break;
}
case Kinded::Kind::LengthsSumInstKind: {
auto *LS = cast<LengthsSumInst>(I);
auto *dest = LS->getDest();
auto *data = LS->getData();
auto *lengths = LS->getLengths();
auto *destPtr = emitValueAddress(builder, dest);
auto *dataPtr = emitValueAddress(builder, data);
auto *lengthsPtr = emitValueAddress(builder, lengths);
auto *lengthsSize = emitConstDimT(builder, lengths->size());
auto *dataType = data->getType();
auto *destSize = emitConstDimT(builder, dest->size());
auto *sliceSize =
emitConstDimT(builder, dataType->size() / dataType->dims()[0]);
auto *F = getFunction("lengths_sum", data->getElementType());
createCall(
builder, F,
{destPtr, dataPtr, lengthsPtr, destSize, lengthsSize, sliceSize});
break;
}
case Kinded::Kind::LocalResponseNormalizationInstKind: {
auto *LRN = cast<LocalResponseNormalizationInst>(I);
auto *dest = LRN->getDest();
auto *src = LRN->getSrc();
auto *destPtr = emitValueAddress(builder, dest);
auto *srcPtr = emitValueAddress(builder, src);
auto *scalePtr = emitValueAddress(builder, LRN->getScale());
auto *destDims = emitValueDims(builder, dest);
auto *srcDims = emitValueDims(builder, src);
auto *halfWindow = emitConstDimT(builder, LRN->getHalfWindowSize());
auto *alpha = emitConstF32(builder, LRN->getAlpha());
auto *beta = emitConstF32(builder, LRN->getBeta());
auto *k = emitConstF32(builder, LRN->getK());
auto *F =
getFunction("local_response_normalization", dest->getElementType());
createCall(builder, F,
{destPtr, srcPtr, scalePtr, destDims, srcDims, halfWindow, alpha,
beta, k});
break;
}
case Kinded::Kind::LocalResponseNormalizationGradInstKind: {
auto *LRNG = llvm::cast<LocalResponseNormalizationGradInst>(I);
auto *srcGrad = LRNG->getSrcGrad();
auto *dest = LRNG->getDest();
auto *srcGradPtr = emitValueAddress(builder, srcGrad);
auto *destGradPtr = emitValueAddress(builder, LRNG->getDestGrad());
auto *srcPtr = emitValueAddress(builder, LRNG->getSrc());
auto *destPtr = emitValueAddress(builder, dest);
auto *scalePtr = emitValueAddress(builder, LRNG->getScale());
auto *destDims = emitValueDims(builder, dest);
auto *halfWindow = emitConstDimT(builder, LRNG->getHalfWindowSize());
auto *alpha = emitConstF32(builder, LRNG->getAlpha());
auto *beta = emitConstF32(builder, LRNG->getBeta());
auto *F = getFunction("local_response_normalization_grad",
srcGrad->getElementType());
createCall(builder, F,
{srcGradPtr, destGradPtr, srcPtr, destPtr, scalePtr, destDims,
halfWindow, alpha, beta});
break;
}
case Kinded::Kind::MaxPoolInstKind: {
auto *PM = cast<MaxPoolInst>(I);
assert(PM->getLayout() == NHWC &&
"Glow CPU Backend supports only NHWC Pools");
auto *dest = PM->getDest();
auto *src = PM->getSrc();
auto *destPtr = emitValueAddress(builder, dest);
auto *srcPtr = emitValueAddress(builder, src);
auto *destDims = emitValueDims(builder, dest);
auto *srcDims = emitValueDims(builder, src);
auto *kernels = emitConstDimTArray(builder, PM->getKernels());
auto *strides = emitConstDimTArray(builder, PM->getStrides());
auto *pads = emitConstDimTArray(builder, PM->getPads());
auto *F = getFunction("max_pool", dest->getElementType());
if (src->getType()->isQuantizedType()) {
auto *destOffset = emitConstI32(builder, dest->getType()->getOffset());
createCall(builder, F,
{srcPtr, destPtr, srcDims, destDims, kernels, strides, pads,
destOffset});
} else {
createCall(builder, F,
{srcPtr, destPtr, srcDims, destDims, kernels, strides, pads});
}
break;
}
case Kinded::Kind::MaxPoolWithArgmaxInstKind: {
auto *PMXY = cast<MaxPoolWithArgmaxInst>(I);
assert(PMXY->getLayout() == NHWC &&
"Glow CPU Backend supports only NHWC Pools");
auto *dest = PMXY->getDest();
auto *src = PMXY->getSrc();
auto *destPtr = emitValueAddress(builder, dest);
auto *srcPtr = emitValueAddress(builder, src);
auto *argMax = PMXY->getArgmax();
auto *argmaxPtr = emitValueAddress(builder, argMax);
auto *destDims = emitValueDims(builder, dest);
auto *srcDims = emitValueDims(builder, src);
auto *kernels = emitConstDimTArray(builder, PMXY->getKernels());
auto *strides = emitConstDimTArray(builder, PMXY->getStrides());
auto *pads = emitConstDimTArray(builder, PMXY->getPads());
auto *F = getFunction("max_pool_argmax",
{dest->getElementType(), argMax->getElementType()});
createCall(builder, F,
{srcPtr, destPtr, argmaxPtr, srcDims, destDims, kernels, strides,
pads});
break;
}
  case Kinded::Kind::MaxPoolWithArgmaxGradInstKind: {
    // Lower the MaxPoolWithArgmax gradient to the libjit
    // "max_pool_argmax_grad" kernel, specialized on the gradient and argmax
    // element types.
    auto *PMG = cast<MaxPoolWithArgmaxGradInst>(I);
    auto *srcGrad = PMG->getSrcGrad();
    auto *srcGradPtr = emitValueAddress(builder, srcGrad);
    auto *destGradPtr = emitValueAddress(builder, PMG->getDestGrad());
    auto *argMax = PMG->getArgmax();
    auto *argmaxPtr = emitValueAddress(builder, argMax);
    auto *srcGradDims = emitValueDims(builder, srcGrad);
    auto *destDims = emitValueDims(builder, PMG->getDest());
    auto *F = getFunction("max_pool_argmax_grad", {srcGrad->getElementType(),
                                                   argMax->getElementType()});
    createCall(builder, F,
               {srcGradPtr, destGradPtr, argmaxPtr, srcGradDims, destDims});
    break;
  }
case Kinded::Kind::ArgMaxInstKind: {
auto *AM = cast<ArgMaxInst>(I);
auto *dest = AM->getDest();
auto *src = AM->getSrc();
auto *destPtr = emitValueAddress(builder, dest);
auto *srcPtr = emitValueAddress(builder, src);
auto *srcDims = emitValueDims(builder, src);
auto *srcNumDims = emitConstSizeT(builder, src->dims().size());
auto *axis = emitConstSizeT(builder, AM->getAxis());
auto *F =
getFunction("arg_max", {src->getElementType(), dest->getElementType()});
createCall(builder, F, {srcPtr, destPtr, srcDims, srcNumDims, axis});
break;
}
case Kinded::Kind::ArgMinInstKind: {
auto *AM = cast<ArgMinInst>(I);
auto *dest = AM->getDest();
auto *src = AM->getSrc();
auto *destPtr = emitValueAddress(builder, dest);
auto *srcPtr = emitValueAddress(builder, src);
auto *srcDims = emitValueDims(builder, src);
auto *srcNumDims = emitConstSizeT(builder, src->dims().size());
auto *axis = emitConstSizeT(builder, AM->getAxis());
auto *F =
getFunction("arg_min", {src->getElementType(), dest->getElementType()});
createCall(builder, F, {srcPtr, destPtr, srcDims, srcNumDims, axis});
break;
}
  case Kinded::Kind::AvgPoolInstKind: {
    // Lower AvgPool to the libjit "avg_pool" kernel. The quantized variant
    // additionally receives the src/dest offsets and fixed-point rescaling
    // parameters.
    auto *PA = cast<AvgPoolInst>(I);
    assert(PA->getLayout() == NHWC &&
           "Glow CPU Backend supports only NHWC Pools");
    auto *dest = PA->getDest();
    auto *src = PA->getSrc();
    auto *destPtr = emitValueAddress(builder, dest);
    auto *srcPtr = emitValueAddress(builder, src);
    auto *destDims = emitValueDims(builder, dest);
    auto *srcDims = emitValueDims(builder, src);
    auto *kernels = emitConstDimTArray(builder, PA->getKernels());
    auto *strides = emitConstDimTArray(builder, PA->getStrides());
    auto *pads = emitConstDimTArray(builder, PA->getPads());
    auto *countIncludePads = emitConstI1(builder, PA->getCountIncludePads());
    auto *F = getFunction("avg_pool", dest->getElementType());
    if (src->getType()->isQuantizedType()) {
      auto *destTy = dest->getType();
      auto *srcTy = src->getType();
      auto *destOffset = emitConstI32(builder, destTy->getOffset());
      auto *srcOffset = emitConstI32(builder, srcTy->getOffset());
      // When we count the padding pixels in the normalizing factor we include
      // the filter area in the scaling parameters since it is a constant.
      float scale = srcTy->getScale() / destTy->getScale();
      if (PA->getCountIncludePads()) {
        scale = scale / (PA->getKernels()[0] * PA->getKernels()[1]);
      }
      // Convert the float rescale factor into 32->8 bit fixed-point
      // pre/post-shift plus integer scale parameters.
      auto outScaleParam = quantization::quantizeScaleOffset32To8(scale, 0);
      auto *outPre = emitConstI32(builder, outScaleParam.pre);
      auto *outPost = emitConstI32(builder, outScaleParam.post);
      auto *outScale = emitConstI32(builder, outScaleParam.scale);
      createCall(builder, F,
                 {srcPtr, destPtr, srcDims, destDims, kernels, strides, pads,
                  countIncludePads, destOffset, srcOffset, outPre, outPost,
                  outScale});
    } else {
      createCall(builder, F,
                 {srcPtr, destPtr, srcDims, destDims, kernels, strides, pads,
                  countIncludePads});
    }
    break;
  }
case Kinded::Kind::AdaptiveAvgPoolInstKind: {
auto *PA = cast<AdaptiveAvgPoolInst>(I);
auto *dest = PA->getDest();
auto *src = PA->getSrc();
auto *destPtr = emitValueAddress(builder, dest);
auto *srcPtr = emitValueAddress(builder, src);
auto *destDims = emitValueDims(builder, dest);
auto *srcDims = emitValueDims(builder, src);
auto *F = getFunction("adaptive_avg_pool", dest->getElementType());
createCall(builder, F, {srcPtr, destPtr, srcDims, destDims});
break;
}
  case Kinded::Kind::AvgPoolGradInstKind: {
    // Lower the AvgPool gradient to the libjit "avg_pool_grad" kernel,
    // specialized on the source-gradient element type.
    auto *PAG = cast<AvgPoolGradInst>(I);
    auto *srcGrad = PAG->getSrcGrad();
    auto *srcGradPtr = emitValueAddress(builder, srcGrad);
    auto *destGradPtr = emitValueAddress(builder, PAG->getDestGrad());
    auto *srcGradDims = emitValueDims(builder, srcGrad);
    auto *destDims = emitValueDims(builder, PAG->getDest());
    auto *kernels = emitConstDimTArray(builder, PAG->getKernels());
    auto *strides = emitConstDimTArray(builder, PAG->getStrides());
    auto *pads = emitConstDimTArray(builder, PAG->getPads());
    auto *countIncludePads = emitConstI1(builder, PAG->getCountIncludePads());
    auto *F = getFunction("avg_pool_grad", srcGrad->getElementType());
    createCall(builder, F,
               {srcGradPtr, destGradPtr, srcGradDims, destDims, kernels,
                strides, pads, countIncludePads});
    break;
  }
  case Kinded::Kind::SoftMaxInstKind: {
    // Lower SoftMax to the libjit "softmax" kernel. For quantized inputs the
    // kernel is driven by a precomputed lookup table of exponentials and
    // fixed-point scaling parameters instead of the destination dims.
    auto *SM = cast<SoftMaxInst>(I);
    auto *dest = SM->getDest();
    auto *src = SM->getSrc();
    auto *destPtr = emitValueAddress(builder, dest);
    auto *srcPtr = emitValueAddress(builder, src);
    auto *destDims = emitValueDims(builder, dest);
    auto *srcDims = emitValueDims(builder, src);
    auto *F = getFunction("softmax", dest->getElementType());
    if (src->getType()->isQuantizedType()) {
      std::vector<int32_t> lut;
      // Compute lookup table containing all the exponentials based on the
      // formula e^(scale * value), where scale is the input scale of
      // the quantized input data and value is a value from [-255, 0].
      for (int32_t i = 0; i < 256; i++) {
        auto exponent =
            FixedPointUInt32(exp(src->getType()->getScale() * (i - 255)), 1)
                .getFixedVal();
        lut.push_back(exponent);
      }
      auto *lutPtr = emitConstI32Array(builder, lut);
      auto *outOffset = emitConstI32(builder, dest->getType()->getOffset());
      float size = static_cast<float>(src->getType()->dims()[1]);
      auto *sumIntegerPart = emitConstI32(builder, ceil(log2(size)));
      // When the row length is an exact power of two, reserve one extra
      // integer bit for the accumulated sum.
      if (ceil(log2(size)) == floor(log2(size))) {
        sumIntegerPart = emitConstI32(builder, ceil(log2(size)) + 1);
      }
      // Fixed-point representation of 1 / outputScale (value and the number
      // of integer bits).
      FixedPointUInt32 invScaleFixedPoint =
          FixedPointUInt32(1.f / dest->getType()->getScale());
      auto *invScale = emitConstI32(builder, invScaleFixedPoint.getFixedVal());
      auto *invScalePoint =
          emitConstI32(builder, invScaleFixedPoint.getIntBits());
      createCall(builder, F,
                 {srcPtr, destPtr, srcDims, lutPtr, outOffset, invScale,
                  sumIntegerPart, invScalePoint});
    } else {
      createCall(builder, F, {srcPtr, destPtr, srcDims, destDims});
    }
    break;
  }
  case Kinded::Kind::SoftMaxGradInstKind: {
    // Lower the SoftMax gradient to the libjit "softmax_grad" kernel,
    // specialized on the gradient and 'selected' element types.
    auto *SMG = cast<SoftMaxGradInst>(I);
    auto *srcGrad = SMG->getSrcGrad();
    auto *selected = SMG->getSelected();
    auto *srcGradPtr = emitValueAddress(builder, srcGrad);
    auto *destPtr = emitValueAddress(builder, SMG->getOrigDest());
    auto *selectedPtr = emitValueAddress(builder, selected);
    auto *srcGradDims = emitValueDims(builder, srcGrad);
    auto *selectedDims = emitValueDims(builder, selected);
    auto *F = getFunction("softmax_grad", {srcGrad->getElementType(),
                                           selected->getElementType()});
    createCall(builder, F,
               {srcGradPtr, destPtr, selectedPtr, srcGradDims, selectedDims});
    break;
  }
  case Kinded::Kind::TopKInstKind: {
    // Lower TopK to the libjit "topk" kernel, specialized on the input and
    // indices element types. The scratch operand provides temporary memory
    // for the kernel.
    auto *TI = cast<TopKInst>(I);
    auto *input = TI->getInput();
    auto *valuesPtr = emitValueAddress(builder, TI->getValues());
    auto *indicesPtr = emitValueAddress(builder, TI->getIndices());
    auto *inputPtr = emitValueAddress(builder, input);
    auto *scratchPtr = emitValueAddress(builder, TI->getScratch());
    // k = elements kept per row, n = innermost-dimension length, size = total
    // number of input elements.
    auto *k = emitConstDimT(builder, TI->getK());
    auto *n = emitConstDimT(builder, input->dims().back());
    auto *size = emitConstDimT(builder, input->size());
    auto indicesTy = TI->getIndices()->getElementType();
    auto *F = getFunction("topk", {input->getElementType(), indicesTy});
    createCall(builder, F,
               {valuesPtr, indicesPtr, inputPtr, scratchPtr, k, n, size});
    break;
  }
case Kinded::Kind::SpaceToDepthInstKind: {
auto *SI = cast<SpaceToDepthInst>(I);
auto *dest = SI->getDest();
auto *src = SI->getSrc();
auto *dstPtr = emitValueAddress(builder, dest);
auto *srcPtr = emitValueAddress(builder, src);
auto *dstDims = emitValueDims(builder, dest);
auto *srcDims = emitValueDims(builder, src);
unsigned blockSize = SI->getBlockSize();
auto *F = getFunction("space_to_depth", src->getElementType());
createCall(
builder, F,
{srcPtr, dstPtr, emitConstDimT(builder, blockSize), srcDims, dstDims});
break;
}
  case Kinded::Kind::TransposeInstKind: {
    // Lower Transpose to the libjit "transpose" kernel, passing the shuffle
    // mask as a constant array.
    auto *TI = cast<TransposeInst>(I);
    auto *dest = TI->getDest();
    auto *src = TI->getSrc();
    auto *destPtr = emitValueAddress(builder, dest);
    auto *srcPtr = emitValueAddress(builder, src);
    auto *destDims = emitValueDims(builder, dest);
    auto *srcDims = emitValueDims(builder, src);
    // Convert the mask to size_t type.
    ShapeVector shuffSizeT;
    for (auto D : TI->getShuffle()) {
      shuffSizeT.push_back((size_t)D);
    }
    auto *shuffle = emitConstDimTArray(builder, llvm::makeArrayRef(shuffSizeT));
    auto *len = emitConstDimT(builder, TI->getShuffle().size());
    auto *F = getFunction("transpose", dest->getElementType());
    createCall(builder, F, {srcPtr, destPtr, srcDims, destDims, shuffle, len});
    break;
  }
case Kinded::Kind::FlipInstKind: {
auto *FI = cast<FlipInst>(I);
auto *dest = FI->getDest();
auto *src = FI->getSrc();
auto *destPtr = emitValueAddress(builder, dest);
auto *srcPtr = emitValueAddress(builder, src);
auto *dims = emitValueDims(builder, src);
auto *axis = emitConstDimT(builder, FI->getAxis());
auto *dimsSize = emitConstDimT(builder, src->getType()->dims().size());
auto *F = getFunction("flip", src->getElementType());
createCall(builder, F, {srcPtr, destPtr, dims, axis, dimsSize});
break;
}
  // Alloc and Dealloc instructions are handled by the memory allocator and
  // emit no code here; TensorView likewise produces no runtime code.
  case Kinded::Kind::AllocActivationInstKind:
  case Kinded::Kind::DeallocActivationInstKind:
  case Kinded::Kind::TensorViewInstKind:
    break;
  case Kinded::Kind::InsertTensorInstKind: {
    // Lower InsertTensor to the libjit "insert_tensor" kernel, passing the
    // insertion offsets plus the 'count'/'axis' attributes as constants.
    auto *ITI = llvm::cast<InsertTensorInst>(I);
    auto *dest = ITI->getDest();
    auto *src = ITI->getSrc();
    auto offsets = ITI->getOffsets();
    auto *destPtr = emitValueAddress(builder, dest);
    auto *srcPtr = emitValueAddress(builder, src);
    auto *destDims = emitValueDims(builder, dest);
    auto *srcDims = emitValueDims(builder, src);
    auto *destDimsSize = emitConstDimT(builder, dest->getType()->dims().size());
    auto *srcDimsSize = emitConstDimT(builder, src->getType()->dims().size());
    auto *offsetsPtr = emitConstDimTArray(builder, offsets);
    auto *offsetsArraySize = emitConstDimT(builder, offsets.size());
    auto *count = emitConstDimT(builder, ITI->getCount());
    auto *axis = emitConstDimT(builder, ITI->getAxis());
    // Don't specialize the offsetPtr because we typically generate lots of
    // extracts from different offsets and specializing on this argument does
    // not speed things up.
    markArgAsUnspecialized(offsetsPtr);
    auto *F = getFunction("insert_tensor", dest->getElementType());
    createCall(builder, F,
               {destPtr, srcPtr, offsetsPtr, destDims, srcDims, destDimsSize,
                srcDimsSize, offsetsArraySize, count, axis});
    break;
  }
  case Kinded::Kind::ExtractTensorInstKind: {
    // Lower ExtractTensor to the libjit "extract_tensor" kernel, passing the
    // extraction offsets as a constant array.
    auto *ITI = llvm::cast<ExtractTensorInst>(I);
    auto *dest = ITI->getDest();
    auto *src = ITI->getSrc();
    auto offsets = ITI->getOffsets();
    auto *destPtr = emitValueAddress(builder, dest);
    auto *srcPtr = emitValueAddress(builder, src);
    auto *destDims = emitValueDims(builder, dest);
    auto *srcDims = emitValueDims(builder, src);
    auto *destDimsSize = emitConstDimT(builder, dest->getType()->dims().size());
    auto *srcDimsSize = emitConstDimT(builder, src->getType()->dims().size());
    auto *offsetsPtr = emitConstDimTArray(builder, offsets);
    auto *offsetsArraySize = emitConstDimT(builder, offsets.size());
    // Don't specialize the offsetPtr because we typically generate lots of
    // extracts from different offsets and specializing on this argument does
    // not speed things up.
    markArgAsUnspecialized(offsetsPtr);
    auto *F = getFunction("extract_tensor", dest->getElementType());
    createCall(builder, F,
               {srcPtr, destPtr, offsetsPtr, srcDims, destDims, srcDimsSize,
                destDimsSize, offsetsArraySize});
    break;
  }
  case Kinded::Kind::GatherInstKind: {
    // Lower Gather to the libjit "gather64"/"gather32" kernel, chosen by the
    // element type of the indices tensor.
    auto *GI = llvm::cast<GatherInst>(I);
    auto *dest = GI->getDest();
    auto *data = GI->getData();
    auto *indices = GI->getIndices();
    unsigned axis = GI->getBatchDims();
    auto *destPtr = emitValueAddress(builder, dest);
    auto *dataPtr = emitValueAddress(builder, data);
    auto *indicesPtr = emitValueAddress(builder, indices);
    auto *indicesSize = emitConstDimT(builder, indices->size());
    auto *dataType = data->getType();
    // The size of the sample in the batch.
    size_t sampleSize = dataType->getSliceSize(axis);
    // The size of the slices that we gather.
    size_t sliceSize = dataType->getSliceSize(axis + 1);
    // The number of samples in the batch.
    size_t numSamples = dataType->size() / sampleSize;
    auto *sliceSizeVal = emitConstDimT(builder, sliceSize);
    auto *numSamplesVal = emitConstDimT(builder, numSamples);
    auto *sampleSizeVal = emitConstDimT(builder, sampleSize);
    // Dispatching function depending on the input type of Indices.
    llvm::Function *F = nullptr;
    if (indices->getElementType() == ElemKind::Int64ITy) {
      F = getFunction("gather64", dest->getElementType());
    } else if (indices->getElementType() == ElemKind::Int32ITy) {
      F = getFunction("gather32", dest->getElementType());
    }
    if (!F) {
      llvm_unreachable("Cannot get function for Gather. "
                       "Indices input of Gather has to be int32 or int64");
    }
    createCall(builder, F,
               {destPtr, dataPtr, indicesPtr, indicesSize, sliceSizeVal,
                numSamplesVal, sampleSizeVal});
    break;
  }
  case Kinded::Kind::GatherNDInstKind: {
    // Lower GatherND to the libjit "gather_nd" kernel. All the geometry
    // (batch count, slice counts/sizes, index-dimension products) is computed
    // here at compile time and passed as constants.
    auto *GI = llvm::cast<GatherNDInst>(I);
    auto *dest = GI->getDest();
    auto *data = GI->getData();
    auto *indices = GI->getIndices();
    unsigned batchDims = GI->getBatchDims();
    auto dataDims = data->dims();
    auto indicesDims = indices->dims();
    dim_t indicesDimLast = indicesDims.back();
    // Compute batch count.
    dim_t batchCount = 1;
    for (size_t idx = 0; idx < batchDims; ++idx) {
      batchCount *= dataDims[idx];
    }
    // Compute input slice count.
    dim_t inpSliceCount = 1;
    for (size_t idx = batchDims; idx < batchDims + indicesDimLast; ++idx) {
      inpSliceCount *= dataDims[idx];
    }
    // Compute output slice count.
    dim_t outSliceCount = 1;
    for (size_t idx = batchDims; idx < indicesDims.size() - 1; ++idx) {
      outSliceCount *= indicesDims[idx];
    }
    // Compute slice size (in bytes).
    dim_t sliceSize = data->getType()->getElementSize();
    for (size_t idx = batchDims + indicesDimLast; idx < dataDims.size();
         idx++) {
      sliceSize *= dataDims[idx];
    }
    // Get indices dimension products (row-major strides over the indexed
    // data dimensions, innermost product being 1).
    std::vector<dim_t> indicesDimProd(indicesDimLast);
    indicesDimProd[indicesDimLast - 1] = 1;
    for (ssize_t idx = static_cast<ssize_t>(indicesDimLast) - 2; idx >= 0;
         idx--) {
      indicesDimProd[idx] =
          indicesDimProd[idx + 1] * dataDims[batchDims + idx + 1];
    }
    // Emit pointers.
    auto *destPtr = emitValueAddress(builder, dest);
    auto *dataPtr = emitValueAddress(builder, data);
    auto *indicesPtr = emitValueAddress(builder, indices);
    // Emit parameters.
    auto *batchCountArg = emitConstDimT(builder, batchCount);
    auto *inpSliceCountArg = emitConstDimT(builder, inpSliceCount);
    auto *outSliceCountArg = emitConstDimT(builder, outSliceCount);
    auto *sliceSizeArg = emitConstDimT(builder, sliceSize);
    auto *indicesDimLastArg = emitConstDimT(builder, indicesDimLast);
    auto *indicesDimProdArg =
        emitConstDimTArray(builder, llvm::makeArrayRef(indicesDimProd));
    llvm::Function *F = getFunction(
        "gather_nd", {data->getElementType(), indices->getElementType()});
    createCall(builder, F,
               {destPtr, dataPtr, indicesPtr, batchCountArg, inpSliceCountArg,
                outSliceCountArg, sliceSizeArg, indicesDimLastArg,
                indicesDimProdArg});
    break;
  }
  case Kinded::Kind::GatherRangesInstKind: {
    // Lower GatherRanges to the libjit "gatherranges64"/"gatherranges32"
    // kernel, chosen by the element type of the ranges tensor.
    auto *GRI = llvm::cast<GatherRangesInst>(I);
    auto *output = GRI->getOutput();
    auto *lengths = GRI->getLengths();
    auto *data = GRI->getData();
    auto *ranges = GRI->getRanges();
    auto *outputPtr = emitValueAddress(builder, output);
    auto *lengthsPtr = emitValueAddress(builder, lengths);
    auto *dataPtr = emitValueAddress(builder, data);
    auto *rangesPtr = emitValueAddress(builder, ranges);
    auto rangesType = ranges->getType();
    // The number of examples in ranges.
    size_t numExamples = rangesType->dims()[0];
    // The number of range pairs in each example.
    size_t exampleSize = rangesType->dims()[1];
    auto *numExamplesVal = emitConstDimT(builder, numExamples);
    auto *exampleSizeVal = emitConstDimT(builder, exampleSize);
    // Dispatching function depending on the input type of Ranges.
    llvm::Function *F = nullptr;
    if (ranges->getElementType() == ElemKind::Int64ITy) {
      F = getFunction("gatherranges64", output->getElementType());
    } else if (ranges->getElementType() == ElemKind::Int32ITy) {
      F = getFunction("gatherranges32", output->getElementType());
    }
    if (!F) {
      llvm_unreachable("Cannot get function for GatherRanges. "
                       "Ranges input of GatherRanges has to be int32 or int64");
    }
    createCall(builder, F,
               {outputPtr, lengthsPtr, dataPtr, rangesPtr, numExamplesVal,
                exampleSizeVal});
    break;
  }
case Kinded::Kind::LengthsRangeFillInstKind: {
auto *LRFI = llvm::cast<LengthsRangeFillInst>(I);
auto *dest = LRFI->getDest();
auto *lengths = LRFI->getLengths();
auto *destPtr = emitValueAddress(builder, dest);
auto *lengthsPtr = emitValueAddress(builder, lengths);
auto *lengthsSize = emitConstDimT(builder, lengths->size());
// Dispatching function depending on the input type of Ranges.
auto *F = getFunction("lengths_range_fill", dest->getElementType());
createCall(builder, F, {lengthsPtr, destPtr, lengthsSize});
break;
}
  case Kinded::Kind::ScatterDataInstKind: {
    // Lower ScatterData to the libjit "scatterdata" kernel, specialized on
    // the data and indices element types. The quantized variant additionally
    // receives the scale/offset of both the data and the slices.
    auto *SDI = llvm::cast<ScatterDataInst>(I);
    auto *data = SDI->getData();
    auto *indices = SDI->getIndices();
    auto *slices = SDI->getSlices();
    auto *dataPtr = emitValueAddress(builder, data);
    auto *indicesPtr = emitValueAddress(builder, indices);
    auto *slicesPtr = emitValueAddress(builder, slices);
    auto *dataDims = emitValueDims(builder, data);
    auto *indicesCnt = emitConstDimT(builder, indices->getType()->dims()[0]);
    auto *indicesSize = emitConstDimT(builder, indices->getType()->dims()[1]);
    auto *slicesType = slices->getType();
    auto *sliceSize =
        emitConstDimT(builder, slicesType->size() / slicesType->dims()[0]);
    auto *isCumulative = emitConstI1(builder, SDI->getCumulative());
    auto *F = getFunction("scatterdata",
                          {data->getElementType(), indices->getElementType()});
    if (data->getType()->isQuantizedType()) {
      auto *dataScale = emitConstF32(builder, data->getType()->getScale());
      auto *dataOffset = emitConstI32(builder, data->getType()->getOffset());
      auto *sliceScale = emitConstF32(builder, slices->getType()->getScale());
      auto *sliceOffset = emitConstI32(builder, slices->getType()->getOffset());
      createCall(builder, F,
                 {dataPtr, dataDims, indicesPtr, slicesPtr, indicesCnt,
                  indicesSize, sliceSize, isCumulative, dataScale, dataOffset,
                  sliceScale, sliceOffset});
    } else {
      createCall(builder, F,
                 {dataPtr, dataDims, indicesPtr, slicesPtr, indicesCnt,
                  indicesSize, sliceSize, isCumulative});
    }
    break;
  }
  case Kinded::Kind::SparseLengthsSumInstKind: {
    // Lower SparseLengthsSum to the libjit "sparse_lengths_sum" kernel,
    // specialized on the destination and indices element types.
    auto *SI = cast<SparseLengthsSumInst>(I);
    auto *dest = SI->getDest();
    auto *data = SI->getData();
    auto *indices = SI->getIndices();
    auto *lengths = SI->getLengths();
    auto *destPtr = emitValueAddress(builder, dest);
    auto *dataPtr = emitValueAddress(builder, data);
    auto *indicesPtr = emitValueAddress(builder, indices);
    auto *lengthsPtr = emitValueAddress(builder, lengths);
    auto *segments = emitConstDimT(builder, lengths->dims()[0]);
    // Number of elements in one row of 'data'.
    auto *lineSize = emitConstDimT(builder, data->size() / data->dims()[0]);
    auto *F = getFunction("sparse_lengths_sum",
                          {dest->getElementType(), indices->getElementType()});
    createCall(builder, F,
               {destPtr, dataPtr, indicesPtr, lengthsPtr, segments, lineSize});
    break;
  }
  case Kinded::Kind::SparseLengthsWeightedSumInstKind: {
    // Lower SparseLengthsWeightedSum to the libjit
    // "sparse_lengths_weighted_sum" kernel, specialized on the destination
    // and indices element types.
    auto *SI = cast<SparseLengthsWeightedSumInst>(I);
    auto *dest = SI->getDest();
    auto *data = SI->getData();
    auto *weights = SI->getWeights();
    auto *indices = SI->getIndices();
    auto *lengths = SI->getLengths();
    auto *destPtr = emitValueAddress(builder, dest);
    auto *dataPtr = emitValueAddress(builder, data);
    auto *weightsPtr = emitValueAddress(builder, weights);
    auto *indicesPtr = emitValueAddress(builder, indices);
    auto *lengthsPtr = emitValueAddress(builder, lengths);
    auto *segments = emitConstDimT(builder, lengths->dims()[0]);
    // Number of elements in one row of 'data'.
    auto *lineSize = emitConstDimT(builder, data->size() / data->dims()[0]);
    auto *F = getFunction("sparse_lengths_weighted_sum",
                          {dest->getElementType(), indices->getElementType()});
    createCall(builder, F,
               {destPtr, dataPtr, weightsPtr, indicesPtr, lengthsPtr, segments,
                lineSize});
    break;
  }
  case Kinded::Kind::EmbeddingInstKind: {
    // Lower Embedding to the libjit "embedding" kernel, passing the weight
    // table shape and the padIdx/scale/sparse attributes as constants.
    auto *SI = cast<EmbeddingInst>(I);
    auto *dest = SI->getDest();
    auto *weights = SI->getWeights();
    auto *indices = SI->getIndices();
    auto *padIdx = emitConstSizeT(builder, SI->getPadIdx());
    auto *scale = emitConstI1(builder, SI->getScale());
    auto *sparse = emitConstI1(builder, SI->getSparse());
    auto *destPtr = emitValueAddress(builder, dest);
    auto *weightsPtr = emitValueAddress(builder, weights);
    auto *indicesPtr = emitValueAddress(builder, indices);
    auto *indDims = emitValueDims(builder, indices);
    auto *indSize = emitConstDimT(builder, indices->dims().size());
    assert(weights->dims().size() == 2 && "weights must be 2-D");
    auto *numEmbedding = emitConstDimT(builder, weights->dims()[0]);
    auto *embeddingDim = emitConstDimT(builder, weights->dims()[1]);
    auto *F = getFunction("embedding", dest->getElementType());
    createCall(builder, F,
               {destPtr, weightsPtr, indicesPtr, indDims, indSize, numEmbedding,
                embeddingDim, padIdx, scale, sparse});
    break;
  }
  case Kinded::Kind::EmbeddingBagInstKind: {
    // Lower EmbeddingBag to the libjit "embedding_bag" kernel.
    auto *SI = cast<EmbeddingBagInst>(I);
    auto *dest = SI->getDest();
    auto *data = SI->getData();
    auto *weights = SI->getWeights();
    auto *indices = SI->getIndices();
    auto *offsets = SI->getOffsets();
    auto *hasEndOffset = emitConstI1(builder, SI->getHasEndOffset());
    auto *destPtr = emitValueAddress(builder, dest);
    auto *dataPtr = emitValueAddress(builder, data);
    auto *weightsPtr = emitValueAddress(builder, weights);
    auto *indicesPtr = emitValueAddress(builder, indices);
    auto *offsetsPtr = emitValueAddress(builder, offsets);
    auto *segments = emitConstDimT(builder, offsets->dims()[0]);
    auto *totalLength = emitConstDimT(builder, indices->dims()[0]);
    // Number of elements in one row of 'data'.
    auto *lineSize = emitConstDimT(builder, data->size() / data->dims()[0]);
    auto *F = getFunction("embedding_bag", dest->getElementType());
    createCall(builder, F,
               {destPtr, dataPtr, weightsPtr, indicesPtr, offsetsPtr, segments,
                lineSize, totalLength, hasEndOffset});
    break;
  }
  case Kinded::Kind::SparseLengthsWeightedSumGradInstKind: {
    // Lower the SparseLengthsWeightedSum gradient to the libjit
    // "sparse_lengths_weighted_sum_grad" kernel, specialized on the gradient
    // and indices element types.
    auto *SI = cast<SparseLengthsWeightedSumGradInst>(I);
    auto *destGrad = SI->getDestGrad();
    auto *dataGrad = SI->getDataGrad();
    auto *weightsGrad = SI->getWeightsGrad();
    auto *data = SI->getData();
    auto *weights = SI->getWeights();
    auto *indices = SI->getIndices();
    auto *lengths = SI->getLengths();
    auto *destGradPtr = emitValueAddress(builder, destGrad);
    auto *dataGradPtr = emitValueAddress(builder, dataGrad);
    auto *weightsGradPtr = emitValueAddress(builder, weightsGrad);
    auto *dataPtr = emitValueAddress(builder, data);
    auto *weightsPtr = emitValueAddress(builder, weights);
    auto *indicesPtr = emitValueAddress(builder, indices);
    auto *lengthsPtr = emitValueAddress(builder, lengths);
    auto *segments = emitConstDimT(builder, lengths->dims()[0]);
    // Total size of the data gradient in bytes (assumes float elements).
    auto *dataGradRawSize =
        emitConstDimT(builder, dataGrad->size() * sizeof(float));
    // Number of elements in one row of the data gradient.
    auto *lineSize =
        emitConstDimT(builder, dataGrad->size() / dataGrad->dims()[0]);
    auto *F =
        getFunction("sparse_lengths_weighted_sum_grad",
                    {destGrad->getElementType(), indices->getElementType()});
    createCall(builder, F,
               {destGradPtr, dataGradPtr, weightsGradPtr, dataPtr, weightsPtr,
                indicesPtr, lengthsPtr, segments, lineSize, dataGradRawSize});
    break;
  }
  case Kinded::Kind::RowwiseQuantizedSparseLengthsWeightedSumInstKind: {
    // Lower rowwise-quantized SLWS to the libjit
    // "rowwise_quantized_sparse_lengths_weighted_sum" kernel; per-row scales
    // and offsets are passed as separate tensors.
    auto *N = cast<RowwiseQuantizedSparseLengthsWeightedSumInst>(I);
    auto *dest = N->getDest();
    auto *data = N->getData();
    auto *scales = N->getScales();
    auto *offsets = N->getOffsets();
    auto *weights = N->getWeights();
    auto *indices = N->getIndices();
    auto *lengths = N->getLengths();
    auto *destPtr = emitValueAddress(builder, dest);
    auto *dataPtr = emitValueAddress(builder, data);
    auto *scalesPtr = emitValueAddress(builder, scales);
    auto *offsetsPtr = emitValueAddress(builder, offsets);
    auto *weightsPtr = emitValueAddress(builder, weights);
    auto *indicesPtr = emitValueAddress(builder, indices);
    auto *lengthsPtr = emitValueAddress(builder, lengths);
    auto *segments = emitConstDimT(builder, lengths->dims()[0]);
    // Number of elements in one row of 'data'.
    auto *lineSize = emitConstDimT(builder, data->size() / data->dims()[0]);
    auto *F = getFunction("rowwise_quantized_sparse_lengths_weighted_sum",
                          {dest->getElementType(), indices->getElementType()});
    createCall(builder, F,
               {destPtr, dataPtr, scalesPtr, offsetsPtr, weightsPtr, indicesPtr,
                lengthsPtr, segments, lineSize});
    break;
  }
  case Kinded::Kind::FusedRowwiseQuantizedSparseLengthsWeightedSumInstKind: {
    // Lower fused rowwise-quantized SLWS to the libjit
    // "fused_rowwise_quantized_sparse_lengths_weighted_sum" kernel. The input
    // and output row sizes differ because the quantization parameters are
    // fused into each data row.
    auto *N = cast<FusedRowwiseQuantizedSparseLengthsWeightedSumInst>(I);
    auto *dest = N->getDest();
    auto *data = N->getData();
    auto *weights = N->getWeights();
    auto *indices = N->getIndices();
    auto *lengths = N->getLengths();
    auto *destPtr = emitValueAddress(builder, dest);
    auto *dataPtr = emitValueAddress(builder, data);
    auto *weightsPtr = emitValueAddress(builder, weights);
    auto *indicesPtr = emitValueAddress(builder, indices);
    auto *lengthsPtr = emitValueAddress(builder, lengths);
    auto *segments = emitConstDimT(builder, lengths->dims()[0]);
    auto *inLineSize = emitConstDimT(builder, data->size() / data->dims()[0]);
    auto *outLineSize = emitConstDimT(builder, dest->size() / dest->dims()[0]);
    auto *F = getFunction("fused_rowwise_quantized_sparse_lengths_weighted_sum",
                          {dest->getElementType(), indices->getElementType()});
    createCall(builder, F,
               {destPtr, dataPtr, weightsPtr, indicesPtr, lengthsPtr, segments,
                inLineSize, outLineSize});
    break;
  }
  case Kinded::Kind::EmbeddingBagByteRowwiseOffsetsInstKind: {
    // Lower EmbeddingBagByteRowwiseOffsets to the libjit
    // "embedding_bag_byte_rowwise_offsets" kernel.
    auto *N = cast<EmbeddingBagByteRowwiseOffsetsInst>(I);
    auto *dest = N->getDest();
    auto *data = N->getData();
    auto *weights = N->getWeights();
    auto *indices = N->getIndices();
    auto *offsets = N->getOffsets();
    auto *hasEndOffset = emitConstI1(builder, N->getHasEndOffset());
    auto *destPtr = emitValueAddress(builder, dest);
    auto *dataPtr = emitValueAddress(builder, data);
    auto *weightsPtr = emitValueAddress(builder, weights);
    auto *indicesPtr = emitValueAddress(builder, indices);
    auto *offsetsPtr = emitValueAddress(builder, offsets);
    auto *segments = emitConstDimT(builder, offsets->dims()[0]);
    auto *numIndices = emitConstDimT(builder, indices->dims()[0]);
    // Row sizes of the (quantized) input data and the output.
    auto *inLineSize = emitConstDimT(builder, data->size() / data->dims()[0]);
    auto *outLineSize = emitConstDimT(builder, dest->size() / dest->dims()[0]);
    auto *F = getFunction("embedding_bag_byte_rowwise_offsets",
                          dest->getElementType());
    createCall(builder, F,
               {destPtr, dataPtr, weightsPtr, indicesPtr, offsetsPtr, segments,
                numIndices, inLineSize, outLineSize, hasEndOffset});
    break;
  }
  case Kinded::Kind::DebugPrintInstKind: {
    // Lower DebugPrint to one of the libjit tensor-dumping routines depending
    // on the requested format: console, (raw) binary file, or (raw) text
    // file. The "raw" variants pass an empty header string.
    auto *DPI = llvm::cast<DebugPrintInst>(I);
    auto *src = DPI->getSrc();
    auto *srcPtr = emitValueAddress(builder, src);
    srcPtr = builder.CreateBitCast(srcPtr, builder.getInt8PtrTy());
    auto *srcDims = emitValueDims(builder, src);
    auto *srcDimsSize = emitConstDimT(builder, src->getType()->dims().size());
    auto *srcSize = emitConstSizeT(builder, src->getType()->size());
    auto *srcSizeBytes =
        emitConstSizeT(builder, src->getType()->getSizeInBytes());
    auto *srcElemKind =
        emitConstDimT(builder, static_cast<size_t>(src->getElementType()));
    auto *name = emitStringConst(builder, I->getName());
    auto *filename = emitStringConst(builder, DPI->getFileName());
    auto srcTypeStr = src->getType()->toString();
    std::string format = DPI->getFormat();
    if (format == "console") {
      // Dump tensor in console.
      auto *F = getFunction("dump_tensor_console");
      createCall(builder, F, {srcPtr, srcDims, srcDimsSize, srcElemKind, name});
    } else if (format == "bin") {
      // Dump tensor in file in binary format.
      auto *F = getFunction("dump_tensor_bin");
      auto *header = emitStringConst(builder, srcTypeStr);
      createCall(builder, F, {srcPtr, srcSizeBytes, filename, header});
    } else if (format == "txt") {
      // Dump tensor in file in text format.
      auto *F = getFunction("dump_tensor_txt", src->getElementType());
      auto *header = emitStringConst(builder, srcTypeStr);
      createCall(builder, F, {srcPtr, srcSize, filename, header});
    } else if (format == "rawbin") {
      // Dump tensor in file in raw binary format.
      auto *F = getFunction("dump_tensor_bin");
      auto *header = emitStringConst(builder, "");
      createCall(builder, F, {srcPtr, srcSizeBytes, filename, header});
    } else if (format == "rawtxt") {
      // Dump tensor in file in raw text format.
      auto *F = getFunction("dump_tensor_txt", src->getElementType());
      auto *header = emitStringConst(builder, "");
      createCall(builder, F, {srcPtr, srcSize, filename, header});
    } else {
      LOG(FATAL) << "Invalid 'Format' attribute for DebugPrint instruction!";
    }
    break;
  }
  case Kinded::Kind::InstrumentInstKind: {
    // Emit an instrumentation callback ("instrument_before"/"instrument_after")
    // around the referenced instruction. The opInfo buffer is laid out as
    // opNum pointers (operand addresses) followed by opNum integers (operand
    // sizes); the Before variant fills the buffer before invoking the
    // callback.
    auto *instrumentI = llvm::cast<InstrumentInst>(I);
    auto *opInfo = instrumentI->getOperandsInfo();
    // Instruction being instrumented.
    Instruction *instrRef = instrumentI->getInstrRef();
    // Emit instruction ID and instruction kind.
    llvm::Type *intTy =
        llvm::Type::getIntNTy(getLLVMContext(), getLibjitIntWidth());
    auto *ID = llvm::ConstantInt::get(intTy, instrumentI->getID());
    auto *kind = llvm::ConstantInt::get(intTy, (int)(instrRef->getKind()));
    // Emit number of input and output operands.
    auto inpNum = instrRef->getNumInputs();
    auto outNum = instrRef->getNumOutputs();
    auto opNum = inpNum + outNum;
    auto *opInp = llvm::ConstantInt::get(intTy, inpNum);
    auto *opOut = llvm::ConstantInt::get(intTy, outNum);
    // Emit opInfo address as uint8_t*.
    assert(opInfo->getType()->getSizeInBytes() >= 2 * sizeof(int64_t) &&
           "Not enough memory allocated for instrumentation!");
    auto *opInfoPtr = emitValueAddress(builder, opInfo);
    opInfoPtr = builder.CreateBitCast(opInfoPtr, builder.getInt8PtrTy());
    // Emit opAddr address as uint8_t** starting from offset 0.
    auto *opAddrPtr =
        builder.CreateGEP(opInfoPtr, llvm::ConstantInt::get(intTy, 0));
    opAddrPtr = builder.CreateBitCast(opAddrPtr,
                                      builder.getInt8PtrTy()->getPointerTo());
    // Emit opSize address as int* starting from offset opNum * sizeof(int64_t).
    auto *opSizePtr = builder.CreateGEP(
        opInfoPtr, llvm::ConstantInt::get(intTy, opNum * sizeof(int64_t)));
    opSizePtr = builder.CreateBitCast(opSizePtr, intTy->getPointerTo());
    // Generate instrumentation.
    auto instrumentKind = instrumentI->getInstrumentKind();
    if (instrumentKind == InstrumentKind::Before) {
      // Operands addresses and sizes.
      std::vector<llvm::Value *> opAddrArray;
      std::vector<llvm::Value *> opSizeArray;
      // Get addresses and sizes for the input operands.
      for (const auto &op : instrRef->getOperands()) {
        if (op.second == OperandKind::Out) {
          continue;
        }
        // Emit operand address as uint8_t* variable.
        auto *opAddr = emitValueAddress(builder, op.first);
        opAddr = builder.CreateBitCast(opAddr, builder.getInt8PtrTy());
        opAddrArray.push_back(opAddr);
        // Emit operand size in bytes as int constant.
        auto *opSize = llvm::ConstantInt::get(
            intTy, op.first->getType()->getSizeInBytes());
        opSizeArray.push_back(opSize);
      }
      assert(opAddrArray.size() == inpNum && "Inconsistent size!");
      // Get addresses and sizes for the output operands.
      for (const auto &op : instrRef->getOperands()) {
        if (op.second == OperandKind::In) {
          continue;
        }
        // Emit operand address as uint8_t* variable.
        auto *opAddr = emitValueAddress(builder, op.first);
        opAddr = builder.CreateBitCast(opAddr, builder.getInt8PtrTy());
        opAddrArray.push_back(opAddr);
        // Emit operand size in bytes as int constant.
        auto *opSize = llvm::ConstantInt::get(
            intTy, op.first->getType()->getSizeInBytes());
        opSizeArray.push_back(opSize);
      }
      assert(opAddrArray.size() == opNum && "Inconsistent size!");
      // Write the addresses of the operands in the opAddr.
      emitArrayStore(builder, opAddrArray, opAddrPtr);
      // Write the sizes of the operands in opSize.
      emitArrayStore(builder, opSizeArray, opSizePtr);
      // Create callback call.
      auto *F = getFunction("instrument_before");
      createCall(builder, F, {ID, kind, opInp, opOut, opAddrPtr, opSizePtr});
    } else if (instrumentKind == InstrumentKind::After) {
      // Create callback call.
      auto *F = getFunction("instrument_after");
      createCall(builder, F, {ID, kind, opInp, opOut, opAddrPtr, opSizePtr});
    } else {
      llvm_unreachable("Instrumentation kind not supported!");
    }
    // Print the IR instrumentation callback API.
    printInstrumentIR_ = true;
    break;
  }
case Kinded::Kind::TraceEventInstKind: {
auto *TEI = llvm::cast<TraceEventInst>(I);
auto *data = TEI->getData();
auto *offset = emitConstDimT(builder, TEI->getIndex());
auto *dataPtr = emitValueAddress(builder, data);
auto *F = getFunction("write_timestamp");
createCall(builder, F, {dataPtr, offset});
break;
}
case Kinded::Kind::ResizeNearestInstKind: {
auto *RNI = llvm::cast<ResizeNearestInst>(I);
auto *result = RNI->getDest();
auto *input = RNI->getSrc();
auto *resultPtr = emitValueAddress(builder, result);
auto *inputPtr = emitValueAddress(builder, input);
auto *scalePtr = emitConstFloatArray(builder, RNI->getScale());
auto *destDims = emitValueDims(builder, result);
auto *srcDims = emitValueDims(builder, input);
auto *F = getFunction("resizenearest", input->getElementType());
createCall(builder, F, {resultPtr, inputPtr, scalePtr, srcDims, destDims});
break;
}
case Kinded::Kind::ResizeBilinearInstKind: {
auto *RBI = llvm::cast<ResizeBilinearInst>(I);
auto *result = RBI->getDest();
auto *input = RBI->getSrc();
auto *resultPtr = emitValueAddress(builder, result);
auto *inputPtr = emitValueAddress(builder, input);
CHECK_EQ(RBI->getScale()[0], 1.0) << "Scaling batch not supported.";
CHECK_EQ(RBI->getScale()[3], 1.0) << "Scaling channel not supported.";
auto *scalePtr = emitConstFloatArray(builder, RBI->getScale());
auto *destDims = emitValueDims(builder, result);
auto *srcDims = emitValueDims(builder, input);
auto *F = getFunction("resizebilinear", input->getElementType());
createCall(builder, F, {resultPtr, inputPtr, scalePtr, srcDims, destDims});
break;
}
case Kinded::Kind::NonMaxSuppressionInstKind: {
auto *NMSI = llvm::cast<NonMaxSuppressionInst>(I);
auto boxes = NMSI->getBoxes();
auto scores = NMSI->getScores();
auto indices = NMSI->getIndices();
auto numDetected = NMSI->getNumberOfSelectedIndices();
float iouThreshold = NMSI->getIouThreshold();
int64_t maxBoxesPerClass = NMSI->getMaxOutputBoxesPerClass();
float scoreThreshold = NMSI->getScoreThreshold();
int centerPointBox = NMSI->getCenterPointBox();
bool isV4 = NMSI->getIsTFVersion4();
auto *boxesPtr = emitValueAddress(builder, boxes);
auto *scoresPtr = emitValueAddress(builder, scores);
auto *indicesPtr = emitValueAddress(builder, indices);
auto *numDetectedPtr = emitValueAddress(builder, numDetected);
auto *maxBoxesPerClassVal = emitConstI32(builder, maxBoxesPerClass);
auto *centerPointBoxVal = emitConstI32(builder, centerPointBox);
auto *iouThresholdVal = emitConstF32(builder, iouThreshold);
auto *scoreThresholdVal = emitConstF32(builder, scoreThreshold);
auto *boxesDimVal = emitValueDims(builder, boxes);
auto *scoreDimVal = emitValueDims(builder, scores);
auto *indicesDimVal = emitValueDims(builder, indices);
auto *boxesDimSizeVal = emitConstDimT(builder, boxes->dims().size());
auto *scoresDimSizeVal = emitConstDimT(builder, scores->dims().size());
auto *indicesDimSizeVal = emitConstDimT(builder, indices->dims().size());
auto *isV4Val = emitConstI1(builder, isV4);
auto *F = getFunction("nms", indices->getElementType());
createCall(builder, F,
{indicesPtr, numDetectedPtr, boxesPtr, boxesDimVal,
boxesDimSizeVal, scoresPtr, scoreDimVal, scoresDimSizeVal,
indicesDimVal, indicesDimSizeVal, centerPointBoxVal,
maxBoxesPerClassVal, iouThresholdVal, scoreThresholdVal,
isV4Val});
break;
}
case Kinded::Kind::TFLiteDetectionPostProcessInstKind: {
auto *DPPI = llvm::cast<TFLiteDetectionPostProcessInst>(I);
auto boxes = DPPI->getBoxes();
auto scores = DPPI->getScores();
auto anchors = DPPI->getAnchors();
auto detectionBoxes = DPPI->getDetectionBoxes();
auto detectionClasses = DPPI->getDetectionClasses();
auto detectionScores = DPPI->getDetectionScores();
auto numDetections = DPPI->getNumDetections();
auto scratch = DPPI->getScratch();
// Emit pointers.
auto *boxesPtr = emitValueAddress(builder, boxes);
auto *scoresPtr = emitValueAddress(builder, scores);
auto *anchorsPtr = emitValueAddress(builder, anchors);
auto *detectionBoxesPtr = emitValueAddress(builder, detectionBoxes);
auto *detectionClassesPtr = emitValueAddress(builder, detectionClasses);
auto *detectionScoresPtr = emitValueAddress(builder, detectionScores);
auto *numDetectionsPtr = emitValueAddress(builder, numDetections);
auto *scratchPtr = emitValueAddress(builder, scratch);
// Emit parameters.
auto *numBoxes = emitConstI32(builder, boxes->dims()[1]);
auto *numTotalClasses = emitConstI32(builder, scores->dims()[2]);
auto *numClasses = emitConstI32(builder, DPPI->getNumClasses());
auto *maxDetections = emitConstI32(builder, DPPI->getMaxDetections());
auto *maxClassesPerDetection =
emitConstI32(builder, DPPI->getMaxClassesPerDetection());
auto *maxDetectionsPerClass =
emitConstI32(builder, DPPI->getMaxDetectionsPerClass());
auto *iouThreshold = emitConstF32(builder, DPPI->getIouThreshold());
auto *scoreThreshold = emitConstF32(builder, DPPI->getScoreThreshold());
auto *xScaleInv = emitConstF32(builder, 1.0f / DPPI->getXScale());
auto *yScaleInv = emitConstF32(builder, 1.0f / DPPI->getYScale());
auto *hScaleInv = emitConstF32(builder, 1.0f / DPPI->getHScale());
auto *wScaleInv = emitConstF32(builder, 1.0f / DPPI->getWScale());
auto *regularNMS = emitConstI1(builder, DPPI->getRegularNMS());
// Current implementation only supports batch size 1.
assert(boxes->dims()[0] == 1 &&
"TFLiteDetectionPostProcess batch not supported!");
// Call function.
auto *F = getFunction("tflite_detection_post_process_f");
createCall(builder, F,
{boxesPtr,
scoresPtr,
anchorsPtr,
detectionBoxesPtr,
detectionClassesPtr,
detectionScoresPtr,
numDetectionsPtr,
scratchPtr,
numBoxes,
numTotalClasses,
numClasses,
maxDetections,
maxClassesPerDetection,
maxDetectionsPerClass,
iouThreshold,
scoreThreshold,
xScaleInv,
yScaleInv,
hScaleInv,
wScaleInv,
regularNMS});
break;
}
case Kinded::Kind::AudioSpectrogramInstKind: {
auto *ASI = llvm::cast<AudioSpectrogramInst>(I);
auto winOutScratch = ASI->getWinOutScratch();
auto fftOutScratch = ASI->getFftOutScratch();
auto spectrogram = ASI->getSpectrogram();
auto input = ASI->getInput();
auto window = ASI->getWindow();
auto twiddleFactors = ASI->getTwiddleFactors();
auto bitReverseIndices = ASI->getBitReverseIndices();
auto complexToRealWeights = ASI->getComplexToRealWeights();
int64_t windowSize = ASI->getWindowSize();
int64_t windowStride = ASI->getWindowStride();
bool magnitudeSquared = ASI->getMagnitudeSquared();
auto *winOutScratchPtr = emitValueAddress(builder, winOutScratch);
auto *fftOutScratchPtr = emitValueAddress(builder, fftOutScratch);
auto *spectrogramPtr = emitValueAddress(builder, spectrogram);
auto *inputPtr = emitValueAddress(builder, input);
auto *windowPtr = emitValueAddress(builder, window);
auto *twiddleFactorsPtr = emitValueAddress(builder, twiddleFactors);
auto *bitReverseIndicesPtr = emitValueAddress(builder, bitReverseIndices);
auto *complexToRealWeightsPtr =
emitValueAddress(builder, complexToRealWeights);
auto *spectrogramDimVal = emitValueDims(builder, spectrogram);
auto *inputLengthVal = emitConstDimT(builder, input->size());
auto *windowSizeVal = emitConstDimT(builder, windowSize);
auto *windowStrideVal = emitConstDimT(builder, windowStride);
auto *magnitudeSquaredVal = emitConstI1(builder, magnitudeSquared);
auto *F = getFunction("audio_spectrogram", spectrogram->getElementType());
createCall(builder, F,
{winOutScratchPtr, fftOutScratchPtr, spectrogramPtr, inputPtr,
windowPtr, twiddleFactorsPtr, bitReverseIndicesPtr,
complexToRealWeightsPtr, spectrogramDimVal, inputLengthVal,
windowSizeVal, windowStrideVal, magnitudeSquaredVal});
break;
}
case Kinded::Kind::MFCCInstKind: {
auto *MFCCI = llvm::cast<MFCCInst>(I);
auto scratch = MFCCI->getScratch();
auto coefficients = MFCCI->getCoefficients();
auto spectrogram = MFCCI->getSpectrogram();
auto melWeights = MFCCI->getMelWeights();
auto melRanges = MFCCI->getMelRanges();
auto dctMat = MFCCI->getDctMat();
int64_t filterBankCount = MFCCI->getFilterBankCount();
auto *scratchPtr = emitValueAddress(builder, scratch);
auto *coefficientsPtr = emitValueAddress(builder, coefficients);
auto *spectrogramPtr = emitValueAddress(builder, spectrogram);
auto *melWeightsPtr = emitValueAddress(builder, melWeights);
auto *melRangesPtr = emitValueAddress(builder, melRanges);
auto *dctMatPtr = emitValueAddress(builder, dctMat);
auto *coefficientsDimVal = emitValueDims(builder, coefficients);
auto *spectrogramDimVal = emitValueDims(builder, spectrogram);
auto *filterBankCountVal = emitConstDimT(builder, filterBankCount);
auto *F = getFunction("mfcc", coefficients->getElementType());
createCall(builder, F,
{scratchPtr, coefficientsPtr, spectrogramPtr, melWeightsPtr,
melRangesPtr, dctMatPtr, coefficientsDimVal, spectrogramDimVal,
filterBankCountVal});
break;
}
case Kinded::Kind::ConvertToInstKind: {
auto *CTI = llvm::cast<ConvertToInst>(I);
auto *input = CTI->getInput();
auto *output = CTI->getResult();
auto *inputVal = emitValueAddress(builder, input);
auto *outptVal = emitValueAddress(builder, output);
auto *dimsVal = emitValueDims(builder, output);
auto *dimSizeVal = emitConstDimT(builder, output->dims().size());
auto *F = getFunction("convertTo",
{output->getElementType(), input->getElementType()});
createCall(builder, F, {outptVal, inputVal, dimsVal, dimSizeVal});
break;
}
default:
std::string sBuf;
llvm::raw_string_ostream s(sBuf);
I->dump(s);
LOG(FATAL) << "Cannot select the instruction: " << s.str();
}
}