in recipes/streaming_convnets/inference/inference/module/nn/backend/fbgemm/Conv1dFbGemm.cpp [125:185]
std::shared_ptr<ModuleProcessingState> Conv1dFbGemm::run(
std::shared_ptr<ModuleProcessingState> input) {
assert(input);
assert(!input->buffers().empty());
std::shared_ptr<IOBuffer> inputBuf = input->buffer(0);
assert(inputBuf);
std::shared_ptr<ModuleProcessingState> output = input->next();
assert(output);
assert(!output->buffers().empty());
const int nInFrames = inputBuf->size<float>() / inChannels_;
if (nInFrames < kernelSize_) {
return output;
}
std::shared_ptr<IOBuffer> outputBuf = output->buffer(0);
assert(outputBuf);
int nOutFrames = (nInFrames - kernelSize_) / stride_ + 1;
int outSize = nOutFrames * outChannels_;
int consumedSize = nOutFrames * stride_ * inChannels_;
outputBuf->ensure<float>(outSize);
auto* outPtr = outputBuf->tail<float>();
for (int i = 0; i < nOutFrames * groups_; ++i) {
std::copy_n(
bias_->buffer_.data<float>(),
outChannels_ / groups_,
outPtr + i * (outChannels_ / groups_));
}
if (!memoryManager_) {
throw std::invalid_argument("null memoryManager_ at Conv1dFbGemm::run()");
}
auto workspace = memoryManager_->makeShared<float>(
(kernelSize_ * inChannels_ * outChannels_ * nOutFrames) / groups_);
assert(workspace);
unfoldDepthwise(
workspace.get() /* dst */,
inputBuf->data<float>() /* src */,
inChannels_ / groups_,
kernelSize_,
stride_,
nOutFrames,
groups_);
constexpr float beta = 1.0;
cblas_gemm_compute(
fbgemm::matrix_op_t::NoTranspose,
nOutFrames * groups_,
workspace.get(),
*packedWeights_,
beta,
outPtr);
outputBuf->move<float>(outSize);
inputBuf->consume<float>(consumedSize);
return output;
}