std::shared_ptr<ModuleProcessingState> Conv1dFbGemm::run()

in recipes/streaming_convnets/inference/inference/module/nn/backend/fbgemm/Conv1dFbGemm.cpp [125:185]


// Runs one streaming step of the 1-d convolution over whatever complete
// frames are currently buffered in `input`, appending the results to the
// next stage's buffer and consuming the fully processed portion of the
// input. Returns the next stage's processing state (even when no output
// was produced).
//
// Layout (inferred from the size arithmetic below — confirm against
// IOBuffer producers): input holds interleaved frames of `inChannels_`
// floats; output receives frames of `outChannels_` floats.
std::shared_ptr<ModuleProcessingState> Conv1dFbGemm::run(
    std::shared_ptr<ModuleProcessingState> input) {
  assert(input);
  assert(!input->buffers().empty());
  std::shared_ptr<IOBuffer> inputBuf = input->buffer(0);
  assert(inputBuf);

  // The convolution writes into the *next* stage's state in the pipeline.
  std::shared_ptr<ModuleProcessingState> output = input->next();
  assert(output);
  assert(!output->buffers().empty());

  // Number of whole input frames available right now.
  const int nInFrames = inputBuf->size<float>() / inChannels_;
  // Streaming semantics: if there isn't enough context for even one kernel
  // application, emit nothing and consume nothing — the frames stay
  // buffered until a later call has enough.
  if (nInFrames < kernelSize_) {
    return output;
  }

  std::shared_ptr<IOBuffer> outputBuf = output->buffer(0);
  assert(outputBuf);

  // Standard "valid" convolution output count for this window/stride.
  int nOutFrames = (nInFrames - kernelSize_) / stride_ + 1;
  int outSize = nOutFrames * outChannels_;
  // Only the frames advanced over by the stride are consumed; the trailing
  // overlap (kernel context) is intentionally left in the input buffer for
  // the next call.
  int consumedSize = nOutFrames * stride_ * inChannels_;

  // Reserve output space and seed every (frame, group) slot with the bias,
  // so the beta == 1 GEMM below accumulates the convolution on top of it.
  // NOTE(review): every iteration copies from the *start* of the bias
  // buffer, so for groups_ > 1 all groups get the same first
  // outChannels_/groups_ bias entries — confirm bias_ really stores only
  // that many values (true for depthwise-style use where the slice is
  // shared); otherwise per-group bias is dropped.
  outputBuf->ensure<float>(outSize);
  auto* outPtr = outputBuf->tail<float>();
  for (int i = 0; i < nOutFrames * groups_; ++i) {
    std::copy_n(
        bias_->buffer_.data<float>(),
        outChannels_ / groups_,
        outPtr + i * (outChannels_ / groups_));
  }

  if (!memoryManager_) {
    throw std::invalid_argument("null memoryManager_ at Conv1dFbGemm::run()");
  }
  // Scratch buffer for the im2col-style unfold feeding the GEMM.
  // NOTE(review): this size carries a factor of outChannels_, i.e.
  // outChannels_/groups_ times the kernelSize_ * inChannels_ * nOutFrames
  // floats the unfold below appears to fill — looks like an
  // over-allocation; confirm whether it is intentional headroom.
  auto workspace = memoryManager_->makeShared<float>(
      (kernelSize_ * inChannels_ * outChannels_ * nOutFrames) / groups_);
  assert(workspace);

  // Unfold the strided input windows into contiguous rows (one row per
  // output frame per group) so the convolution becomes a single matmul.
  unfoldDepthwise(
      workspace.get() /* dst */,
      inputBuf->data<float>() /* src */,
      inChannels_ / groups_,
      kernelSize_,
      stride_,
      nOutFrames,
      groups_);

  // beta == 1: GEMM output is *added* to the bias values already written
  // at outPtr (C = A*B + beta*C in BLAS convention).
  constexpr float beta = 1.0;
  cblas_gemm_compute(
      fbgemm::matrix_op_t::NoTranspose,
      nOutFrames * groups_,
      workspace.get(),
      *packedWeights_,
      beta,
      outPtr);

  // Commit the produced frames and drop the consumed input prefix.
  outputBuf->move<float>(outSize);
  inputBuf->consume<float>(consumedSize);
  return output;
}