void ComputeGradients()

in torchaudio/csrc/rnnt/cpu/cpu_kernels.h [373:421]


void ComputeGradients(
    const Options& options,
    const DTYPE* logits,
    const int* targets,
    const int* srcLengths,
    const int* tgtLengths,
    const CAST_DTYPE* denominators,
    const CAST_DTYPE* alphas,
    const CAST_DTYPE* betas,
    DTYPE* gradients) {
  std::vector<TensorView<const DTYPE>> seqLogits;
  std::vector<const int*> seqTargets;
  std::vector<TensorView<const CAST_DTYPE>> seqDenoms;
  std::vector<TensorView<const CAST_DTYPE>> seq_alphas;
  std::vector<TensorView<const CAST_DTYPE>> seq_betas;
  std::vector<TensorView<DTYPE>> seq_gradients;

  const int& B = options.batchSize_;
  const int& maxT = options.maxSrcLen_;
  const int& maxU = options.maxTgtLen_;
  const int& D = options.numTargets_;
  for (int b = 0; b < B; ++b) {
    seqLogits.push_back(
        TensorView<const DTYPE>({maxT, maxU, D}, logits + b * maxT * maxU * D));
    seqTargets.push_back(targets + b * (maxU - 1));
    seqDenoms.push_back(TensorView<const CAST_DTYPE>(
        {maxT, maxU}, denominators + b * maxT * maxU));
    seq_alphas.push_back(
        TensorView<const CAST_DTYPE>({maxT, maxU}, alphas + b * maxT * maxU));
    seq_betas.push_back(
        TensorView<const CAST_DTYPE>({maxT, maxU}, betas + b * maxT * maxU));
    seq_gradients.push_back(
        TensorView<DTYPE>({maxT, maxU, D}, gradients + b * maxT * maxU * D));
  }

  //#pragma omp parallel for
  for (int b = 0; b < B; ++b) { // use max 2 * B threads.
    ComputeGradientsOneSequence<DTYPE, CAST_DTYPE>(
        /*options=*/options,
        /*logits=*/seqLogits[b],
        /*targets=*/seqTargets[b],
        /*srcLen=*/srcLengths[b],
        /*tgtLen=*/tgtLengths[b] + 1, // with prepended blank.
        /*denom=*/seqDenoms[b],
        /*alpha=*/seq_alphas[b],
        /*beta=*/seq_betas[b],
        /*gradients=*/seq_gradients[b]);
  }
}