in torchaudio/csrc/rnnt/cpu/cpu_kernels.h [373:421]
void ComputeGradients(
const Options& options,
const DTYPE* logits,
const int* targets,
const int* srcLengths,
const int* tgtLengths,
const CAST_DTYPE* denominators,
const CAST_DTYPE* alphas,
const CAST_DTYPE* betas,
DTYPE* gradients) {
std::vector<TensorView<const DTYPE>> seqLogits;
std::vector<const int*> seqTargets;
std::vector<TensorView<const CAST_DTYPE>> seqDenoms;
std::vector<TensorView<const CAST_DTYPE>> seq_alphas;
std::vector<TensorView<const CAST_DTYPE>> seq_betas;
std::vector<TensorView<DTYPE>> seq_gradients;
const int& B = options.batchSize_;
const int& maxT = options.maxSrcLen_;
const int& maxU = options.maxTgtLen_;
const int& D = options.numTargets_;
for (int b = 0; b < B; ++b) {
seqLogits.push_back(
TensorView<const DTYPE>({maxT, maxU, D}, logits + b * maxT * maxU * D));
seqTargets.push_back(targets + b * (maxU - 1));
seqDenoms.push_back(TensorView<const CAST_DTYPE>(
{maxT, maxU}, denominators + b * maxT * maxU));
seq_alphas.push_back(
TensorView<const CAST_DTYPE>({maxT, maxU}, alphas + b * maxT * maxU));
seq_betas.push_back(
TensorView<const CAST_DTYPE>({maxT, maxU}, betas + b * maxT * maxU));
seq_gradients.push_back(
TensorView<DTYPE>({maxT, maxU, D}, gradients + b * maxT * maxU * D));
}
//#pragma omp parallel for
for (int b = 0; b < B; ++b) { // use max 2 * B threads.
ComputeGradientsOneSequence<DTYPE, CAST_DTYPE>(
/*options=*/options,
/*logits=*/seqLogits[b],
/*targets=*/seqTargets[b],
/*srcLen=*/srcLengths[b],
/*tgtLen=*/tgtLengths[b] + 1, // with prepended blank.
/*denom=*/seqDenoms[b],
/*alpha=*/seq_alphas[b],
/*beta=*/seq_betas[b],
/*gradients=*/seq_gradients[b]);
}
}