private opusPacketHasVoiceActivity()

in src/redundantaudioencoder/RedundantAudioEncoder.ts [1385:1448]


  /**
   * Determines whether an Opus packet signals voice activity by inspecting the VAD flags at the start of each Opus
   * frame's LP (SILK) layer header.
   *
   * @param packet Opus packet to inspect.
   * @param lenBytes Length of the packet in bytes.
   * @returns `1` if any VAD bit is set; `0` if the packet is missing/empty or no VAD bit is set; `-1` if the VAD
   * status cannot be determined (CELT-only packet, no SILK frames, or the packet could not be parsed).
   */
  private opusPacketHasVoiceActivity(packet: DataView, lenBytes: number): number {
    if (!packet || lenBytes <= 0) return 0;

    // In CELT-only mode, we can not determine whether there is VAD.
    if (this.opusPacketIsCeltOnly(packet)) return -1;

    const numSilkFrames = this.opusNumSilkFrames(packet);

    // It is not possible for `opusNumSilkFrames()` to return 0, so we ignore the next sanity check for test coverage.
    /* istanbul ignore next */
    if (numSilkFrames === 0) return -1;

    // `opusPacketParse()` reports each frame's offset and size through single-element arrays (out-parameters).
    const opusFrameOffsets = new Array<[number]>(this.OPUS_MAX_OPUS_FRAMES);
    const opusFrameSizes = new Array<[number]>(this.OPUS_MAX_OPUS_FRAMES);
    for (let i = 0; i < this.OPUS_MAX_OPUS_FRAMES; ++i) {
      opusFrameOffsets[i] = [undefined];
      opusFrameSizes[i] = [undefined];
    }

    // Parse packet to get the Opus frames.
    const numOpusFrames = this.opusPacketParse(
      packet,
      lenBytes,
      null,
      opusFrameOffsets,
      opusFrameSizes,
      null
    );

    // VAD status cannot be determined for invalid packets.
    if (numOpusFrames < 0) return -1;

    // The channel count is looked up from the packet as a whole, not per frame, so it is resolved once up front.
    const isStereo = this.opusPacketGetNumChannels(packet) === 2;

    // Right shift that leaves only the top `numSilkFrames` bits of a byte.
    const vadShift = 8 - numSilkFrames;

    // Iterate over all Opus frames, which may contain multiple SILK frames, to determine the VAD status.
    for (let i = 0; i < numOpusFrames; ++i) {
      if (opusFrameSizes[i][0] < 1) continue;

      // LP layer header bits format (https://www.rfc-editor.org/rfc/rfc6716#section-4.2.3):
      //
      // Mono case:
      // +-----------------+----------+
      // | 1 to 3 VAD bits | LBRR bit |
      // +-----------------+----------+
      //
      // Stereo case:
      // +---------------------+--------------+----------------------+---------------+
      // | 1 to 3 mid VAD bits | mid LBRR bit | 1 to 3 side VAD bits | side LBRR bit |
      // +---------------------+--------------+----------------------+---------------+
      const headerByte = packet.getUint8(opusFrameOffsets[i][0]);

      // The upper 1 to 3 bits (dependent on the number of SILK frames) of the LP layer contain VAD bits. If any of
      // these VAD bits are 1, then voice activity is present.
      if (headerByte >> vadShift) return 1;

      // In the stereo case, there is a second set of 1 to 3 VAD bits, so also check these VAD bits. Shifting left by
      // `numSilkFrames + 1` moves the side VAD bits to the top of the byte, past the mid VAD bits and the mid LBRR
      // bit. Bitwise operators work on 32-bit integers, so the result must be masked back to 8 bits; otherwise the
      // mid LBRR bit would survive the shift and be misreported as voice activity.
      if (isStereo && ((headerByte << (numSilkFrames + 1)) & 0xff) >> vadShift) {
        return 1;
      }
    }
    // No voice activity was detected.
    return 0;
  }