src/redundantaudioencoder/RedundantAudioEncoder.ts (968 lines of code) (raw):
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
export default class RedundantAudioEncoder {
  // Each payload must be less than 1024 bytes to fit into the 10-bit RED block length field.
  private readonly maxRedPacketSizeBytes = 1 << 10;
  // Limit payload to 1000 bytes to handle small MTU. 1000 is chosen because in Chromium-based browsers, writing audio
  // payloads larger than 1000 bytes using the WebRTC Insertable Streams API (which is used to enable dynamic audio
  // redundancy) will cause an error to be thrown and cause audio flow to permanently stop. See
  // https://crbug.com/1248479.
  private readonly maxAudioPayloadSizeBytes = 1000;
  // Each RED header can encode a timestamp delta of 14 bits.
  private readonly maxRedTimestampOffset = 1 << 14;
  // Size of a non-final RED header (F bit + block PT + 14-bit timestamp offset + 10-bit block length).
  private readonly redHeaderSizeBytes = 4;
  // Reduced size for the last RED header (F bit + block PT only).
  private readonly redLastHeaderSizeBytes = 1;
  // P-Time for Opus 20 msec packets, expressed in 48 kHz samples (20 ms * 48 samples/ms).
  // We do not support other p-times or clock rates.
  private readonly redPacketizationTime = 960;
  // Distance (in packets) between redundant payloads; Opus FEC handles a distance of 1.
  // TODO(https://issues.amazon.com/issues/ChimeSDKAudio-55):
  // Consider making this dynamic
  private readonly redPacketDistance = 2;
  // Maximum number of redundant payloads per RTP packet.
  private readonly maxRedEncodings = 2;
  // Maximum number of encodings that can be recovered with a single RED packet, assuming the primary and redundant
  // payloads have FEC.
  private readonly redMaxRecoveryDistance = this.redPacketDistance * this.maxRedEncodings + 1;
  // Maximum history of prior payloads to keep.
  // Generally we will expire old entries based on timestamp;
  // this limit is in place just to make sure the history does not
  // grow too large in the case of erroneous timestamp inputs.
  private readonly maxEncodingHistorySize = 10;
  // Holds the RED payload type obtained from the local SDP.
  // This is updated when the transceiver controller sends
  // a message of type RedPayloadType.
  private redPayloadType: number;
  // Holds the opus payload type obtained from the local SDP.
  // This is updated when the transceiver controller sends
  // a message of type OpusPayloadType.
  private opusPayloadType: number;
  // Current number of redundant encodings we want to send
  // to the remote end. This will be dynamically
  // updated through the setNumEncodingsFromPacketloss API.
  private numRedundantEncodings: number = 0;
  // Holds a history of primary payloads in order,
  // oldest to newest.
  private encodingHistory: Array<RedundantAudioEncoder.Encoding>;
  // Used to enable or disable redundancy
  // in response to very high packet loss events.
  private redundancyEnabled: boolean = true;
  // This is a workaround for unit testing due to issues with mocking `self`
  // which is a part of DedicatedWorker scope and is currently used to post
  // messages to the main thread.
  static shouldLog: boolean = false;
  // This is a workaround for unit testing due to issues with mocking `self`
  // which is a part of DedicatedWorker scope and is currently used to post
  // messages to the main thread.
  static shouldReportStats: boolean = false;
constructor() {
  // Payload types default to 0 until the main thread delivers the real
  // values negotiated in the local SDP.
  this.redPayloadType = 0;
  this.opusPayloadType = 0;
  this.encodingHistory = new Array<RedundantAudioEncoder.Encoding>();
  this.initializePacketLogs();
}
/**
 * Creates the worker-wide RedundantAudioEncoder instance and wires up both
 * the RTCRtpScriptTransform callback (when supported) and the message-based
 * configuration channel from the main thread.
 */
static initializeWorker(): void {
  RedundantAudioEncoder.log('Initializing RedundantAudioEncoder');
  const encoder = new RedundantAudioEncoder();
  // RED encoding is done using WebRTC Encoded Transform
  // https://github.com/w3c/webrtc-encoded-transform/blob/main/explainer.md
  // Check the DedicatedWorkerGlobalScope for existence of
  // RTCRtpScriptTransformer interface. If exists, then
  // RTCRtpScriptTransform is supported by this browser.
  // @ts-ignore
  if (self.RTCRtpScriptTransformer) {
    // @ts-ignore
    self.onrtctransform = (event: RTCTransformEvent) => {
      switch (event.transformer.options.type) {
        case 'SenderTransform':
          encoder.setupSenderTransform(event.transformer.readable, event.transformer.writable);
          break;
        case 'ReceiverTransform':
          encoder.setupReceiverTransform(event.transformer.readable, event.transformer.writable);
          break;
        case 'PassthroughTransform':
          encoder.setupPassthroughTransform(event.transformer.readable, event.transformer.writable);
          break;
      }
    };
  }
  // Fallback/configuration path: the main thread posts control messages and
  // (when Encoded Insertable Streams are used) the stream pairs themselves.
  self.onmessage = (event: MessageEvent) => {
    switch (event.data.msgType) {
      case 'StartRedWorker':
        encoder.setupSenderTransform(event.data.send.readable, event.data.send.writable);
        encoder.setupReceiverTransform(event.data.receive.readable, event.data.receive.writable);
        break;
      case 'PassthroughTransform':
        encoder.setupPassthroughTransform(event.data.send.readable, event.data.send.writable);
        encoder.setupPassthroughTransform(event.data.receive.readable, event.data.receive.writable);
        break;
      case 'RedPayloadType':
        encoder.setRedPayloadType(event.data.payloadType);
        break;
      case 'OpusPayloadType':
        encoder.setOpusPayloadType(event.data.payloadType);
        break;
      case 'UpdateNumRedundantEncodings':
        encoder.setNumRedundantEncodings(event.data.numRedundantEncodings);
        break;
      case 'Enable':
        encoder.setRedundancyEnabled(true);
        break;
      case 'Disable':
        encoder.setRedundancyEnabled(false);
        break;
    }
  };
}
/**
* Post logs to the main thread
*/
static log(msg: string): void {
if (RedundantAudioEncoder.shouldLog) {
// @ts-ignore
self.postMessage({
type: 'REDWorkerLog',
log: `[AudioRed] ${msg}`,
});
}
}
/**
 * Returns the recommended number of redundant encodings for a given packet
 * loss percentage, plus a flag indicating whether RED should be turned off
 * entirely. Used by `DefaultTransceiverController` to decide when to alert
 * the encoder, and to disable RED under very high loss to avoid congestion
 * collapse.
 */
static getNumRedundantEncodingsForPacketLoss(packetLoss: number): [number, boolean] {
  // Above 75% loss, adding redundancy only worsens congestion: disable RED.
  if (packetLoss > 75) {
    return [0, true];
  }
  if (packetLoss > 18) {
    return [2, false];
  }
  if (packetLoss > 8) {
    return [1, false];
  }
  // At 8% loss or less, Opus in-band FEC alone is sufficient.
  return [0, false];
}
/**
* Sets up a passthrough (no-op) transform for the given streams.
*/
setupPassthroughTransform(readable: ReadableStream, writable: WritableStream): void {
RedundantAudioEncoder.log('Setting up passthrough transform');
readable.pipeTo(writable);
}
/**
 * Pipes outgoing encoded audio frames through the sender RED transform
 * before they reach the transport.
 */
setupSenderTransform(readable: ReadableStream, writable: WritableStream): void {
  RedundantAudioEncoder.log('Setting up sender RED transform');
  readable
    .pipeThrough(new TransformStream({ transform: this.senderTransform.bind(this) }))
    .pipeTo(writable);
}
/**
 * Pipes incoming encoded audio frames through the receive-side packet-log
 * transform, which gathers loss/recovery statistics without altering frames.
 */
setupReceiverTransform(readable: ReadableStream, writable: WritableStream): void {
  RedundantAudioEncoder.log('Setting up receiver RED transform');
  readable
    .pipeThrough(new TransformStream({ transform: this.receivePacketLogTransform.bind(this) }))
    .pipeTo(writable);
}
/**
 * Records the RED payload type, ideally obtained from the local offer.
 */
setRedPayloadType(payloadType: number): void {
  this.redPayloadType = payloadType;
  RedundantAudioEncoder.log(`red payload type set to ${this.redPayloadType}`);
}
/**
 * Records the Opus payload type, ideally obtained from the local offer.
 */
setOpusPayloadType(payloadType: number): void {
  this.opusPayloadType = payloadType;
  RedundantAudioEncoder.log(`opus payload type set to ${this.opusPayloadType}`);
}
/**
 * Sets the number of redundant encodings to include in outgoing packets,
 * clamped to the per-packet maximum.
 */
setNumRedundantEncodings(numRedundantEncodings: number): void {
  this.numRedundantEncodings = Math.min(numRedundantEncodings, this.maxRedEncodings);
  RedundantAudioEncoder.log(`Updated numRedundantEncodings to ${this.numRedundantEncodings}`);
}
/**
 * Enables or disables redundancy in response to high packet loss events.
 */
setRedundancyEnabled(enabled: boolean): void {
  this.redundancyEnabled = enabled;
  const state = this.redundancyEnabled ? 'enabled' : 'disabled';
  RedundantAudioEncoder.log(`redundancy ${state}`);
}
/**
* Helper function to only enqueue audio frames if they do not exceed the audio payload byte limit imposed by
* Chromium-based browsers. Chromium will throw an error (https://crbug.com/1248479) if an audio payload larger than
* 1000 bytes is enqueued. Any controller that attempts to enqueue an audio payload larger than 1000 bytes will
* encounter this error and will permanently stop sending or receiving audio.
*/
private enqueueAudioFrameIfPayloadSizeIsValid(
// @ts-ignore
frame: RTCEncodedAudioFrame,
controller: TransformStreamDefaultController
): void {
if (frame.data.byteLength > this.maxAudioPayloadSizeBytes) return;
controller.enqueue(frame);
}
/**
 * Transform applied to outgoing encoded frames: re-packs RED frames with our
 * own choice of redundant payloads before handing them to the transport.
 * Non-RED frames, and frames we fail to parse or re-encode, are forwarded
 * unchanged.
 */
private senderTransform(
  // @ts-ignore
  frame: RTCEncodedAudioFrame,
  controller: TransformStreamDefaultController
): void {
  const frameMetadata = frame.getMetadata();
  // @ts-ignore
  if (frameMetadata.payloadType === this.redPayloadType) {
    // Extract the primary payload from the browser-produced RED frame, then
    // rebuild the RED packet with redundancy drawn from our own history.
    const primaryPayloadBuffer = this.getPrimaryPayload(frame.timestamp, frame.data);
    if (primaryPayloadBuffer) {
      const encodedBuffer = this.encode(frame.timestamp, primaryPayloadBuffer);
      /* istanbul ignore else */
      if (encodedBuffer) {
        frame.data = encodedBuffer;
      }
    }
  }
  // Every path forwards the frame, subject to the Chromium size limit.
  this.enqueueAudioFrameIfPayloadSizeIsValid(frame, controller);
}
/**
 * Extracts the primary payload from a RED-encoded frame. The primary encoding
 * is always the last entry produced by `splitEncodings`.
 */
private getPrimaryPayload(primaryTimestamp: number, frame: ArrayBuffer): ArrayBuffer | null {
  const encodings = this.splitEncodings(primaryTimestamp, frame);
  return encodings && encodings.length >= 1 ? encodings[encodings.length - 1].payload : null;
}
/**
 * Splits a RED payload into its redundant and primary encodings, ordered
 * oldest to newest (the same ordering used inside the RTP RED payload).
 *
 * @param primaryTimestamp RTP timestamp of the primary encoding; redundant
 *        encoding timestamps are derived from it via the header offsets.
 * @param frame The full RED payload to parse.
 * @param getFecInfo When true, each returned encoding is annotated with
 *        whether its Opus payload carries in-band FEC.
 * @param primarySequenceNumber Sequence number of the frame (receive side
 *        only); used to emit a dummy encoding for unparseable packets so that
 *        loss accounting still consumes the sequence number.
 * @returns The parsed encodings, or `null` for a malformed packet.
 */
private splitEncodings(
  primaryTimestamp: number,
  frame: ArrayBuffer,
  getFecInfo: boolean = false,
  primarySequenceNumber: number = undefined
): RedundantAudioEncoder.Encoding[] | null {
  // process RED headers (according to RFC 2198)
  //  0                   1                    2                   3
  //  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
  // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  // |F|   block PT  |  timestamp offset         |   block length    |
  // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  //
  // last header
  //  0 1 2 3 4 5 6 7
  // +-+-+-+-+-+-+-+-+
  // |0|   Block PT  |
  // +-+-+-+-+-+-+-+-+
  const payload = new DataView(frame);
  let payloadSizeBytes = payload.byteLength;
  let totalPayloadSizeBytes = 0;
  let totalHeaderSizeBytes = 0;
  let primaryPayloadSizeBytes = 0;
  let payloadOffset = 0;
  let gotLastBlock = false;
  const encodings = new Array<RedundantAudioEncoder.Encoding>();
  const redundantEncodingBlockLengths = new Array();
  const redundantEncodingTimestamps = new Array();
  // Walk the chain of RED headers; they are packed back-to-back at the start
  // of the payload, terminated by a 1-byte header with the F bit cleared.
  while (payloadSizeBytes > 0) {
    gotLastBlock = (payload.getUint8(payloadOffset) & 0x80) === 0;
    if (gotLastBlock) {
      // Bits 1 through 7 are payload type
      const payloadType = payload.getUint8(payloadOffset) & 0x7f;
      // Unexpected payload type. This is a bad packet.
      if (payloadType !== this.opusPayloadType) {
        return null;
      }
      totalPayloadSizeBytes += this.redLastHeaderSizeBytes;
      totalHeaderSizeBytes += this.redLastHeaderSizeBytes;
      // Accumulated block lengths are equal to or larger than the buffer, which means there is no primary block. This
      // is a bad packet.
      if (totalPayloadSizeBytes >= payload.byteLength) {
        return null;
      }
      // Whatever remains after the headers and redundant blocks is primary.
      primaryPayloadSizeBytes = payload.byteLength - totalPayloadSizeBytes;
      break;
    } else {
      // A non-final header must be 4 bytes; anything shorter is malformed.
      if (payloadSizeBytes < this.redHeaderSizeBytes) {
        return null;
      }
      // Bits 22 through 31 are payload length
      const blockLength =
        ((payload.getUint8(payloadOffset + 2) & 0x03) << 8) + payload.getUint8(payloadOffset + 3);
      redundantEncodingBlockLengths.push(blockLength);
      // Bits 8 through 21 are the 14-bit timestamp offset (relative to primary).
      const timestampOffset = payload.getUint16(payloadOffset + 1) >> 2;
      const timestamp = primaryTimestamp - timestampOffset;
      redundantEncodingTimestamps.push(timestamp);
      totalPayloadSizeBytes += blockLength + this.redHeaderSizeBytes;
      totalHeaderSizeBytes += this.redHeaderSizeBytes;
      payloadOffset += this.redHeaderSizeBytes;
      payloadSizeBytes -= this.redHeaderSizeBytes;
    }
  }
  // The last block was never found. The packet we received
  // does not have a good RED payload.
  if (!gotLastBlock) {
    // Note that sequence numbers only exist for
    // incoming audio frames.
    if (primarySequenceNumber !== undefined) {
      // This could be a possible padding packet used
      // for BWE with a good sequence number.
      // Create a dummy encoding to make sure loss values
      // are calculated correctly by consuming sequence number.
      // Note that for the receive side, we process packets only
      // for loss/recovery calculations and forward the original
      // packet without changing it even in the error case.
      encodings.push({
        payload: frame,
        isRedundant: false,
        seq: primarySequenceNumber,
      });
      return encodings;
    }
    // This is a bad packet.
    return null;
  }
  // Redundant blocks immediately follow the headers, in header order.
  let redundantPayloadOffset = totalHeaderSizeBytes;
  for (let i = 0; i < redundantEncodingTimestamps.length; i++) {
    // Copy each redundant block out into its own buffer.
    const redundantPayloadBuffer = new ArrayBuffer(redundantEncodingBlockLengths[i]);
    const redundantPayloadArray = new Uint8Array(redundantPayloadBuffer);
    redundantPayloadArray.set(
      new Uint8Array(payload.buffer, redundantPayloadOffset, redundantEncodingBlockLengths[i]),
      0
    );
    const encoding: RedundantAudioEncoder.Encoding = {
      timestamp: redundantEncodingTimestamps[i],
      payload: redundantPayloadBuffer,
      isRedundant: true,
    };
    if (getFecInfo) {
      encoding.hasFec = this.opusPacketHasFec(
        new DataView(redundantPayloadBuffer),
        redundantPayloadBuffer.byteLength
      );
    }
    encodings.push(encoding);
    redundantPayloadOffset += redundantEncodingBlockLengths[i];
  }
  // Finally copy out the primary block (the tail of the payload).
  const primaryPayloadOffset = payload.byteLength - primaryPayloadSizeBytes;
  const primaryPayloadBuffer = new ArrayBuffer(primaryPayloadSizeBytes);
  const primaryArray = new Uint8Array(primaryPayloadBuffer);
  primaryArray.set(
    new Uint8Array(payload.buffer, primaryPayloadOffset, primaryPayloadSizeBytes),
    0
  );
  const encoding: RedundantAudioEncoder.Encoding = {
    timestamp: primaryTimestamp,
    payload: primaryPayloadBuffer,
    isRedundant: false,
    seq: primarySequenceNumber,
  };
  if (getFecInfo) {
    encoding.hasFec = this.opusPacketHasFec(
      new DataView(primaryPayloadBuffer),
      primaryPayloadBuffer.byteLength
    );
  }
  encodings.push(encoding);
  return encodings;
}
/**
 * Builds a RED payload containing the current primary payload plus any older
 * payloads from the encoding history that fit within the packet budget.
 *
 * @param primaryTimestamp RTP timestamp of the primary payload.
 * @param primaryPayload The primary Opus payload to send.
 * @returns The RED payload buffer, or `null` if the primary payload size is
 *          invalid or an internal size sanity check fails.
 */
private encode(primaryTimestamp: number, primaryPayload: ArrayBuffer): ArrayBuffer | null {
  const primaryPayloadSize = primaryPayload.byteLength;
  // Payload size needs to be valid: non-empty, within the 10-bit RED block
  // length field, and within the browser-imposed audio payload limit.
  if (
    primaryPayloadSize === 0 ||
    primaryPayloadSize >= this.maxRedPacketSizeBytes ||
    primaryPayloadSize >= this.maxAudioPayloadSizeBytes
  ) {
    return null;
  }
  const numRedundantEncodings = this.numRedundantEncodings;
  let headerSizeBytes = this.redLastHeaderSizeBytes;
  let payloadSizeBytes = primaryPayloadSize;
  let bytesAvailable = this.maxAudioPayloadSizeBytes - primaryPayloadSize - headerSizeBytes;
  const redundantEncodingTimestamps: Array<number> = new Array();
  const redundantEncodingPayloads: Array<ArrayBuffer> = new Array();
  // If redundancy is disabled then only send the primary payload
  if (this.redundancyEnabled) {
    // Determine how much redundancy we can fit into our packet
    let redundantTimestamp = this.uint32WrapAround(
      primaryTimestamp - this.redPacketizationTime * this.redPacketDistance
    );
    for (let i = 0; i < numRedundantEncodings; i++) {
      // Do not add redundant encodings that are beyond the maximum timestamp offset.
      if (
        this.uint32WrapAround(primaryTimestamp - redundantTimestamp) >= this.maxRedTimestampOffset
      ) {
        break;
      }
      let findTimestamp = redundantTimestamp;
      let encoding = this.encodingHistory.find(e => e.timestamp === findTimestamp);
      if (!encoding) {
        // If not found or not important then look for the previous packet.
        // The current packet may have included FEC for the previous, so just
        // use the previous packet instead provided that it has voice activity.
        findTimestamp = this.uint32WrapAround(redundantTimestamp - this.redPacketizationTime);
        encoding = this.encodingHistory.find(e => e.timestamp === findTimestamp);
      }
      if (encoding) {
        const redundantEncodingSizeBytes = encoding.payload.byteLength;
        // Only add redundancy if there are enough bytes available.
        if (bytesAvailable < this.redHeaderSizeBytes + redundantEncodingSizeBytes) break;
        bytesAvailable -= this.redHeaderSizeBytes + redundantEncodingSizeBytes;
        headerSizeBytes += this.redHeaderSizeBytes;
        payloadSizeBytes += redundantEncodingSizeBytes;
        // Prepend so encodings end up ordered oldest to newest, matching the
        // on-the-wire RED ordering.
        redundantEncodingTimestamps.unshift(encoding.timestamp);
        redundantEncodingPayloads.unshift(encoding.payload);
      }
      redundantTimestamp -= this.redPacketizationTime * this.redPacketDistance;
      redundantTimestamp = this.uint32WrapAround(redundantTimestamp);
    }
  }
  const redPayloadBuffer = new ArrayBuffer(headerSizeBytes + payloadSizeBytes);
  const redPayloadView = new DataView(redPayloadBuffer);
  // Add redundant encoding header(s) to new buffer. Each 4-byte header packs
  // the F bit + block PT, a 14-bit timestamp offset, and a 10-bit block length.
  let redPayloadOffset = 0;
  for (let i = 0; i < redundantEncodingTimestamps.length; i++) {
    const timestampDelta = primaryTimestamp - redundantEncodingTimestamps[i];
    redPayloadView.setUint8(redPayloadOffset, this.opusPayloadType | 0x80);
    redPayloadView.setUint16(
      redPayloadOffset + 1,
      (timestampDelta << 2) | (redundantEncodingPayloads[i].byteLength >> 8)
    );
    redPayloadView.setUint8(redPayloadOffset + 3, redundantEncodingPayloads[i].byteLength & 0xff);
    redPayloadOffset += this.redHeaderSizeBytes;
  }
  // Add primary encoding header to new buffer (1 byte, F bit cleared).
  redPayloadView.setUint8(redPayloadOffset, this.opusPayloadType);
  redPayloadOffset += this.redLastHeaderSizeBytes;
  // Add redundant payload(s) to new buffer
  const redPayloadArray = new Uint8Array(redPayloadBuffer);
  for (let i = 0; i < redundantEncodingPayloads.length; i++) {
    redPayloadArray.set(new Uint8Array(redundantEncodingPayloads[i]), redPayloadOffset);
    redPayloadOffset += redundantEncodingPayloads[i].byteLength;
  }
  // Add primary payload to new buffer
  redPayloadArray.set(new Uint8Array(primaryPayload), redPayloadOffset);
  redPayloadOffset += primaryPayload.byteLength;
  /* istanbul ignore next */
  // Sanity check that we got the expected total payload size.
  if (redPayloadOffset !== headerSizeBytes + payloadSizeBytes) return null;
  this.updateEncodingHistory(primaryTimestamp, primaryPayload);
  return redPayloadBuffer;
}
/**
* Update the encoding history with the latest primary encoding
*/
private updateEncodingHistory(primaryTimestamp: number, primaryPayload: ArrayBuffer): void {
// Remove encodings from the history if they are too old.
for (const encoding of this.encodingHistory) {
const maxTimestampDelta = this.redPacketizationTime * this.redMaxRecoveryDistance;
if (primaryTimestamp - encoding.timestamp >= maxTimestampDelta) {
this.encodingHistory.shift();
} else {
break;
}
}
// Only add an encoding to the history if the encoding is deemed to be important. An encoding is important if it is
// a CELT-only packet or contains voice activity.
const packet = new DataView(primaryPayload);
if (
this.opusPacketIsCeltOnly(packet) ||
this.opusPacketHasVoiceActivity(packet, packet.byteLength) > 0
) {
// Check if adding an encoding will cause the length of the encoding history to exceed the maximum history size.
// This is not expected to happen but could occur if we get incorrect timestamps. We want to make sure our memory
// usage is bounded. In this case, just clear the history and start over from empty.
if (this.encodingHistory.length + 1 > this.maxEncodingHistorySize)
this.encodingHistory.length = 0;
this.encodingHistory.push({ timestamp: primaryTimestamp, payload: primaryPayload });
}
}
// Keeps track of timestamps of primary packets received
// from the server.
private primaryPacketLog: RedundantAudioEncoder.PacketLog;
// Keeps track of timestamps of payloads we recovered
// through redundant payloads.
private redRecoveryLog: RedundantAudioEncoder.PacketLog;
// Keeps track of timestamps of payloads we recovered
// through FEC payloads.
private fecRecoveryLog: RedundantAudioEncoder.PacketLog;
// Most recent sequence number of a primary packet received
// from the server.
private newestSequenceNumber: number;
// Total number of packets we expected from the server.
private totalAudioPacketsExpected: number;
// Total number of packets from the server that were lost.
private totalAudioPacketsLost: number;
// Total number of packets we recovered by consuming
// redundant payloads.
private totalAudioPacketsRecoveredRed: number;
// Total number of packets we recovered by consuming
// payloads with FEC.
private totalAudioPacketsRecoveredFec: number;
// The timestamp at which we last reported loss stats
// to the main thread. Initially undefined, which makes the
// first comparison in maybeReportLossStats evaluate to NaN
// and therefore triggers an immediate first report.
private lastLossReportTimestamp: number;
// Loss stats are reported to the main thread every 5 seconds.
// Since timestamp differences between 2 consecutive packets
// give us the number of samples in each channel, 1 second
// is equivalent to 48000 samples:
// P-time * (1000ms/1s)
// = (960 samples/20ms) * (1000ms/1s)
// = 48000 samples/s
private readonly lossReportInterval: number = 48000 * 5;
// Maximum distance of a packet from the most recent packet timestamp
// that we will consider for recovery.
private readonly maxOutOfOrderPacketDistance = 16;
/**
 * Initializes the three packet logs and resets all loss/recovery counters.
 */
private initializePacketLogs(): void {
  // The extra space from the max RED recovery distance is to ensure that we do
  // not incorrectly count recovery for packets that have already been received
  // but are outside of the max out-of-order distance.
  const packetLogSize = this.maxOutOfOrderPacketDistance + this.redMaxRecoveryDistance;
  // All three logs share the same shape: a fixed-size circular window.
  const createLog = (): RedundantAudioEncoder.PacketLog => ({
    window: new Array<number>(packetLogSize),
    index: 0,
    windowSize: packetLogSize,
  });
  this.primaryPacketLog = createLog();
  this.redRecoveryLog = createLog();
  this.fecRecoveryLog = createLog();
  this.totalAudioPacketsExpected = 0;
  this.totalAudioPacketsLost = 0;
  this.totalAudioPacketsRecoveredRed = 0;
  this.totalAudioPacketsRecoveredFec = 0;
}
/**
 * Transform applied to frames received from the server: records their
 * timestamps in the packet logs to compute approximate loss/recovery metrics,
 * then forwards the frame unmodified.
 */
private receivePacketLogTransform(
  // @ts-ignore
  frame: RTCEncodedAudioFrame,
  controller: TransformStreamDefaultController
): void {
  const frameMetadata = frame.getMetadata();
  // @ts-ignore
  if (frameMetadata.payloadType === this.redPayloadType) {
    // @ts-ignore
    const encodings = this.splitEncodings(
      frame.timestamp,
      frame.data,
      /*getFecInfo*/ true,
      frameMetadata.sequenceNumber
    );
    if (encodings) {
      // Process the primary (newest) encoding first, then the redundant ones.
      for (let i = encodings.length - 1; i >= 0; i--) {
        if (this.updateLossStats(encodings[i])) {
          this.updateRedStats(encodings[i]);
          this.updateFecStats(encodings[i]);
        }
      }
      this.maybeReportLossStats(
        frameMetadata.synchronizationSource,
        encodings[encodings.length - 1].timestamp
      );
    }
  }
  // The original frame is always forwarded, even when parsing fails.
  this.enqueueAudioFrameIfPayloadSizeIsValid(frame, controller);
}
/**
 * Adds a timestamp to the primary packet log.
 * This also updates totalAudioPacketsLost and totalAudioPacketsExpected by looking
 * at the difference between sequence numbers.
 *
 * @param encoding : The encoding to be analyzed
 * @returns false if sequence number was greater than max out of order distance
 *          true otherwise
 */
private updateLossStats(encoding: RedundantAudioEncoder.Encoding): boolean {
  // Redundant encodings carry no sequence number and do not affect loss stats.
  if (encoding.isRedundant) return true;
  const timestamp = encoding.timestamp;
  const seq = encoding.seq;
  // First primary packet seen: seed the counters and log.
  if (this.totalAudioPacketsExpected === 0) {
    this.totalAudioPacketsExpected = 1;
    this.newestSequenceNumber = seq;
    this.addTimestamp(this.primaryPacketLog, timestamp);
    return true;
  }
  // Signed 16-bit difference handles sequence number wraparound.
  const diff = this.int16(seq - this.newestSequenceNumber);
  // Too old to consider; tell the caller to skip recovery accounting too.
  if (diff < -this.maxOutOfOrderPacketDistance) return false;
  if (diff < 0) {
    // Late arrival of a packet we previously counted as lost: undo the loss
    // and drop any recovery credit we granted for it.
    if (!this.hasTimestamp(this.primaryPacketLog, timestamp)) {
      if (this.totalAudioPacketsLost > 0) this.totalAudioPacketsLost--;
      this.addTimestamp(this.primaryPacketLog, timestamp);
      this.removeFromRecoveryWindows(timestamp);
    }
  } else if (diff > 1) {
    // Gap in sequence numbers: everything in between is (provisionally) lost.
    this.totalAudioPacketsLost += diff - 1;
  }
  // Forward progress: advance expectations and record the new packet.
  if (diff > 0) {
    this.totalAudioPacketsExpected += diff;
    this.newestSequenceNumber = encoding.seq;
    this.addTimestamp(this.primaryPacketLog, timestamp);
  }
  return true;
}
/**
 * Credits a RED recovery for a redundant encoding whose timestamp was never
 * seen as a primary packet. If the same timestamp had previously been
 * credited to FEC, the credit is moved from FEC to RED.
 *
 * @param encoding : The encoding to be analyzed
 */
private updateRedStats(encoding: RedundantAudioEncoder.Encoding): void {
  // Only redundant encodings matter here, and only while there is loss to recover.
  if (!encoding.isRedundant || this.totalAudioPacketsLost === 0) return;
  const timestamp = encoding.timestamp;
  // Already received as a primary packet: nothing was recovered.
  if (this.hasTimestamp(this.primaryPacketLog, timestamp)) return;
  if (!this.hasTimestamp(this.redRecoveryLog, timestamp)) {
    this.totalAudioPacketsRecoveredRed++;
    this.addTimestamp(this.redRecoveryLog, timestamp);
  }
  // RED recovery supersedes any FEC recovery previously counted for this timestamp.
  if (this.removeTimestamp(this.fecRecoveryLog, timestamp)) {
    /* istanbul ignore else */
    if (this.totalAudioPacketsRecoveredFec > 0) this.totalAudioPacketsRecoveredFec--;
  }
}
/**
* Adds a timestamp to the fec recovery log if it is not present in
* the primary packet log and red recovery log and if it is not too old.
*
* @param encoding : The encoding to be analyzed
*/
private updateFecStats(encoding: RedundantAudioEncoder.Encoding): void {
if (!encoding.hasFec || this.totalAudioPacketsLost === 0) return;
const fecTimestamp = encoding.timestamp - this.redPacketizationTime;
if (
this.hasTimestamp(this.primaryPacketLog, fecTimestamp) ||
this.hasTimestamp(this.redRecoveryLog, fecTimestamp) ||
this.hasTimestamp(this.fecRecoveryLog, fecTimestamp)
) {
return;
}
this.totalAudioPacketsRecoveredFec++;
this.addTimestamp(this.fecRecoveryLog, fecTimestamp);
}
/**
 * Reports loss metrics to DefaultTransceiverController, at most once per
 * report interval.
 *
 * @param ssrc : Synchronization source of the received stream
 * @param timestamp : Timestamp of most recent primary packet
 */
private maybeReportLossStats(ssrc: number, timestamp?: number): void {
  if (timestamp === undefined) return;
  // Note: on the first call lastLossReportTimestamp is undefined, the
  // subtraction yields NaN, the comparison is false, and we report immediately.
  if (timestamp - this.lastLossReportTimestamp < this.lossReportInterval) return;
  /* istanbul ignore next */
  if (RedundantAudioEncoder.shouldReportStats) {
    // @ts-ignore
    self.postMessage({
      type: 'RedundantAudioEncoderStats',
      ssrc,
      totalAudioPacketsLost: this.totalAudioPacketsLost,
      totalAudioPacketsExpected: this.totalAudioPacketsExpected,
      totalAudioPacketsRecoveredRed: this.totalAudioPacketsRecoveredRed,
      totalAudioPacketsRecoveredFec: this.totalAudioPacketsRecoveredFec,
    });
  }
  this.lastLossReportTimestamp = timestamp;
}
/**
 * Records a timestamp in a circular packet log, overwriting the oldest slot.
 *
 * @param packetLog : The packet log to add the timestamp to
 * @param timestamp : The timestamp that should be added
 */
private addTimestamp(packetLog: RedundantAudioEncoder.PacketLog, timestamp?: number): void {
  if (timestamp === undefined) return;
  packetLog.window[packetLog.index] = timestamp;
  packetLog.index = (packetLog.index + 1) % packetLog.windowSize;
}
/**
 * Checks if a timestamp is in a packet log.
 *
 * Fix: uses `includes` rather than `find` + truthiness coercion so that a
 * stored timestamp of 0 is correctly reported as present. RTP timestamps wrap
 * around 2^32, so 0 is a legitimate value, and the previous
 * `!!window.find(t => t === timestamp)` evaluated to `false` for it.
 *
 * @param packetLog : The packet log to search
 * @param timestamp : The timestamp to search for
 * @returns true if timestamp is present, false otherwise
 */
private hasTimestamp(packetLog: RedundantAudioEncoder.PacketLog, timestamp: number): boolean {
  return packetLog.window.includes(timestamp);
}
/**
 * Removes a timestamp from a packet log by blanking its slot.
 *
 * @param packetLog : The packet log from which the timestamp should be removed
 * @param timestamp : The timestamp to be removed
 * @returns true if timestamp was present in the log and removed, false otherwise
 */
private removeTimestamp(packetLog: RedundantAudioEncoder.PacketLog, timestamp: number): boolean {
  const index = packetLog.window.indexOf(timestamp);
  if (index < 0) return false;
  packetLog.window[index] = undefined;
  return true;
}
/**
* Removes a timestamp from red and fec recovery windows.
*
* @param timestamp : The timestamp to be removed
*/
private removeFromRecoveryWindows(timestamp: number): void {
let removed = this.removeTimestamp(this.redRecoveryLog, timestamp);
if (removed) {
if (this.totalAudioPacketsRecoveredRed > 0) this.totalAudioPacketsRecoveredRed--;
}
removed = this.removeTimestamp(this.fecRecoveryLog, timestamp);
if (removed) {
if (this.totalAudioPacketsRecoveredFec > 0) this.totalAudioPacketsRecoveredFec--;
}
}
/**
 * Wraps a value into the unsigned 32-bit range [0, 2^32), assuming the input
 * is within one wrap of that range (as is the case for RTP timestamp math).
 */
private uint32WrapAround(num: number): number {
  const twoToThe32 = 4294967296; // 2^32
  if (num >= twoToThe32) return num - twoToThe32;
  if (num < 0) return num + twoToThe32;
  return num;
}
/**
 * Reinterprets the low 16 bits of a number as a signed 16-bit integer, used
 * for wraparound-safe sequence number differences.
 */
private int16(num: number): number {
  // Shift into the high half, then arithmetic-shift back to sign-extend.
  const shifted = num << 16;
  return shifted >> 16;
}
/**
 * Below are Opus helper methods and constants.
 */
// Opus error code for an invalid argument (mirrors libopus OPUS_BAD_ARG).
private readonly OPUS_BAD_ARG = -1;
// Opus error code for a corrupt packet (mirrors libopus OPUS_INVALID_PACKET).
private readonly OPUS_INVALID_PACKET = -4;
// Max number of Opus frames in an Opus packet is 48 (https://www.rfc-editor.org/rfc/rfc6716#section-3.2.5).
private readonly OPUS_MAX_OPUS_FRAMES = 48;
// Max number of bytes that any individual Opus frame can have.
private readonly OPUS_MAX_FRAME_SIZE_BYTES = 1275;
/**
 * Determines if an Opus packet is in CELT-only mode.
 *
 * @param packet Opus packet.
 * @returns `true` if the packet is in CELT-only mode.
 */
private opusPacketIsCeltOnly(packet: DataView): boolean {
  // TOC byte format (https://www.rfc-editor.org/rfc/rfc6716#section-3.1):
  //  0
  //  0 1 2 3 4 5 6 7
  // +-+-+-+-+-+-+-+-+
  // | config  |s| c |
  // +-+-+-+-+-+-+-+-+
  // CELT-only packets use configurations 16 to 31, so the highest 'config'
  // bit is always 1 for (and only for) CELT-only packets.
  return (packet.getUint8(0) & 0x80) !== 0;
}
/**
 * Gets the number of samples per frame from an Opus packet, based solely on
 * its TOC byte (mirrors libopus `opus_packet_get_samples_per_frame`).
 *
 * @param packet Opus packet. This must contain at least one byte of data.
 * @param sampleRateHz 32-bit integer sampling rate in Hz. This must be a multiple of 400 or inaccurate results will
 *                     be returned.
 * @returns Number of samples per frame.
 */
private opusPacketGetSamplesPerFrame(packet: DataView, sampleRateHz: number): number {
  // Sample rate must be a 32-bit integer.
  // NOTE(review): the clamp range here is [-(2^32), 2^32 - 1], which is wider
  // than a true 32-bit integer range; harmless for valid audio sample rates.
  sampleRateHz = Math.round(sampleRateHz);
  sampleRateHz = Math.min(Math.max(sampleRateHz, -(2 ** 32)), 2 ** 32 - 1);
  // TOC byte format (https://www.rfc-editor.org/rfc/rfc6716#section-3.1):
  //  0
  //  0 1 2 3 4 5 6 7
  // +-+-+-+-+-+-+-+-+
  // | config  |s| c |
  // +-+-+-+-+-+-+-+-+
  let numSamples: number;
  let frameSizeOption: number;
  // Case for CELT-only packet.
  if (this.opusPacketIsCeltOnly(packet)) {
    // The lower 2 'config' bits indicate the frame size option.
    frameSizeOption = (packet.getUint8(0) >> 3) & 0x3;
    // The frame size options 0, 1, 2, 3 correspond to frame sizes of 2.5, 5, 10, 20 ms. Notice that the frame sizes
    // can be represented as (2.5 * 2^0), (2.5 * 2^1), (2.5 * 2^2), (2.5 * 2^3) ms. So, the number of samples can be
    // calculated as follows:
    // (sample/s) * (1s/1000ms) * (2.5ms) * 2^(frameSizeOption)
    // = (sample/s) * (1s/400) * 2^(frameSizeOption)
    // = (sample/s) * 2^(frameSizeOption) * (1s/400)
    numSamples = (sampleRateHz << frameSizeOption) / 400;
  }
  // Case for Hybrid packet. Since Hybrid packets are represented using configurations 12 to 15, bits 1 and 2 in the
  // above TOC byte diagram will both be 1.
  else if ((packet.getUint8(0) & 0x60) === 0x60) {
    // In the case of configuration 13 or 15, bit 4 in the above TOC byte diagram will be 1. Configurations 13 and 15
    // correspond to a 20ms frame size, so the number of samples is calculated as follows:
    // (sample/s) * (1s/1000ms) * (20ms)
    // = (sample/s) * (1s/50)
    //
    // In the case of configuration 12 or 14, bit 4 in the above TOC byte diagram will be 0. Configurations 12 and 14
    // correspond to a 10ms frame size, so the number of samples is calculated as follows:
    // (sample/s) * (1s/1000ms) * (10ms)
    // = (sample/s) * (1s/100)
    numSamples = packet.getUint8(0) & 0x08 ? sampleRateHz / 50 : sampleRateHz / 100;
  }
  // Case for SILK-only packet.
  else {
    // The lower 2 'config' bits indicate the frame size option for SILK-only packets.
    frameSizeOption = (packet.getUint8(0) >> 3) & 0x3;
    if (frameSizeOption === 3) {
      // Frame size option 3 corresponds to a frame size of 60ms, so the number of samples is calculated as follows:
      // (sample/s) * (1s/1000ms) * (60ms)
      // = (sample/s) * (60ms) * (1s/1000ms)
      numSamples = (sampleRateHz * 60) / 1000;
    } else {
      // The frame size options 0, 1, 2 correspond to frame sizes of 10, 20, 40 ms. Notice that the frame sizes can be
      // represented as (10 * 2^0), (10 * 2^1), (10 * 2^2) ms. So, the number of samples can be calculated as follows:
      // (sample/s) * (1s/1000ms) * (10ms) * 2^(frameSizeOption)
      // = (sample/s) * (1s/100) * 2^(frameSizeOption)
      // = (sample/s) * 2^(frameSizeOption) * (1s/100)
      numSamples = (sampleRateHz << frameSizeOption) / 100;
    }
  }
  return numSamples;
}
/**
* Gets the number of SILK frames per Opus frame.
*
* @param packet Opus packet.
* @returns Number of SILK frames per Opus frame.
*/
private opusNumSilkFrames(packet: DataView): number {
// For computing the frame length in ms, the sample rate is not important since it cancels out. We use 48 kHz, but
// any valid sample rate would work.
//
// To calculate the length of a frame (with a 48kHz sample rate) in ms:
// (samples/frame) * (1s/48000 samples) * (1000ms/s)
// = (samples/frame) * (1000ms/48000 samples)
// = (samples/frame) * (1ms/48 samples)
let frameLengthMs = this.opusPacketGetSamplesPerFrame(packet, 48000) / 48;
if (frameLengthMs < 10) frameLengthMs = 10;
// The number of SILK frames per Opus frame is described in https://www.rfc-editor.org/rfc/rfc6716#section-4.2.2.
switch (frameLengthMs) {
case 10:
case 20:
return 1;
case 40:
return 2;
case 60:
return 3;
// It is not possible to reach the default case since an Opus packet can only encode sizes of 2.5, 5, 10, 20, 40,
// or 60 ms, so we ignore the default case for test coverage.
/* istanbul ignore next */
default:
return 0;
}
}
/**
* Gets the number of channels from an Opus packet.
*
* @param packet Opus packet.
* @returns Number of channels.
*/
private opusPacketGetNumChannels(packet: DataView): number {
// TOC byte format (https://www.rfc-editor.org/rfc/rfc6716#section-3.1):
// 0
// 0 1 2 3 4 5 6 7
// +-+-+-+-+-+-+-+-+
// | config |s| c |
// +-+-+-+-+-+-+-+-+
// The 's' bit indicates mono or stereo audio, with 0 indicating mono and 1 indicating stereo.
return packet.getUint8(0) & 0x4 ? 2 : 1;
}
/**
* Determine the size (in bytes) of an Opus frame.
*
* @param packet Opus packet.
* @param byteOffset Offset (from the start of the packet) to the byte containing the size information.
* @param remainingBytes Remaining number of bytes to parse from the Opus packet.
* @param sizeBytes Variable to store the parsed frame size (in bytes).
* @returns Number of bytes that were parsed to determine the frame size.
*/
private opusParseSize(
packet: DataView,
byteOffset: number,
remainingBytes: number,
sizeBytes: [number]
): number {
// See https://www.rfc-editor.org/rfc/rfc6716#section-3.2.1 for an explanation of how frame size is represented.
// If there are no remaining bytes to parse the size from, then the size cannot be determined.
if (remainingBytes < 1) {
sizeBytes[0] = -1;
return -1;
}
// If the first byte is in the range 0...251, then this value is the size of the frame.
else if (packet.getUint8(byteOffset) < 252) {
sizeBytes[0] = packet.getUint8(byteOffset);
return 1;
}
// If the first byte is in the range 252...255, a second byte is needed. If there is no second byte, then the size
// cannot be determined.
else if (remainingBytes < 2) {
sizeBytes[0] = -1;
return -1;
}
// The total size of the frame given two size bytes is:
// (4 * secondSizeByte) + firstSizeByte
else {
sizeBytes[0] = 4 * packet.getUint8(byteOffset + 1) + packet.getUint8(byteOffset);
return 2;
}
}
  /**
   * Parse binary data containing an Opus packet into one or more Opus frames.
   *
   * This is a close port of libopus's `opus_packet_parse_impl()`; the statement order is significant for the
   * size/offset bookkeeping, so it should be modified with care.
   *
   * @param data Binary data containing an Opus packet to be parsed. The data should begin with the first byte (i.e the
   * TOC byte) of an Opus packet. Note that the size of the data does not have to equal the size of the
   * contained Opus packet.
   * @param lenBytes Size of the data (in bytes).
   * @param selfDelimited Indicates if the Opus packet is self-delimiting
   * (https://www.rfc-editor.org/rfc/rfc6716#appendix-B).
   * @param tocByte Optional variable to store the TOC (table of contents) byte.
   * @param frameOffsets Optional variable to store the offsets (from the start of the data) to the first bytes of each
   * Opus frame.
   * @param frameSizes Required variable to store the sizes (in bytes) of each Opus frame.
   * @param payloadOffset Optional variable to store the offset (from the start of the data) to the first byte of the
   * payload.
   * @param packetLenBytes Optional variable to store the length of the Opus packet (in bytes).
   * @returns Number of Opus frames, or a negative error code (`OPUS_BAD_ARG` / `OPUS_INVALID_PACKET`) on failure.
   */
  private opusPacketParseImpl(
    data: DataView,
    lenBytes: number,
    selfDelimited: boolean,
    tocByte: [number],
    frameOffsets: Array<[number]>,
    frameSizes: Array<[number]>,
    payloadOffset: [number],
    packetLenBytes: [number]
  ): number {
    if (!frameSizes || lenBytes < 0) return this.OPUS_BAD_ARG;
    if (lenBytes === 0) return this.OPUS_INVALID_PACKET;
    // The number of Opus frames in the packet.
    let numFrames: number;
    // Intermediate storage for the number of bytes parsed to determine the size of a frame.
    let numBytesParsed: number;
    // The number of the padding bytes (excluding the padding count bytes) in the packet.
    let paddingBytes = 0;
    // Indicates whether CBR (constant bitrate) framing is used.
    let cbr = false;
    // The TOC (table of contents) byte (https://www.rfc-editor.org/rfc/rfc6716#section-3.1).
    const toc = data.getUint8(0);
    // Store the TOC byte.
    if (tocByte) tocByte[0] = toc;
    // The remaining number of bytes to parse from the packet. Note that the TOC byte has already been parsed, hence the
    // minus 1.
    let remainingBytes = lenBytes - 1;
    // This keeps track of where we are in the packet. This starts at 1 since the TOC byte has already been read.
    let byteOffset = 1;
    // The size of the last Opus frame in bytes. Initialized to all remaining data; the cases below narrow it down.
    let lastSizeBytes = remainingBytes;
    // Read the `c` bits (i.e. code bits) from the TOC byte.
    switch (toc & 0x3) {
      // A code 0 packet (https://www.rfc-editor.org/rfc/rfc6716#section-3.2.2) has one frame.
      case 0:
        numFrames = 1;
        break;
      // A code 1 packet (https://www.rfc-editor.org/rfc/rfc6716#section-3.2.3) has two CBR (constant bitrate) frames.
      case 1:
        numFrames = 2;
        cbr = true;
        if (!selfDelimited) {
          // Undelimited code 1 packets must be an even number of data bytes, otherwise the packet is invalid.
          if (remainingBytes & 0x1) return this.OPUS_INVALID_PACKET;
          // The sizes of both frames are equal (i.e. half of the number of data bytes).
          lastSizeBytes = remainingBytes / 2;
          // If `lastSizeBytes` is too large, we will catch it later.
          frameSizes[0][0] = lastSizeBytes;
        }
        break;
      // A code 2 packet (https://www.rfc-editor.org/rfc/rfc6716#section-3.2.4) has two VBR (variable bitrate) frames.
      case 2:
        numFrames = 2;
        numBytesParsed = this.opusParseSize(data, byteOffset, remainingBytes, frameSizes[0]);
        remainingBytes -= numBytesParsed;
        // The parsed size of the first frame cannot be larger than the number of remaining bytes in the packet.
        if (frameSizes[0][0] < 0 || frameSizes[0][0] > remainingBytes) {
          return this.OPUS_INVALID_PACKET;
        }
        byteOffset += numBytesParsed;
        // The size of the second frame is the remaining number of bytes after the first frame.
        lastSizeBytes = remainingBytes - frameSizes[0][0];
        break;
      // A code 3 packet (https://www.rfc-editor.org/rfc/rfc6716#section-3.2.5) has multiple CBR/VBR frames (from 0 to
      // 120 ms).
      default:
        // Code 3 packets must have at least 2 bytes (i.e. at least 1 byte after the TOC byte).
        if (remainingBytes < 1) return this.OPUS_INVALID_PACKET;
        // Frame count byte format:
        //  0
        //  0 1 2 3 4 5 6 7
        // +-+-+-+-+-+-+-+-+
        // |v|p|     M     |
        // +-+-+-+-+-+-+-+-+
        //
        // Read the frame count byte, which immediately follows the TOC byte.
        const frameCountByte = data.getUint8(byteOffset++);
        --remainingBytes;
        // Read the 'M' bits of the frame count byte, which encode the number of frames.
        numFrames = frameCountByte & 0x3f;
        // The number of frames in a code 3 packet must not be 0.
        if (numFrames <= 0) return this.OPUS_INVALID_PACKET;
        const samplesPerFrame = this.opusPacketGetSamplesPerFrame(data, 48000);
        // A single frame can have at most 2880 samples, which happens in the case where 60ms of 48kHz audio is encoded
        // per frame. A code 3 packet cannot contain more than 120ms of audio, so the total number of samples cannot
        // exceed 2880 * 2 = 5760.
        if (samplesPerFrame * numFrames > 5760) return this.OPUS_INVALID_PACKET;
        // Parse padding bytes if the 'p' bit is 1.
        if (frameCountByte & 0x40) {
          let paddingCountByte: number;
          let numPaddingBytes: number;
          // Remove padding bytes (including padding count bytes) from the remaining byte count.
          do {
            // Sanity check that there are enough bytes to parse and remove the padding.
            if (remainingBytes <= 0) return this.OPUS_INVALID_PACKET;
            // Get the next padding count byte.
            paddingCountByte = data.getUint8(byteOffset++);
            --remainingBytes;
            // If the padding count byte has a value in the range 0...254, then the total size of the padding is the
            // value in the padding count byte.
            //
            // If the padding count byte has value 255, then the total size of the padding is 254 plus the value in the
            // next padding count byte. Therefore, keep reading padding count bytes while the value is 255.
            //
            // Note that `remainingBytes` may temporarily go negative inside this loop; this is validated immediately
            // after the loop.
            numPaddingBytes = paddingCountByte === 255 ? 254 : paddingCountByte;
            remainingBytes -= numPaddingBytes;
            paddingBytes += numPaddingBytes;
          } while (paddingCountByte === 255);
        }
        // Sanity check that the remaining number of bytes is not negative after removing the padding.
        if (remainingBytes < 0) return this.OPUS_INVALID_PACKET;
        // Read the 'v' bit (i.e. VBR bit).
        cbr = !(frameCountByte & 0x80);
        // VBR case
        if (!cbr) {
          lastSizeBytes = remainingBytes;
          // Let M be the number of frames. There will be M - 1 frame length indicators (which can be 1 or 2 bytes)
          // corresponding to the lengths of frames 0 to M - 2. The size of the last frame (i.e. frame M - 1) is the
          // number of data bytes after the end of frame M - 2 and before the start of the padding bytes.
          for (let i = 0; i < numFrames - 1; ++i) {
            numBytesParsed = this.opusParseSize(data, byteOffset, remainingBytes, frameSizes[i]);
            remainingBytes -= numBytesParsed;
            // The remaining number of data bytes must be enough to contain each frame.
            if (frameSizes[i][0] < 0 || frameSizes[i][0] > remainingBytes) {
              return this.OPUS_INVALID_PACKET;
            }
            byteOffset += numBytesParsed;
            lastSizeBytes -= numBytesParsed + frameSizes[i][0];
          }
          // Sanity check that the size of the last frame is not negative.
          if (lastSizeBytes < 0) return this.OPUS_INVALID_PACKET;
        }
        // CBR case
        else if (!selfDelimited) {
          // The size of each frame is the number of data bytes divided by the number of frames.
          lastSizeBytes = Math.trunc(remainingBytes / numFrames);
          // The number of data bytes must be a non-negative integer multiple of the number of frames.
          if (lastSizeBytes * numFrames !== remainingBytes) return this.OPUS_INVALID_PACKET;
          // All frames have equal size in the undelimited CBR case.
          for (let i = 0; i < numFrames - 1; ++i) {
            frameSizes[i][0] = lastSizeBytes;
          }
        }
    }
    // Self-delimited framing uses an extra 1 or 2 bytes, immediately preceding the data bytes, to indicate either the
    // size of the last frame (for code 0, code 2, and VBR code 3 packets) or the size of all the frames (for code 1 and
    // CBR code 3 packets). See https://www.rfc-editor.org/rfc/rfc6716#appendix-B.
    if (selfDelimited) {
      // The extra frame size byte(s) will always indicate the size of the last frame.
      numBytesParsed = this.opusParseSize(
        data,
        byteOffset,
        remainingBytes,
        frameSizes[numFrames - 1]
      );
      remainingBytes -= numBytesParsed;
      // There must be enough data bytes for the last frame.
      if (frameSizes[numFrames - 1][0] < 0 || frameSizes[numFrames - 1][0] > remainingBytes) {
        return this.OPUS_INVALID_PACKET;
      }
      byteOffset += numBytesParsed;
      // For CBR packets, the sizes of all the frames are equal.
      if (cbr) {
        // There must be enough data bytes for all the frames.
        if (frameSizes[numFrames - 1][0] * numFrames > remainingBytes) {
          return this.OPUS_INVALID_PACKET;
        }
        for (let i = 0; i < numFrames - 1; ++i) {
          frameSizes[i][0] = frameSizes[numFrames - 1][0];
        }
      }
      // At this point, `lastSizeBytes` contains the size of the last frame plus the size of the extra frame size
      // byte(s), so sanity check that `lastSizeBytes` is the upper bound for the size of the last frame.
      else if (!(numBytesParsed + frameSizes[numFrames - 1][0] <= lastSizeBytes)) {
        return this.OPUS_INVALID_PACKET;
      }
    }
    // Undelimited case
    else {
      // Because the size of the last packet is not encoded explicitly, it is possible that the size of the last packet
      // (or of all the packets, for the CBR case) is larger than maximum frame size.
      if (lastSizeBytes > this.OPUS_MAX_FRAME_SIZE_BYTES) return this.OPUS_INVALID_PACKET;
      frameSizes[numFrames - 1][0] = lastSizeBytes;
    }
    // Store the offset to the start of the payload.
    if (payloadOffset) payloadOffset[0] = byteOffset;
    // Store the offsets to the start of each frame.
    for (let i = 0; i < numFrames; ++i) {
      if (frameOffsets) frameOffsets[i][0] = byteOffset;
      byteOffset += frameSizes[i][0];
    }
    // Store the length of the Opus packet (payload plus padding).
    if (packetLenBytes) packetLenBytes[0] = byteOffset + paddingBytes;
    return numFrames;
  }
/**
* Parse a single undelimited Opus packet into one or more Opus frames.
*
* @param packet Opus packet to be parsed.
* @param lenBytes Size of the packet (in bytes).
* @param tocByte Optional variable to store the TOC (table of contents) byte.
* @param frameOffsets Optional variable to store the offsets (from the start of the packet) to the first bytes of
* each Opus frame.
* @param frameSizes Required variable to store the sizes (in bytes) of each Opus frame.
* @param payloadOffset Optional variable to store the offset (from the start of the packet) to the first byte of the
* payload.
* @returns Number of Opus frames.
*/
private opusPacketParse(
packet: DataView,
lenBytes: number,
tocByte: [number],
frameOffsets: Array<[number]>,
frameSizes: Array<[number]>,
payloadOffset: [number]
): number {
return this.opusPacketParseImpl(
packet,
lenBytes,
/* selfDelimited */ false,
tocByte,
frameOffsets,
frameSizes,
payloadOffset,
null
);
}
/**
* This function returns the SILK VAD (voice activity detection) information encoded in the Opus packet. For CELT-only
* packets that do not have VAD information, it returns -1.
*
* @param packet Opus packet.
* @param lenBytes Size of the packet (in bytes).
* @returns 0: no frame had the VAD flag set.
* 1: at least one frame had the VAD flag set.
* -1: VAD status could not be determined.
*/
private opusPacketHasVoiceActivity(packet: DataView, lenBytes: number): number {
if (!packet || lenBytes <= 0) return 0;
// In CELT-only mode, we can not determine whether there is VAD.
if (this.opusPacketIsCeltOnly(packet)) return -1;
const numSilkFrames = this.opusNumSilkFrames(packet);
// It is not possible for `opusNumSilkFrames()` to return 0, so we ignore the next sanity check for test coverage.
/* istanbul ignore next */
if (numSilkFrames === 0) return -1;
const opusFrameOffsets = new Array<[number]>(this.OPUS_MAX_OPUS_FRAMES);
const opusFrameSizes = new Array<[number]>(this.OPUS_MAX_OPUS_FRAMES);
for (let i = 0; i < this.OPUS_MAX_OPUS_FRAMES; ++i) {
opusFrameOffsets[i] = [undefined];
opusFrameSizes[i] = [undefined];
}
// Parse packet to get the Opus frames.
const numOpusFrames = this.opusPacketParse(
packet,
lenBytes,
null,
opusFrameOffsets,
opusFrameSizes,
null
);
// VAD status cannot be determined for invalid packets.
if (numOpusFrames < 0) return -1;
// Iterate over all Opus frames, which may contain multiple SILK frames, to determine the VAD status.
for (let i = 0; i < numOpusFrames; ++i) {
if (opusFrameSizes[i][0] < 1) continue;
// LP layer header bits format (https://www.rfc-editor.org/rfc/rfc6716#section-4.2.3):
//
// Mono case:
// +-----------------+----------+
// | 1 to 3 VAD bits | LBRR bit |
// +-----------------+----------+
//
// Stereo case:
// +---------------------+--------------+----------------------+---------------+
// | 1 to 3 mid VAD bits | mid LBRR bit | 1 to 3 side VAD bits | side LBRR bit |
// +---------------------+--------------+----------------------+---------------+
// The upper 1 to 3 bits (dependent on the number of SILK frames) of the LP layer contain VAD bits. If any of
// these VAD bits are 1, then voice activity is present.
if (packet.getUint8(opusFrameOffsets[i][0]) >> (8 - numSilkFrames)) return 1;
// In the stereo case, there is a second set of 1 to 3 VAD bits, so also check these VAD bits.
const channels = this.opusPacketGetNumChannels(packet);
if (
channels === 2 &&
(packet.getUint8(opusFrameOffsets[i][0]) << (numSilkFrames + 1)) >> (8 - numSilkFrames)
) {
return 1;
}
}
// No voice activity was detected.
return 0;
}
/**
* This method is based on Definition of the Opus Audio Codec
* (https://tools.ietf.org/html/rfc6716). Basically, this method is based on
* parsing the LP layer of an Opus packet, particularly the LBRR flag.
*
* @param packet Opus packet.
* @param lenBytes Size of the packet (in bytes).
* @returns true: packet has fec encoding about previous packet.
* false: no fec encoding present.
*/
private opusPacketHasFec(packet: DataView, lenBytes: number): boolean {
if (!packet || lenBytes <= 0) return false;
// In CELT-only mode, packets should not have FEC.
if (this.opusPacketIsCeltOnly(packet)) return false;
const opusFrameOffsets = new Array<[number]>(this.OPUS_MAX_OPUS_FRAMES);
const opusFrameSizes = new Array<[number]>(this.OPUS_MAX_OPUS_FRAMES);
for (let i = 0; i < this.OPUS_MAX_OPUS_FRAMES; ++i) {
opusFrameOffsets[i] = [undefined];
opusFrameSizes[i] = [undefined];
}
// Parse packet to get the Opus frames.
const numOpusFrames = this.opusPacketParse(
packet,
lenBytes,
null,
opusFrameOffsets,
opusFrameSizes,
null
);
if (numOpusFrames < 0) return false;
/* istanbul ignore next */
if (opusFrameSizes[0][0] <= 1) return false;
const numSilkFrames = this.opusNumSilkFrames(packet);
/* istanbul ignore next */
if (numSilkFrames === 0) return false;
const channels = this.opusPacketGetNumChannels(packet);
/* istanbul ignore next */
if (channels !== 1 && channels !== 2) return false;
// A frame starts with the LP layer. The LP layer begins with two to eight
// header bits.These consist of one VAD bit per SILK frame (up to 3),
// followed by a single flag indicating the presence of LBRR frames.
// For a stereo packet, these first flags correspond to the mid channel, and
// a second set of flags is included for the side channel. Because these are
// the first symbols decoded by the range coder and because they are coded
// as binary values with uniform probability, they can be extracted directly
// from the most significant bits of the first byte of compressed data.
for (let i = 0; i < channels; i++) {
if (packet.getUint8(opusFrameOffsets[0][0]) & (0x80 >> ((i + 1) * (numSilkFrames + 1) - 1)))
return true;
}
return false;
}
}
namespace RedundantAudioEncoder {
  // Describes a single audio payload and its metadata as tracked by the encoder.
  export interface Encoding {
    // Timestamp of the payload. NOTE(review): presumably the RTP timestamp, used for RED timestamp offsets and for
    // expiring old history entries — confirm against callers.
    timestamp?: number;
    // Sequence number of the payload. NOTE(review): presumably the RTP sequence number — confirm against callers.
    seq?: number;
    // The encoded audio payload bytes.
    payload: ArrayBuffer;
    // Whether this encoding is a redundant (non-primary) payload.
    isRedundant?: boolean;
    // Whether the payload contains Opus FEC (forward error correction) data.
    hasFec?: boolean;
  }
  // NOTE(review): appears to be a fixed-capacity circular log of per-packet values — confirm against callers.
  export interface PacketLog {
    // Buffer holding the logged values.
    window: Array<number>;
    // Position in `window` at which the next value will be written.
    index: number;
    // Capacity of `window`.
    windowSize: number;
  }
}