in src/tokenizers.js [3902:4004]
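/**
 * Merge overlapping token sequences (e.g. from chunked Whisper decoding) by finding,
 * for each consecutive pair, the alignment with the longest common run of tokens and
 * stitching the sequences together at the middle of that overlap.
 * (Descriptive doc comment; summarizes the behavior of the code below.)
 * @param {number[][]} sequences Token sequences, in order; adjacent sequences are expected to overlap.
 * @param {number[][]} [token_timestamp_sequences] Optional per-token timestamps, split at the same cut points.
 * @returns {[number[], number[]]} The merged token sequence and the merged timestamp sequence
 *   (the latter is empty when no timestamps are provided).
 */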
findLongestCommonSequence(sequences, token_timestamp_sequences = null) {
    // It would be much harder to do O(n) because of fault tolerance.
    // We actually have a really good property which is that the total sequence
    // MUST be those subsequences in order.
    // If token_timestamp_sequences is provided, those sequences are split in
    // exactly the same way.
    let leftSequence = sequences[0];
    let leftLength = leftSequence.length;
    let totalSequence = [];
    const use_token_timestamp_sequences = Array.isArray(token_timestamp_sequences) && token_timestamp_sequences.length > 0;
    let total_token_timestamp_sequence = use_token_timestamp_sequences ? [] : null;
    let left_token_timestamp_sequence = use_token_timestamp_sequences ? token_timestamp_sequences[0] : null;
    for (let i = 1; i < sequences.length; ++i) {
        const rightSequence = sequences[i];
        let max = 0.0;
        let maxIndices = [leftLength, leftLength, 0, 0];
        // Here we're sliding matches
        // [a, b, c, d]
        //          [c, d, f]
        // =          [c] == [d]
        //
        // [a, b, c, d]
        //       [c, d, f]
        // =       [c, d] == [c, d]
        //
        // [a, b, c, d]
        //    [c, d, f]
        // =    [b, c, d] == [c, d, f]
        //
        // [a, b, c, d]
        // [c, d, f]
        // = [a, b, c] == [c, d, f]
        //
        // [a, b, c, d]
        // [d, f]
        // = [a, b] == [d, f]
        //
        // [a, b, c, d]
        // [f]
        // = [a] == [f]
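        //
        // Every possible alignment of the two sequences is scored below; the
        // alignment with the highest density of matching tokens (plus a small
        // length bonus) wins.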
        const rightLength = rightSequence.length;
        for (let j = 1; j < leftLength + rightLength; ++j) {
            // Slightly convoluted because we don't want out of bound indices
            // This will be necessary for a small conflict resolution optimization
            // later
            const leftStart = Math.max(0, leftLength - j);
            const leftStop = Math.min(leftLength, leftLength + rightLength - j);
            const left = leftSequence.slice(leftStart, leftStop);
            const rightStart = Math.max(0, j - leftLength);
            const rightStop = Math.min(rightLength, j);
            const right = rightSequence.slice(rightStart, rightStop);
            if (left.length !== right.length) {
                throw new Error("There is a bug within whisper `decode_asr` function, please report it. Dropping to prevent bad inference.");
            }
            let matches;
            if (use_token_timestamp_sequences) {
                // Get length of longest subsequence of tokens that match
                // and have timestamps that are in order
                matches = left.filter((elem, idx) => (
                    elem === right[idx]
                    && left_token_timestamp_sequence[leftStart + idx] <= token_timestamp_sequences[i][rightStart + idx]
                )).length;
            } else {
                matches = left.filter((elem, idx) => elem === right[idx]).length;
            }
            // epsilon to favor long perfect matches
            const eps = j / 10000.0;
            const matching = matches / j + eps;
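            // e.g. a perfect 2-token overlap scores 2/2 + 2/10000 = 1.0002, while a
            // perfect 3-token overlap scores 3/3 + 3/10000 = 1.0003, so the epsilon
            // term breaks ties in favor of the longer perfect match.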
            if (matches > 1 && matching > max) {
                max = matching;
                maxIndices = [leftStart, leftStop, rightStart, rightStop];
            }
        }
        const [leftStart, leftStop, rightStart, rightStop] = maxIndices;
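        // Resolve the overlapping region by cutting it in the middle: keep the left
        // sequence up to the midpoint of its matched slice, and take the right
        // sequence from the midpoint of its matched slice onwards.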
        const leftMid = Math.floor((leftStop + leftStart) / 2);
        const rightMid = Math.floor((rightStop + rightStart) / 2);
        totalSequence.push(...leftSequence.slice(0, leftMid));
        leftSequence = rightSequence.slice(rightMid);
        leftLength = leftSequence.length;
        if (use_token_timestamp_sequences) {
            total_token_timestamp_sequence.push(...left_token_timestamp_sequence.slice(0, leftMid));
            left_token_timestamp_sequence = token_timestamp_sequences[i].slice(rightMid);
        }
    }
    totalSequence.push(...leftSequence);
    if (use_token_timestamp_sequences) {
        total_token_timestamp_sequence.push(...left_token_timestamp_sequence);
        return [totalSequence, total_token_timestamp_sequence];
    } else {
        return [totalSequence, []];
    }
}
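
// Usage sketch (hypothetical values; `tokenizer` is assumed to be a
// WhisperTokenizer-like instance exposing this method, and is not part of the
// excerpt above): two chunked decodes whose token streams overlap on [7, 8]
// are stitched into a single stream, cut at the middle of the overlap.
//
//   const [merged, mergedTimestamps] = tokenizer.findLongestCommonSequence([
//       [5, 6, 7, 8],
//       [7, 8, 9, 10],
//   ]);
//   // merged           -> [5, 6, 7, 8, 9, 10]
//   // mergedTimestamps -> [] (no token_timestamp_sequences passed)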