in src/models.js [3407:3472]
async generate({
    inputs = null,
    generation_config = null,
    logits_processor = null,
    stopping_criteria = null,

    // Whisper-specific options (passed to kwargs)
    // prompt_ids = null,
    // language = null,
    // task = null,
    ...kwargs
}) {
    generation_config = this._prepare_generation_config(generation_config, kwargs);
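
    // Build the initial decoder tokens (start-of-transcript, language and task tokens),
    // unless the caller explicitly provided `decoder_input_ids`.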
    const init_tokens = kwargs.decoder_input_ids ?? this._retrieve_init_tokens(generation_config);
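
    // Segment-level timestamps: constrain decoding so that well-formed timestamp tokens are emitted.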
    if (generation_config.return_timestamps) {
        logits_processor ??= new LogitsProcessorList();
        logits_processor.push(
            new WhisperTimeStampLogitsProcessor(generation_config, init_tokens)
        );
    }
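
    // Suppress the configured tokens at the first generation step, i.e. immediately after the initial prompt tokens.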
    if (generation_config.begin_suppress_tokens) {
        logits_processor ??= new LogitsProcessorList();
        logits_processor.push(
            new SuppressTokensAtBeginLogitsProcessor(generation_config.begin_suppress_tokens, init_tokens.length)
        );
    }
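
    // Token-level (word) timestamps are computed from the cross-attention weights of the model's
    // alignment heads, so attentions must be returned alongside the generated sequences.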
    if (generation_config.return_token_timestamps) {
        if (!generation_config.alignment_heads) {
            throw new Error(
                "Model generation config has no `alignment_heads`, token-level timestamps not available. " +
                "See https://gist.github.com/hollance/42e32852f24243b748ae6bc1f985b13a on how to add this property to the generation config."
            );
        }
        if (generation_config.task === 'translate') {
            console.warn("Token-level timestamps may not be reliable for task 'translate'.");
        }
        generation_config.output_attentions = true;
        generation_config.return_dict_in_generate = true;
    }
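
    // Delegate to the base encoder-decoder generation loop, forcing the initial decoder tokens.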
    const outputs = await super.generate({
        inputs,
        generation_config,
        logits_processor,
        decoder_input_ids: init_tokens,
        ...kwargs
    });
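
    // Post-process: derive per-token timestamps from the cross-attention weights of the alignment heads.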
    if (generation_config.return_token_timestamps) {
        outputs["token_timestamps"] = this._extract_token_timestamps(
            // @ts-expect-error TS2345
            outputs,
            generation_config.alignment_heads,
            generation_config.num_frames,
        );
    }

    return outputs;
}
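
For context, a minimal caller-side sketch of how this method is typically invoked. This is an illustration only: the checkpoint id, audio file, and package entry point are placeholders/assumptions rather than anything taken from the source above, and the exact call pattern may differ slightly across library versions.

    import { AutoProcessor, WhisperForConditionalGeneration, read_audio } from '@huggingface/transformers'; // '@xenova/transformers' on 2.x

    const model_id = 'Xenova/whisper-tiny.en'; // placeholder checkpoint
    const processor = await AutoProcessor.from_pretrained(model_id);
    const model = await WhisperForConditionalGeneration.from_pretrained(model_id);

    // Whisper expects 16 kHz mono audio.
    const audio = await read_audio('audio.wav', 16000);
    const inputs = await processor(audio);

    const outputs = await model.generate({
        ...inputs,
        return_timestamps: true,          // segment-level timestamp tokens
        // return_token_timestamps: true, // per-token timestamps; requires `alignment_heads` in the generation config
    });
    // With `return_token_timestamps`, `outputs` also carries the `token_timestamps`
    // field populated by the post-processing step above.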