in src/pipelines.js [995:1076]
async _call(texts, generate_kwargs = {}) {
let isBatched = false;
let isChatInput = false;
// Normalize inputs
/** @type {string[]} */
let inputs;
if (typeof texts === 'string') {
inputs = texts = [texts];
} else if (Array.isArray(texts) && texts.every(x => typeof x === 'string')) {
isBatched = true;
inputs = /** @type {string[]} */(texts);
} else {
if (isChat(texts)) {
texts = [/** @type {Chat} */(texts)];
} else if (Array.isArray(texts) && texts.every(isChat)) {
isBatched = true;
} else {
throw new Error('Input must be a string, an array of strings, a Chat, or an array of Chats');
}
isChatInput = true;
// If the input is a chat, we need to apply the chat template
inputs = /** @type {string[]} */(/** @type {Chat[]} */ (texts).map(
x => this.tokenizer.apply_chat_template(x, {
tokenize: false,
add_generation_prompt: true,
})
));
}
// By default, do not add special tokens
const add_special_tokens = generate_kwargs.add_special_tokens ?? false;
// By default, return full text
const return_full_text = isChatInput
? false
: generate_kwargs.return_full_text ?? true;
this.tokenizer.padding_side = 'left';
const text_inputs = this.tokenizer(inputs, {
add_special_tokens,
padding: true,
truncation: true,
});
const outputTokenIds = /** @type {Tensor} */(await this.model.generate({
...text_inputs,
...generate_kwargs
}));
const decoded = this.tokenizer.batch_decode(outputTokenIds, {
skip_special_tokens: true,
});
let promptLengths;
if (!return_full_text && text_inputs.input_ids.dims.at(-1) > 0) {
promptLengths = this.tokenizer.batch_decode(text_inputs.input_ids, {
skip_special_tokens: true,
}).map(x => x.length);
}
/** @type {TextGenerationOutput[]} */
const toReturn = Array.from({ length: texts.length }, _ => []);
for (let i = 0; i < decoded.length; ++i) {
const textIndex = Math.floor(i / outputTokenIds.dims[0] * texts.length);
if (promptLengths) {
// Trim the decoded text to only include the generated part
decoded[i] = decoded[i].slice(promptLengths[textIndex]);
}
toReturn[textIndex].push({
generated_text: isChatInput
? [
...((/** @type {Chat[]} */(texts)[textIndex])),
{ role: 'assistant', content: decoded[i] },
]
: decoded[i]
});
}
return (!isBatched && toReturn.length === 1) ? toReturn[0] : toReturn;
}