async _call()

in src/pipelines.js [995:1076]

Runs the text-generation pipeline: normalizes string or chat inputs, applies the chat template to chats, tokenizes with left padding, generates, then decodes and regroups the outputs per input prompt.

    async _call(texts, generate_kwargs = {}) {
        let isBatched = false;
        let isChatInput = false;

        // Normalize inputs
        /** @type {string[]} */
        let inputs;
        if (typeof texts === 'string') {
            inputs = texts = [texts];
        } else if (Array.isArray(texts) && texts.every(x => typeof x === 'string')) {
            isBatched = true;
            inputs = /** @type {string[]} */(texts);
        } else {
            if (isChat(texts)) {
                texts = [/** @type {Chat} */(texts)];
            } else if (Array.isArray(texts) && texts.every(isChat)) {
                isBatched = true;
            } else {
                throw new Error('Input must be a string, an array of strings, a Chat, or an array of Chats');
            }
            isChatInput = true;

            // If the input is a chat, we need to apply the chat template
            inputs = /** @type {string[]} */(/** @type {Chat[]} */ (texts).map(
                x => this.tokenizer.apply_chat_template(x, {
                    tokenize: false,
                    add_generation_prompt: true,
                })
            ));
        }

        // By default, do not add special tokens
        const add_special_tokens = generate_kwargs.add_special_tokens ?? false;

        // By default, return full text
        const return_full_text = isChatInput
            ? false
            : generate_kwargs.return_full_text ?? true;

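        // Pad on the left so every prompt in the batch ends at the same position,
        // letting decoder-only generation continue straight from the prompt tokens.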
        this.tokenizer.padding_side = 'left';
        const text_inputs = this.tokenizer(inputs, {
            add_special_tokens,
            padding: true,
            truncation: true,
        });

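        // Run generation; for decoder-only models the returned sequences include
        // the prompt tokens followed by the newly generated tokens.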
        const outputTokenIds = /** @type {Tensor} */(await this.model.generate({
            ...text_inputs,
            ...generate_kwargs
        }));

        const decoded = this.tokenizer.batch_decode(outputTokenIds, {
            skip_special_tokens: true,
        });

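        // To return only the generated part, measure each decoded prompt's length
        // in characters (not tokens), since the trimming below slices strings.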
        let promptLengths;
        if (!return_full_text && text_inputs.input_ids.dims.at(-1) > 0) {
            promptLengths = this.tokenizer.batch_decode(text_inputs.input_ids, {
                skip_special_tokens: true,
            }).map(x => x.length);
        }

        /** @type {TextGenerationOutput[][]} */
        const toReturn = Array.from({ length: texts.length }, _ => []);
        for (let i = 0; i < decoded.length; ++i) {
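            // Map each output sequence back to its source input; generate may
            // produce several sequences per input (e.g. num_return_sequences > 1).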
            const textIndex = Math.floor(i / outputTokenIds.dims[0] * texts.length);

            if (promptLengths) {
                // Trim the decoded text to only include the generated part
                decoded[i] = decoded[i].slice(promptLengths[textIndex]);
            }
            toReturn[textIndex].push({
                generated_text: isChatInput
                    ? [
                        ...((/** @type {Chat[]} */(texts)[textIndex])),
                        { role: 'assistant', content: decoded[i] },
                    ]
                    : decoded[i]
            });
        }
        return (!isBatched && toReturn.length === 1) ? toReturn[0] : toReturn;
    }
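
A minimal usage sketch, assuming the @huggingface/transformers package (the model id below is illustrative, not prescribed by this code):

    import { pipeline } from '@huggingface/transformers';

    const generator = await pipeline('text-generation', 'onnx-community/Qwen2.5-0.5B-Instruct');

    // Plain string: _call wraps it in a one-element batch; return_full_text
    // defaults to true, so the prompt is kept in the output.
    const out = await generator('Once upon a time,', { max_new_tokens: 32 });
    // [{ generated_text: 'Once upon a time, ...' }]

    // Chat input: the chat template is applied and return_full_text defaults
    // to false, so only the new assistant message is appended.
    const chat = [{ role: 'user', content: 'Tell me a joke.' }];
    const res = await generator(chat, { max_new_tokens: 64 });
    // [{ generated_text: [...chat, { role: 'assistant', content: '...' }] }]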