async preprocess()

in src/base/image_processors_utils.js [936:1054]


    async preprocess(image, {
        do_normalize = null,
        do_pad = null,
        do_convert_rgb = null,
        do_convert_grayscale = null,
        do_flip_channel_order = null,
    } = {}) {
        if (this.do_crop_margin) {
            // NOTE: Specific to nougat processors. This is done before resizing,
            // and can be interpreted as a pre-preprocessing step.
            image = await this.crop_margin(image);
        }

        const [srcWidth, srcHeight] = image.size; // original image size

        // Convert image to RGB if specified in config.
        if (do_convert_rgb ?? this.do_convert_rgb) {
            image = image.rgb();
        } else if (do_convert_grayscale) {
            image = image.grayscale();
        }

        // TODO:
        // For efficiency reasons, it might be best to merge the resize and center crop operations into one.

        // Resize all images
        if (this.do_resize) {
            image = await this.resize(image);
        }

        // Resize the image using thumbnail method.
        if (this.do_thumbnail) {
            // @ts-expect-error TS2345
            image = await this.thumbnail(image, this.size, this.resample);
        }

        if (this.do_center_crop) {

            let crop_width;
            let crop_height;
            if (Number.isInteger(this.crop_size)) {
                crop_width = this.crop_size;
                crop_height = this.crop_size;
            } else {
                crop_width = this.crop_size.width;
                crop_height = this.crop_size.height;
            }

            image = await image.center_crop(crop_width, crop_height);
        }

        /** @type {HeightWidth} */
        const reshaped_input_size = [image.height, image.width];

        // NOTE: All pixel-level manipulation (i.e., modifying `pixelData`)
        // occurs with data in the hwc format (height, width, channels), 
        // to emulate the behavior of the original Python code (w/ numpy).
        /** @type {Float32Array} */
        let pixelData = Float32Array.from(image.data);
        let imgDims = [image.height, image.width, image.channels];

        if (this.do_rescale) {
            this.rescale(pixelData);
        }

        if (do_normalize ?? this.do_normalize) {
            let image_mean = this.image_mean;
            if (!Array.isArray(this.image_mean)) {
                image_mean = new Array(image.channels).fill(image_mean);
            }

            let image_std = this.image_std;
            if (!Array.isArray(this.image_std)) {
                image_std = new Array(image.channels).fill(image_mean);
            }

            if (image_mean.length !== image.channels || image_std.length !== image.channels) {
                throw new Error(`When set to arrays, the length of \`image_mean\` (${image_mean.length}) and \`image_std\` (${image_std.length}) must match the number of channels in the image (${image.channels}).`);
            }

            for (let i = 0; i < pixelData.length; i += image.channels) {
                for (let j = 0; j < image.channels; ++j) {
                    pixelData[i + j] = (pixelData[i + j] - image_mean[j]) / image_std[j];
                }
            }
        }

        // do padding after rescaling/normalizing
        if (do_pad ?? this.do_pad) {
            if (this.pad_size) {
                const padded = this.pad_image(pixelData, [image.height, image.width, image.channels], this.pad_size);
                [pixelData, imgDims] = padded; // Update pixel data and image dimensions
            } else if (this.size_divisibility) {
                const [paddedWidth, paddedHeight] = enforce_size_divisibility([imgDims[1], imgDims[0]], this.size_divisibility);
                [pixelData, imgDims] = this.pad_image(pixelData, imgDims, { width: paddedWidth, height: paddedHeight });
            }
        }

        if (do_flip_channel_order ?? this.do_flip_channel_order) {
            if (imgDims[2] !== 3) {
                throw new Error('Flipping channel order is only supported for RGB images.');
            }
            // Convert RGB to BGR
            for (let i = 0; i < pixelData.length; i += 3) {
                const temp = pixelData[i];
                pixelData[i] = pixelData[i + 2];
                pixelData[i + 2] = temp;
            }
        }

        const pixel_values = new Tensor('float32', pixelData, imgDims)
            .permute(2, 0, 1); // convert to channel dimension format (hwc -> chw)

        return {
            original_size: [srcHeight, srcWidth],
            reshaped_input_size: reshaped_input_size,
            pixel_values,
        }
    }