Replace `Jimp` with `canvas` for loading images

For the following reasons:
1. Size - Jimp is a large package, making it less suitable for use in the browser, and most of its functionality isn't even used by this library.
2. Speed - According to https://github.com/ivanoff/images-manipulation-performance, `node-canvas` is around 9.3x faster than Jimp.
3. `Image` class - Jimp's `Image` class does not easily support converting between colour formats (RGB vs. RGBA vs. L), which is needed for mask images, for example.

The main downside is that, when resizing, the interpolation method cannot be specified (e.g., bicubic, which CLIP uses). We aim to fix this in the future.
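For illustration (not part of the commit), here is a minimal sketch of how the new `CustomImage` API introduced below might be used from Node.js; the require path and image URL are placeholders:

```js
// Minimal usage sketch for the CustomImage class added in this commit.
// Assumes Node.js with the `canvas` package installed; the require path
// and URL below are hypothetical.
const { CustomImage } = require('./src/image_utils.js');

(async () => {
    // Read an image; CustomImage.read also accepts an existing CustomImage
    let image = await CustomImage.read('https://example.com/cat.png');

    // Resize via the canvas API, then collapse to a single grayscale channel
    image = image.resize(224, 224).grayscale();
    console.log(image.width, image.height, image.channels); // 224 224 1

    // Save to disk (only available where the filesystem is enabled)
    image.save('./cat-gray.png');
})();
```

Note that `drawImage`-based resizing uses the backend's default interpolation; browsers only expose the coarse `imageSmoothingQuality` hint and node-canvas a non-standard `quality` property, so matching CLIP's bicubic resampling exactly is not currently possible.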
Joshua Lochner 2023-04-11 02:09:43 +02:00
parent fdfdd625a8
commit c4c0df5696
6 changed files with 810 additions and 946 deletions

package-lock.json (generated): 1,273 lines changed; diff suppressed because it is too large.

package.json

@@ -30,7 +30,7 @@
   },
   "homepage": "https://github.com/xenova/transformers.js#readme",
   "dependencies": {
-    "jimp": "^0.22.7",
+    "canvas": "^2.11.2",
     "onnxruntime-web": "^1.14.0"
   },
   "optionalDependencies": {
@@ -54,4 +54,4 @@
     "fs": false,
     "path": false
   }
-}
+}

src/image_utils.js

@@ -1,44 +1,283 @@
-// For some reason, Jimp attaches to self, even in Node.
-// https://github.com/jimp-dev/jimp/issues/466
-const _Jimp = require('jimp');
+const fs = require('fs');
+const { getFile, isString } = require('./utils.js');
+const { env } = require('./env.js');
-// @ts-ignore
-const Jimp = (typeof self !== 'undefined') ? (self.Jimp || _Jimp) : _Jimp;
+
+let CanvasClass;
+let ImageClass = typeof Image !== 'undefined' ? Image : null; // Only used for type-checking
-const B64_STRING = /^data:image\/\w+;base64,/;
+let ImageDataClass;
+let loadImageFunction;
+if (typeof self !== 'undefined') {
+    CanvasClass = OffscreenCanvas;
+    loadImageFunction = self.createImageBitmap;
+    ImageDataClass = ImageData;
-/**
- *
- * @param {string} url
- * @returns {Promise<any>}
- */
-async function loadImage(url) {
-    // TODO if already is a Jimp image, return it
-    /** @type {any} */
-    let imgToLoad = url;
-    if (B64_STRING.test(url)) {
-        imgToLoad = imgToLoad.replace(B64_STRING, '');
-        if (typeof Buffer !== 'undefined') {
-            imgToLoad = Buffer.from(imgToLoad, 'base64');
-        } else {
-            let bytes = atob(imgToLoad);
-            // create new ArrayBuffer from binary string
-            imgToLoad = new Uint8Array(new ArrayBuffer(bytes.length));
-            for (let i = 0; i < bytes.length; i++) {
-                imgToLoad[i] = bytes.charCodeAt(i);
-            }
-        }
-    }
-    return await Jimp.read(imgToLoad);
-}
+} else {
+    const { Canvas, loadImage, ImageData, Image } = require('canvas');
+    CanvasClass = Canvas;
+    loadImageFunction = async (/**@type {Blob}*/ b) => await loadImage(Buffer.from(await b.arrayBuffer()));
+    ImageDataClass = ImageData;
+    ImageClass = Image;
+}
-const ImageType = Jimp.JimpType;
+
+class CustomImage {
+
+    /**
+     * Create a new CustomImage object.
+     * @param {Uint8ClampedArray} data - The pixel data.
+     * @param {number} width - The width of the image.
+     * @param {number} height - The height of the image.
+     * @param {number} channels - The number of channels.
+     */
+    constructor(data, width, height, channels) {
+        this._update(data, width, height, channels);
+    }
+
+    /**
+     * Helper method for reading an image from a variety of input types.
+     * @param {CustomImage|string|URL} input
+     * @returns The image object.
+     */
+    static async read(input) {
+        if (input instanceof CustomImage) {
+            return input;
+        } else if (isString(input) || input instanceof URL) {
+            return await this.fromURL(input);
+        } else {
+            throw new Error(`Unsupported input type: ${typeof input}`);
+        }
+    }
+
+    /**
+     * Read an image from a URL or file path.
+     * @param {string|URL} url - The URL or file path to read the image from.
+     * @returns {Promise<CustomImage>} - The image object.
+     */
+    static async fromURL(url) {
+        let response = await getFile(url);
+        let blob = await response.blob();
+        let img = await loadImageFunction(blob);
+        return this.createCanvasAndDraw(img);
+    }
+
+    /**
+     * Helper method to create a new canvas, draw an image/canvas to it, then return the pixel data.
+     * @param {ImageClass|CanvasClass} img - The image/canvas to draw to the canvas.
+     * @param {number} [width=null] - Width of the canvas. If null, the width of the image is used.
+     * @param {number} [height=null] - Height of the canvas. If null, the height of the image is used.
+     * @returns {CustomImage} - The image object.
+     */
+    static createCanvasAndDraw(img, width = null, height = null) {
+        width = width ?? img.width;
+        height = height ?? img.height;
+
+        const ctx = new CanvasClass(width, height).getContext('2d');
+
+        // Draw image to context
+        ctx.drawImage(img, 0, 0, width, height);
+
+        return new this(ctx.getImageData(0, 0, width, height).data, width, height, 4);
+    }
+
+    /**
+     * Convert the image to grayscale format.
+     * @returns {CustomImage} - `this` to support chaining.
+     */
+    grayscale() {
+        if (this.channels === 1) {
+            return this;
+        }
+
+        let newData = new Uint8ClampedArray(this.width * this.height * 1);
+
+        switch (this.channels) {
+            case 3: // rgb to grayscale
+            case 4: // rgba to grayscale
+                for (let i = 0, offset = 0; i < this.data.length; i += this.channels) {
+                    const red = this.data[i];
+                    const green = this.data[i + 1];
+                    const blue = this.data[i + 2];
+                    newData[offset++] = Math.round(0.2989 * red + 0.5870 * green + 0.1140 * blue);
+                }
+                break;
+            default:
+                throw new Error(`Conversion failed due to unsupported number of channels: ${this.channels}`);
+        }
+        return this._update(newData, this.width, this.height, 1);
+    }
+
+    /**
+     * Convert the image to RGB format.
+     * @returns {CustomImage} - `this` to support chaining.
+     */
+    rgb() {
+        if (this.channels === 3) {
+            return this;
+        }
+
+        let newData = new Uint8ClampedArray(this.width * this.height * 3);
+
+        switch (this.channels) {
+            case 1: // grayscale to rgb
+                for (let i = 0, offset = 0; i < this.data.length; ++i) {
+                    newData[offset++] = this.data[i];
+                    newData[offset++] = this.data[i];
+                    newData[offset++] = this.data[i];
+                }
+                break;
+            case 4: // rgba to rgb
+                for (let i = 0, offset = 0; i < this.data.length; i += 4) {
+                    newData[offset++] = this.data[i];
+                    newData[offset++] = this.data[i + 1];
+                    newData[offset++] = this.data[i + 2];
+                }
+                break;
+            default:
+                throw new Error(`Conversion failed due to unsupported number of channels: ${this.channels}`);
+        }
+        return this._update(newData, this.width, this.height, 3);
+    }
+
+    /**
+     * Convert the image to RGBA format.
+     * @returns {CustomImage} - `this` to support chaining.
+     */
+    rgba() {
+        if (this.channels === 4) {
+            return this;
+        }
+
+        let newData = new Uint8ClampedArray(this.width * this.height * 4);
+
+        switch (this.channels) {
+            case 1: // grayscale to rgba
+                for (let i = 0, offset = 0; i < this.data.length; ++i) {
+                    newData[offset++] = this.data[i];
+                    newData[offset++] = this.data[i];
+                    newData[offset++] = this.data[i];
+                    newData[offset++] = 255;
+                }
+                break;
+            case 3: // rgb to rgba
+                for (let i = 0, offset = 0; i < this.data.length; i += 3) {
+                    newData[offset++] = this.data[i];
+                    newData[offset++] = this.data[i + 1];
+                    newData[offset++] = this.data[i + 2];
+                    newData[offset++] = 255;
+                }
+                break;
+            default:
+                throw new Error(`Conversion failed due to unsupported number of channels: ${this.channels}`);
+        }
+        return this._update(newData, this.width, this.height, 4);
+    }
+
+    /**
+     * Resize the image to the given dimensions. This method uses the canvas API to perform the resizing.
+     * @param {number} width - The width of the new image.
+     * @param {number} height - The height of the new image.
+     * @returns {CustomImage} - `this` to support chaining.
+     */
+    resize(width, height) {
+        // Store number of channels before resizing
+        let numChannels = this.channels;
+
+        // Create canvas object for this image
+        let canvas = this.toCanvas();
+
+        // Actually perform resizing using the canvas API
+        let resizedImage = CustomImage.createCanvasAndDraw(canvas, width, height);
+
+        // Convert back so that image has the same number of channels as before
+        return resizedImage.convert(numChannels);
+    }
+
+    toCanvas() {
+        // Clone, and convert data to RGBA before drawing to canvas.
+        // This is because the canvas API only supports RGBA
+        let cloned = this.clone().rgba();
+
+        // Create canvas object for the cloned image
+        let clonedCanvas = new CanvasClass(cloned.width, cloned.height);
+
+        // Draw image to context
+        let data = new ImageDataClass(cloned.data, cloned.width, cloned.height);
+        clonedCanvas.getContext('2d').putImageData(data, 0, 0);
+
+        return clonedCanvas;
+    }
+
+    /**
+     * Helper method to update the image data.
+     * @param {Uint8ClampedArray} data - The new image data.
+     * @param {number} width - The new width of the image.
+     * @param {number} height - The new height of the image.
+     * @param {number} channels - The new number of channels of the image.
+     */
+    _update(data, width, height, channels = null) {
+        this.data = data;
+        this.width = width;
+        this.height = height;
+        if (channels !== null) {
+            this.channels = channels;
+        }
+        return this;
+    }
+
+    /**
+     * Clone the image
+     * @returns {CustomImage} - The cloned image
+     */
+    clone() {
+        return new CustomImage(this.data.slice(), this.width, this.height, this.channels);
+    }
+
+    /**
+     * Helper method for converting image to have a certain number of channels
+     * @param {number} numChannels - The number of channels. Must be 1, 3, or 4.
+     * @returns {CustomImage} - `this` to support chaining.
+     */
+    convert(numChannels) {
+        if (this.channels === numChannels) return this; // Already correct number of channels
+
+        switch (numChannels) {
+            case 1:
+                this.grayscale();
+                break;
+            case 3:
+                this.rgb();
+                break;
+            case 4:
+                this.rgba();
+                break;
+            default:
+                throw new Error(`Conversion failed due to unsupported number of channels: ${this.channels}`);
+        }
+        return this;
+    }
+
+    /**
+     * Save the image to the given path. This method is only available in environments with access to the FileSystem.
+     * @param {string|Buffer|URL} path - The path to save the image to.
+     * @param {string} [mime='image/png'] - The mime type of the image.
+     */
+    save(path, mime = 'image/png') {
+        if (!env.useFS) {
+            throw new Error('Unable to save the image because filesystem is disabled in this environment.')
+        }
+
+        let canvas = this.toCanvas();
+        const buffer = canvas.toBuffer(mime);
+        fs.writeFileSync(path, buffer);
+    }
+}
+
 module.exports = {
-    loadImage,
-    Jimp,
-    ImageType
+    CustomImage,
 };

src/pipelines.js

@@ -27,7 +27,8 @@ const {
     AutoModelForObjectDetection
 } = require("./models.js");
 const {
-    AutoProcessor
+    AutoProcessor,
+    Processor
 } = require("./processors.js");
@@ -36,7 +37,7 @@ const {
 } = require('./env.js');
 const { Tensor, transpose_data } = require("./tensor_utils.js");
-const { Jimp, ImageType, loadImage } = require("./image_utils.js");
+const { CustomImage } = require("./image_utils.js");

 /**
  * Prepare images for further tasks.
@@ -50,8 +51,7 @@ async function prepareImages(images) {
     }

     // Possibly convert any non-images to images
-    images = await Promise.all(images.map(loadImage));
-
+    images = await Promise.all(images.map(x => CustomImage.read(x)));
     return images;
 }
@@ -938,10 +938,6 @@ class ImageClassificationPipeline extends Pipeline {
 }

-/**
- * @typedef {'panoptic'|'instance'|'semantic'} ImageSegmentationSubTask
- */
-
 /**
  * ImageSegmentationPipeline class for executing an image-segmentation task.
  * @extends Pipeline
@@ -951,15 +947,12 @@ class ImageSegmentationPipeline extends Pipeline {
      * Create a new ImageSegmentationPipeline.
      * @param {string} task - The task of the pipeline.
      * @param {Object} model - The model to use for classification.
-     * @param {Function} processor - The function to preprocess images.
+     * @param {Processor} processor - The processor to use for preprocessing images.
      */
     constructor(task, model, processor) {
         super(task, null, model); // TODO tokenizer
         this.processor = processor;

-        /**
-         * @type {Object<ImageSegmentationSubTask, string>}
-         */
         this.subtasks_mapping = {
             // Mapping of subtasks to their corresponding post-processing function names.
             panoptic: 'post_process_panoptic_segmentation',
@@ -975,7 +968,7 @@ class ImageSegmentationPipeline extends Pipeline {
      * @param {number} [options.threshold=0.5] - Probability threshold to filter out predicted masks.
      * @param {number} [options.mask_threshold=0.5] - Threshold to use when turning the predicted masks into binary values.
      * @param {number} [options.overlap_mask_area_threshold=0.8] - Mask overlap threshold to eliminate small, disconnected segments.
-     * @param {null|ImageSegmentationSubTask} [options.subtask=null] - Segmentation task to be performed. One of [`panoptic`, `instance`, and `semantic`], depending on model capabilities. If not set, the pipeline will attempt to resolve (in that order).
+     * @param {null|string} [options.subtask=null] - Segmentation task to be performed. One of [`panoptic`, `instance`, and `semantic`], depending on model capabilities. If not set, the pipeline will attempt to resolve (in that order).
      * @param {Array} [options.label_ids_to_fuse=null] - List of label ids to fuse. If not set, do not fuse any labels.
      * @param {Array} [options.target_sizes=null] - List of target sizes for the input images. If not set, use the original image sizes.
      * @returns {Promise<Array>} - The annotated segments.
@@ -995,7 +988,7 @@ class ImageSegmentationPipeline extends Pipeline {
         }
         images = await prepareImages(images);

-        let imageSizes = images.map(x => [x.bitmap.height, x.bitmap.width]);
+        let imageSizes = images.map(x => [x.height, x.width]);

         let inputs = await this.processor(images);
         let output = await this.model(inputs);
@@ -1028,21 +1021,17 @@ class ImageSegmentationPipeline extends Pipeline {
         )[0];

         let segmentation = processed.segmentation;
         let id2label = this.model.config.id2label;

         for (let segment of processed.segments_info) {
-            let maskData = new Uint8Array(segmentation.data.length);
+            let maskData = new Uint8ClampedArray(segmentation.data.length);
             for (let i = 0; i < segmentation.data.length; ++i) {
                 if (segmentation.data[i] === segment.id) {
                     maskData[i] = 255;
                 }
             }
-            let [transposedData, shape] = transpose_data(maskData, segmentation.dims, [0, 1]);
-            const mask = new Jimp(...shape);
-            mask.bitmap.data = transposedData;
+            let mask = new CustomImage(maskData, segmentation.dims[1], segmentation.dims[0], 1);
             annotation.push({
                 score: segment.score,
@@ -1154,7 +1143,7 @@ class ObjectDetectionPipeline extends Pipeline {
         }
         images = await prepareImages(images);

-        let imageSizes = percentage ? null : images.map(x => [x.bitmap.height, x.bitmap.width]);
+        let imageSizes = percentage ? null : images.map(x => [x.height, x.width]);

         let inputs = await this.processor(images);
         let output = await this.model(inputs);

src/processors.js

@@ -10,6 +10,7 @@ const {
 const FFT = require('./fft.js');
 const { Tensor, transpose, cat, interpolate } = require("./tensor_utils.js");
+const { CustomImage } = require('./image_utils.js');

 /**
  * Helper class to determine model type from config
  */
@@ -129,16 +130,15 @@ class ImageFeatureExtractor extends FeatureExtractor {
     /**
      * Preprocesses the given image.
      *
-     * @param {any} image - The URL of the image to preprocess.
+     * @param {CustomImage} image - The image to preprocess.
      * @returns {Promise<any>} The preprocessed image as a Tensor.
      */
     async preprocess(image) {
-        // image is a Jimp image
-        const srcWidth = image.bitmap.width; // original width
-        const srcHeight = image.bitmap.height; // original height
+        const srcWidth = image.width; // original width
+        const srcHeight = image.height; // original height

-        // resize all images
+        // First, resize all images
         if (this.do_resize) {
             // If `max_size` is set, maintain aspect ratio and resize to `size`
             // while keeping the largest dimension <= `max_size`
@@ -161,34 +161,27 @@ class ImageFeatureExtractor extends FeatureExtractor {
             }
         }

-        const data = image.bitmap.data;
+        // Convert image to RGB
+        image = image.rgb();

-        // Do not include alpha channel
-        let convData = new Float32Array(data.length * 3 / 4);
-
-        let outIndex = 0;
-        for (let i = 0; i < data.length; i += 4) {
-            for (let j = 0; j < 3; ++j) {
-                convData[outIndex++] = data[i + j];
-            }
-        }
+        const pixelData = Float32Array.from(image.data);

         if (this.do_rescale) {
-            for (let i = 0; i < convData.length; ++i) {
-                convData[i] = convData[i] / 255;
+            for (let i = 0; i < pixelData.length; ++i) {
+                pixelData[i] = pixelData[i] / 255;
             }
         }

         if (this.do_normalize) {
-            for (let i = 0; i < convData.length; i += 3) {
+            for (let i = 0; i < pixelData.length; i += 3) {
                 for (let j = 0; j < 3; ++j) {
-                    convData[i + j] = (convData[i + j] - this.image_mean[j]) / this.image_std[j];
+                    pixelData[i + j] = (pixelData[i + j] - this.image_mean[j]) / this.image_std[j];
                 }
             }
         }

-        let imgDims = [image.bitmap.height, image.bitmap.width, 3];
-        let img = new Tensor('float32', convData, imgDims);
+        let imgDims = [image.height, image.width, 3];
+        let img = new Tensor('float32', pixelData, imgDims);

         let transposed = transpose(img, [2, 0, 1]); // hwc -> chw
         return transposed;
@@ -432,20 +425,23 @@ class DetrFeatureExtractor extends ImageFeatureExtractor {
         );

         let segments = [];

-        // 1. Weigh each mask by its prediction score
+        // 1. If target_size is not null, we need to resize the masks to the target size
+        if (target_size !== null) {
+            // resize the masks to the target size
+            for (let i = 0; i < mask_probs.length; ++i) {
+                mask_probs[i] = interpolate(mask_probs[i], target_size, 'bilinear', false);
+            }
+        }
+
+        // 2. Weigh each mask by its prediction score
         // NOTE: `mask_probs` is updated in-place
         //
         // Temporary storage for the best label/scores for each pixel ([height, width]):
         let mask_labels = new Int32Array(mask_probs[0].data.length);
         let bestScores = new Float32Array(mask_probs[0].data.length);

         for (let i = 0; i < mask_probs.length; ++i) {
-            if (target_size !== null) {
-                mask_probs[i] = interpolate(mask_probs[i], target_size, 'bilinear', false);
-            }
-
             let score = pred_scores[i];

             // console.log({ i, item, score, a: bestScores.slice() })
             for (let j = 0; j < mask_probs[i].data.length; ++j) {
                 mask_probs[i].data[j] *= score
@@ -982,5 +978,6 @@ class WhisperProcessor extends Processor {

 module.exports = {
-    AutoProcessor
+    AutoProcessor,
+    Processor,
 }

tests/index.js

@@ -679,7 +679,7 @@ async function image_to_text() {
     ) && isDeepEqual(
         output2,
         [{
-            "generated_text": "a herd of giraffes and zebras grazing in a field"
+            "generated_text": "a herd of giraffes and zebras standing in a field"
         }, {
             "generated_text": "a herd of giraffes and zebras are grazing in a field"
         }]
@@ -689,7 +689,7 @@ async function image_to_text() {
         [{
             "generated_text": "a soccer player is kicking a soccer ball"
         }], [{
-            "generated_text": "a plane is parked at an airport with other planes"
+            "generated_text": "a plane is parked at an airport with a lot of people"
         }]
         ]
     ) && isDeepEqual(
@@ -698,11 +698,11 @@ async function image_to_text() {
         [{
             "generated_text": "a soccer player is kicking a soccer ball"
         }, {
-            "generated_text": "a soccer player is kicking a ball"
+            "generated_text": "a soccer player is kicking a ball in the air"
         }], [{
             "generated_text": "airplanes parked at an airport"
-        }, {
-            "generated_text": "airplanes are parked at an airport"
+        }, {
+            "generated_text": "airplanes are parked on the tarmac at an airport"
         }]
         ]
     ), duration];
@@ -736,18 +736,18 @@ async function image_classification() {
     return [isDeepEqual(
         output1,
-        [{ "label": "tiger, Panthera tigris", "score": 0.7521011829376221 }]
+        [{ "label": "tiger, Panthera tigris", "score": 0.7717679142951965 }]
     ) && isDeepEqual(
         output2,
-        [{ "label": "tiger, Panthera tigris", "score": 0.7521011829376221 }, { "label": "tiger cat", "score": 0.24334438145160675 }]
+        [{ "label": "tiger, Panthera tigris", "score": 0.7717679142951965 }, { "label": "tiger cat", "score": 0.22388941049575806 }]
     ) && isDeepEqual(
         output3,
-        [{ "label": "palace", "score": 0.9980287551879883 }, { "label": "teapot", "score": 0.9890381693840027 }]
+        [{ "label": "palace", "score": 0.9983996152877808 }, { "label": "teapot", "score": 0.9884148836135864 }]
     ) && isDeepEqual(
         output4,
         [
-            [{ "label": "palace", "score": 0.9980287551879883 }, { "label": "monastery", "score": 0.0006073643453419209 }],
-            [{ "label": "teapot", "score": 0.9890381693840027 }, { "label": "coffeepot", "score": 0.0057989382185041904 }]
+            [{ "label": "palace", "score": 0.9983996152877808 }, { "label": "monastery", "score": 0.0005066859303042293 }],
+            [{ "label": "teapot", "score": 0.9884148836135864 }, { "label": "coffeepot", "score": 0.006186090875416994 }]
         ]
     ), duration];
@@ -763,7 +763,7 @@ async function image_segmentation() {
     let outputs = await segmenter(img);

     // Just calculate sum of mask (to avoid having to check the whole mask)
-    outputs.forEach(x => x.mask = x.mask.bitmap.data.reduce((acc, curr) => {
+    outputs.forEach(x => x.mask = x.mask.data.reduce((acc, curr) => {
         if (curr > 0) {
             acc += 1;
         }
@@ -812,25 +812,25 @@ async function zero_shot_image_classification() {
     return [isDeepEqual(
         output1,
         [
-            { "score": 0.9937776923179626, "label": "football" },
-            { "score": 0.0010888857068493962, "label": "airport" },
-            { "score": 0.005133440252393484, "label": "animals" }
+            { "score": 0.9930433034896851, "label": "football" },
+            { "score": 0.0010687140747904778, "label": "airport" },
+            { "score": 0.00588800385594368, "label": "animals" }
         ]
     ) && isDeepEqual(
         output2,
         [
             [
-                { "score": 0.9927281141281128, "label": "football" },
-                { "score": 0.0014022591058164835, "label": "airport" },
-                { "score": 0.005869609769433737, "label": "animals" }
+                { "score": 0.9937127232551575, "label": "football" },
+                { "score": 0.001083463546819985, "label": "airport" },
+                { "score": 0.005203814711421728, "label": "animals" }
             ], [
-                { "score": 0.00029855265165679157, "label": "football" },
-                { "score": 0.9982157945632935, "label": "airport" },
-                { "score": 0.0014856450725346804, "label": "animals" }
+                { "score": 0.00045409638551063836, "label": "football" },
+                { "score": 0.9976944923400879, "label": "airport" },
+                { "score": 0.0018514387775212526, "label": "animals" }
            ], [
-                { "score": 0.012006462551653385, "label": "football" },
-                { "score": 0.01685832068324089, "label": "airport" },
-                { "score": 0.9711351990699768, "label": "animals" }
+                { "score": 0.012140189297497272, "label": "football" },
+                { "score": 0.013895479030907154, "label": "airport" },
+                { "score": 0.9739643335342407, "label": "animals" }
            ]
         ]
     ), duration];
@@ -864,31 +864,31 @@ async function object_detection() {
     return [isDeepEqual(
         output1,
         {
-            boxes: [
-                [359.76656198501587, 247.15871572494507, 402.9358148574829, 315.704562664032],
-                [109.06712919473648, 237.22267627716064, 234.6556493639946, 324.0059995651245],
-                [2.541865110397339, 148.06851625442505, 221.13489389419556, 255.35571813583374],
-                [186.80795073509216, 230.4118824005127, 322.8719401359558, 305.1347064971924],
-                [349.60298001766205, 95.32436728477478, 547.5894981622696, 311.53558373451233]
+            "boxes": [
+                [358.96632492542267, 247.58064329624176, 401.70598447322845, 315.80701768398285],
+                [110.12272596359253, 236.77275717258453, 235.19042015075684, 323.5002797842026],
+                [3.7028244137763977, 148.02273631095886, 221.12379759550095, 254.5628035068512],
+                [188.50673854351044, 230.62812745571136, 322.1572870016098, 305.23363173007965],
+                [350.2080622315407, 94.52754735946655, 547.5165876746178, 310.9271836280823]
             ],
-            classes: [24, 24, 25, 24, 25],
-            labels: ["zebra", "zebra", "giraffe", "zebra", "giraffe"],
-            scores: [0.9990037083625793, 0.9987165331840515, 0.93809574842453, 0.9977785348892212, 0.9987764358520508],
+            "classes": [24, 24, 25, 24, 25],
+            "scores": [0.9989174008369446, 0.9985705614089966, 0.9560438394546509, 0.9976871013641357, 0.9989231824874878],
+            "labels": ["zebra", "zebra", "giraffe", "zebra", "giraffe"]
         }
     ) && isDeepEqual(
         output2,
         [{
-            boxes: [
-                [0.11961111426353455, 0.8362486660480499, 0.22848913073539734, 0.9653392732143402],
-                [0.12597772479057312, 0.02483522891998291, 0.62779501080513, 0.9831656217575073],
-                [-0.014880642294883728, 0.08133217692375183, 0.21618883311748505, 0.7050653994083405],
-                [0.12513580918312073, 0.022572606801986694, 0.7006023824214935, 0.9825432598590851],
-                [0.6693135350942612, 0.053024232387542725, 0.8096815496683121, 0.6173807978630066],
-                [0.527036190032959, 0.1286628246307373, 0.8026435375213623, 0.9251552820205688]
+            "boxes": [
+                [0.11884483695030212, 0.8355862200260162, 0.2269599735736847, 0.9643512666225433],
+                [0.12690269947052002, 0.023795485496520996, 0.6280449032783508, 0.9843276739120483],
+                [-0.014768391847610474, 0.0791754424571991, 0.21561279892921448, 0.7040039002895355],
+                [0.12554875016212463, 0.021175920963287354, 0.6984966695308685, 0.9823558926582336],
+                [0.665948748588562, 0.05154389142990112, 0.8112401962280273, 0.615310549736023],
+                [0.5251416265964508, 0.12678277492523193, 0.801356166601181, 0.9241014719009399]
            ],
-            classes: [37, 1, 1, 1, 1, 1],
-            labels: ["sports ball", "person", "person", "person", "person", "person"],
-            scores: [0.9995566010475159, 0.9635121822357178, 0.9992444515228271, 0.9051326513290405, 0.9229124188423157, 0.9993530511856079],
+            "classes": [37, 1, 1, 1, 1, 1],
+            "scores": [0.9995761513710022, 0.9574956893920898, 0.9992514848709106, 0.9042971730232239, 0.937954843044281, 0.9991750121116638],
+            "labels": ["sports ball", "person", "person", "person", "person", "person"]
         }]
     ), duration];