From aa910b40984e6bd6c90cff0e68618659f13a025c Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Sat, 6 May 2023 04:15:12 +0200 Subject: [PATCH] Update JSDocs --- src/audio_utils.js | 63 +++++++++++++ src/backends/onnx.js | 32 ++++--- src/env.js | 75 +++++++++------ src/generation.js | 8 ++ src/image_utils.js | 9 ++ src/math_utils.js | 13 ++- src/models.js | 211 ++++++++++++++++++++++++++----------------- src/pipelines.js | 160 +++++++++++++------------------- src/processors.js | 37 ++++++-- src/samplers.js | 9 ++ src/tensor_utils.js | 12 ++- src/tokenizers.js | 30 +++++- src/transformers.js | 60 +++++------- src/utils.js | 22 +++++ src/utils/hub.js | 8 +- 15 files changed, 477 insertions(+), 272 deletions(-) create mode 100644 src/audio_utils.js diff --git a/src/audio_utils.js b/src/audio_utils.js new file mode 100644 index 0000000..46905a0 --- /dev/null +++ b/src/audio_utils.js @@ -0,0 +1,63 @@ +/** + * @file Helper module for audio processing. + * + * These functions and classes are only used internally, + * meaning an end-user shouldn't need to access anything here. + * + * @module audio_utils + */ + +import { + getFile, +} from './utils/hub.js'; + +export async function read_audio(url, sampling_rate) { + // Attempting to load from path/url + + if (typeof AudioContext === 'undefined') { + // Running in node or an environment without AudioContext + throw Error( + "Unable to load audio from path/URL since `AudioContext` is not available in your environment. " + + "As a result, audio data must be passed directly to the processor. " + + "If you are running in node.js, you can use an external library (e.g., https://github.com/audiojs/web-audio-api) to do this." + ) + } + const response = await (await getFile(url)).arrayBuffer(); + const audioCTX = new AudioContext({ sampleRate: sampling_rate }); + const decoded = await audioCTX.decodeAudioData(response); + let audio; + + // We now replicate HuggingFace's `ffmpeg_read` method: + if (decoded.numberOfChannels === 2) { + // When downmixing a stereo audio file to mono using the -ac 1 option in FFmpeg, + // the audio signal is summed across both channels to create a single mono channel. + // However, if the audio is at full scale (i.e. the highest possible volume level), + // the summing of the two channels can cause the audio signal to clip or distort. + + // To prevent this clipping, FFmpeg applies a scaling factor of 1/sqrt(2) (~ 0.707) + // to the audio signal before summing the two channels. This scaling factor ensures + // that the combined audio signal will not exceed the maximum possible level, even + // if both channels are at full scale. + + // After applying this scaling factor, the audio signal from both channels is summed + // to create a single mono channel. It's worth noting that this scaling factor is + // only applied when downmixing stereo audio to mono using the -ac 1 option in FFmpeg. + // If you're using a different downmixing method, or if you're not downmixing the + // audio at all, this scaling factor may not be needed. 
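+ // As a quick worked example of the formula below: if both channels hold a
+ // sample of 0.5, the downmixed value is Math.sqrt(2) * (0.5 + 0.5) / 2 ≈ 0.707.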
+ const SCALING_FACTOR = Math.sqrt(2); + + let left = decoded.getChannelData(0); + let right = decoded.getChannelData(1); + + audio = new Float32Array(left.length); + for (let i = 0; i < decoded.length; ++i) { + audio[i] = SCALING_FACTOR * (left[i] + right[i]) / 2; + } + + } else { + // If the audio is not stereo, we can just use the first channel: + audio = decoded.getChannelData(0); + } + + return audio; +} \ No newline at end of file diff --git a/src/backends/onnx.js b/src/backends/onnx.js index bc4deb7..de8879d 100644 --- a/src/backends/onnx.js +++ b/src/backends/onnx.js @@ -1,13 +1,20 @@ -// Handler file for choosing the correct version of ONNX Runtime, based on the environment. -// -// Ideally, we could import the `onnxruntime-web` and `onnxruntime-node` packages only when needed, -// but dynamic imports don't seem to work with the current webpack version and/or configuration. -// This is possibly due to the experimental nature of top-level await statements. -// -// So, we just import both packages, and use the appropriate one based on the environment. -// - When running in node, we use `onnxruntime-node`. -// - When running in the browser, we use `onnxruntime-web` (`onnxruntime-node` is not bundled). - +/** + * @file Handler file for choosing the correct version of ONNX Runtime, based on the environment. + * Ideally, we could import the `onnxruntime-web` and `onnxruntime-node` packages only when needed, + * but dynamic imports don't seem to work with the current webpack version and/or configuration. + * This is possibly due to the experimental nature of top-level await statements. + * So, we just import both packages, and use the appropriate one based on the environment. + * - When running in node, we use `onnxruntime-node`. + * - When running in the browser, we use `onnxruntime-web` (`onnxruntime-node` is not bundled). + * + * This module is not directly exported, but can be accessed through the environment variables: + * ```javascript + * import { env } from '@xenova/transformers'; + * console.log(env.backends.onnx); + * ``` + * + * @module backends/onnx + */ // NOTE: Import order matters here. We need to import `onnxruntime-node` before `onnxruntime-web`. import ONNX_NODE from 'onnxruntime-node'; @@ -15,7 +22,10 @@ import ONNX_WEB from 'onnxruntime-web'; export let ONNX; -export const executionProviders = ['wasm']; +export const executionProviders = [ + // 'webgpu', + 'wasm' +]; if (typeof process !== 'undefined') { // Running in a node-like environment. diff --git a/src/env.js b/src/env.js index 9480787..fbbb37e 100644 --- a/src/env.js +++ b/src/env.js @@ -1,3 +1,27 @@ +/** + * @file Module used to configure Transformers.js. For the full list of possible options, @see {@link env}. + * + * **Example:** Disable remote models. + * ```javascript + * import { env } from '@xenova/transformers'; + * env.allowRemoteModels = false; + * ``` + * + * **Example:** Set local model path. + * ```javascript + * import { env } from '@xenova/transformers'; + * env.localModelPath = '/path/to/local/models/'; + * ``` + * + * **Example:** Set cache directory. 
+ * ```javascript + * import { env } from '@xenova/transformers'; + * env.cacheDir = '/path/to/cache/directory/'; + * ``` + * + * @module env + */ + import fs from 'fs'; import path from 'path'; import url from 'url'; @@ -37,11 +61,23 @@ onnx_env.wasm.wasmPaths = RUNNING_LOCALLY : 'https://cdn.jsdelivr.net/npm/@xenova/transformers/dist/'; -// Global variable used to control exection, with suitable defaults +/** + * Global variable used to control execution, with suitable defaults + * @property {object} backends Expose environment variables of different backends, + * allowing users to set these variables if they want to. + * @property {string} remoteHost Host URL to load models from. Defaults to the Hugging Face Hub. + * @property {string} remotePathTemplate Path template to fill in and append to `remoteHost` when loading models. + * @property {boolean} allowRemoteModels Whether to allow loading of remote files, defaults to `true`. + * If set to `false`, it will have the same effect as setting `local_files_only=true` when loading pipelines, models, tokenizers, processors, etc. + * @property {string} localModelPath Path to load local models from. Defaults to `/models/`. + * @property {boolean} useFS Whether to use the file system to load files. By default, it is `true` if available. + * @property {string} __dirname Directory name of module. Useful for resolving local paths. + * @property {boolean} useBrowserCache Whether to use Cache API to cache models. By default, it is `true` if available. + * @property {boolean} useFSCache Whether to use the file system to cache files. By default, it is `true` if available. + * @property {string} cacheDir The directory to use for caching files with the file system. By default, it is `./.cache`. +*/ export const env = { - // Expose environment variables of different backends, allowing users to set - // these variables if they want to. - // TODO - will be used when we add more backends + /////////////////// Backends settings /////////////////// backends: { // onnxruntime-web/onnxruntime-node onnx: onnx_env, @@ -50,40 +86,27 @@ export const env = { tfjs: {}, }, - // URL to load models from. Defaults to the Hugging Face Hub. - remoteHost: 'https://huggingface.co/', - remotePathTemplate: '{model}/resolve/{revision}/', - - // Whether to allow loading of remote files, defaults to `true`. - // If set to `false`, it will have the same effect as setting `local_files_only=true` - // when loading pipelines, models, tokenizers, processors, etc. - allowRemoteModels: true, - - // Local URL to load models from. - localModelPath: localModelPath, - - // Whether to use the file system to load files. By default, it is true available. - useFS: FS_AVAILABLE, - - // Directory name of module. Useful for resolving local paths. __dirname, + /////////////////// Model settings /////////////////// remoteHost: 'https://huggingface.co/', remotePathTemplate: '{model}/resolve/{revision}/', allowRemoteModels: true, localModelPath: localModelPath, useFS: FS_AVAILABLE, /////////////////// Cache settings /////////////////// - // Whether to use Cache API to cache models. By default, it is true if available. useBrowserCache: WEB_CACHE_AVAILABLE, - - // Whether to use the file system to cache files. By default, it is true available. useFSCache: FS_AVAILABLE, - - // The directory to use for caching files with the file system. By default, it is `./.cache`.
cacheDir: DEFAULT_CACHE_DIR, - ////////////////////////////////////////////////// + ////////////////////////////////////////////////// } /** * @param {object} obj + * @private */ function isEmpty(obj) { return Object.keys(obj).length === 0; diff --git a/src/generation.js b/src/generation.js index 9e93132..d55af08 100644 --- a/src/generation.js +++ b/src/generation.js @@ -1,3 +1,11 @@ + +/** + * @file Classes, functions, and utilities for generation. + * + * @todo Describe how to create a custom `GenerationConfig`. + * + * @module generation + */ import { Tensor } from './tensor_utils.js'; import { Callable, diff --git a/src/image_utils.js b/src/image_utils.js index d24ab02..8f59797 100644 --- a/src/image_utils.js +++ b/src/image_utils.js @@ -1,4 +1,13 @@ +/** + * @file Helper module for image processing. + * + * These functions and classes are only used internally, + * meaning an end-user shouldn't need to access anything here. + * + * @module image_utils + */ + import fs from 'fs'; import { isString } from './utils.js'; import { env } from './env.js'; diff --git a/src/math_utils.js b/src/math_utils.js index 5ac012f..0ba907b 100644 --- a/src/math_utils.js +++ b/src/math_utils.js @@ -1,4 +1,13 @@ +/** + * @file Helper module for mathematical processing. + * + * These functions and classes are only used internally, + * meaning an end-user shouldn't need to access anything here. + * + * @module math_utils + */ + /** * @typedef {Int8Array | Uint8Array | Uint8ClampedArray | Int16Array | Uint16Array | Int32Array | Uint32Array | Float32Array | Float64Array} TypedArray * @typedef {BigInt64Array | BigUint64Array} BigTypedArray @@ -8,7 +17,7 @@ /** * @param {TypedArray} input */ -export function interpolate(input, [in_channels, in_height, in_width], [out_height, out_width], mode = 'bilinear', align_corners = false) { +export function interpolate_data(input, [in_channels, in_height, in_width], [out_height, out_width], mode = 'bilinear', align_corners = false) { // TODO use mode and align_corners // Output image dimensions @@ -79,7 +88,7 @@ export function interpolate(input, [in_channels, in_height, in_width], [out_heig /** - * Helper method to transpose a AnyTypedArray directly + * Helper method to transpose an `AnyTypedArray` directly * @param {T} array * @template {AnyTypedArray} T * @param {number[]} dims diff --git a/src/models.js b/src/models.js index cc7b09d..bedfd12 100644 --- a/src/models.js +++ b/src/models.js @@ -1,6 +1,47 @@ + +/** + * @file Definitions of all models available in Transformers.js. + * + * **Example:** Load and run an `AutoModel`. + * + * ```javascript + * import { AutoModel, AutoTokenizer } from '@xenova/transformers'; + * + * let tokenizer = await AutoTokenizer.from_pretrained('Xenova/bert-base-uncased'); + * let model = await AutoModel.from_pretrained('Xenova/bert-base-uncased'); + * + * let inputs = await tokenizer('I love transformers!'); + * let { logits } = await model(inputs); + * // Tensor { + * // data: Float32Array(183132) [-7.117443084716797, -7.107812881469727, -7.092104911804199, ...] + * // dims: (3) [1, 6, 30522], + * // type: "float32", + * // size: 183132, + * // } + * ``` + * + * We also provide other `AutoModel`s (listed below), which you can use in the same way as the Python library. For example: + * + * **Example:** Load and run an `AutoModelForSeq2SeqLM`.
+ * ```javascript + * import { AutoModelForSeq2SeqLM, AutoTokenizer } from '@xenova/transformers'; + * + * let tokenizer = await AutoTokenizer.from_pretrained('t5-small'); + * let model = await AutoModelForSeq2SeqLM.from_pretrained('t5-small'); + * + * let { input_ids } = await tokenizer('translate English to German: I love transformers!'); + * let outputs = await model.generate(input_ids); + * let decoded = await tokenizer.decode(outputs[0][0], { skip_special_tokens: true }); + * // 'Ich liebe Transformatoren!' + * ``` + * + * @module models + */ + import { Callable, isIntegralNumber, + isTypedArray, } from './utils.js'; import { @@ -34,6 +75,7 @@ const { InferenceSession, Tensor: ONNXTensor } = ONNX; /** * @typedef {import('./utils/hub.js').PretrainedOptions} PretrainedOptions */ + ////////////////////////////////////////////////// // Helper functions /** @@ -741,6 +783,10 @@ export class PreTrainedModel extends Callable { } = {}, ) { + if (!(inputs instanceof Tensor) && !isTypedArray(inputs) && !Array.isArray(inputs)) { + throw Error(`\`inputs\` must be a Tensor, TypedArray, or Array, but is "${inputs.constructor.name}".`); + } + if (inputs.length === 0) { throw Error("Must supply a non-empty array of input token ids.") } @@ -796,7 +842,7 @@ export class PreTrainedModel extends Callable { extractedLogits.push(lastLogits) } let logits = cat(extractedLogits); - logits_processor(beam.output_token_ids, logits) + logits_processor(beam.output_token_ids, logits); let sampledTokens = sampler(logits); for (let [newTokenId, logProb] of sampledTokens) { @@ -819,8 +865,8 @@ export class PreTrainedModel extends Callable { // Next, we get the best beams, per ID newest_beams = this.groupBeams(newest_beams).map( group => group - .sort((a, b) => b.score - a.score) // sort based on score - .slice(0, generation_config.num_beams) // remove outside beam width + .sort((a, b) => b.score - a.score) // sort based on score + .slice(0, generation_config.num_beams) // remove outside beam width ); // Flatten beams @@ -832,6 +878,8 @@ export class PreTrainedModel extends Callable { } } + // TODO - Ensure that we can return non-batched outputs + return this.groupBeams(beams).map( batch => { if (generation_config.num_return_sequences > 1) { @@ -929,19 +977,19 @@ export class PreTrainedModel extends Callable { } ////////////////////////////////////////////////// // Base model output class -class ModelOutput { } +export class ModelOutput { } ////////////////////////////////////////////////// // Bert models -class BertPreTrainedModel extends PreTrainedModel { } -class BertModel extends BertPreTrainedModel { } +export class BertPreTrainedModel extends PreTrainedModel { } +export class BertModel extends BertPreTrainedModel { } /** * BertForMaskedLM is a class representing a BERT model for masked language modeling. * @extends BertPreTrainedModel */ -class BertForMaskedLM extends BertPreTrainedModel { +export class BertForMaskedLM extends BertPreTrainedModel { /** * Calls the model on new inputs. * @@ -958,7 +1006,7 @@ class BertForMaskedLM extends BertPreTrainedModel { * BertForSequenceClassification is a class representing a BERT model for sequence classification. * @extends BertPreTrainedModel */ -class BertForSequenceClassification extends BertPreTrainedModel { +export class BertForSequenceClassification extends BertPreTrainedModel { /** * Calls the model on new inputs. 
* @@ -975,7 +1023,7 @@ class BertForSequenceClassification extends BertPreTrainedModel { * BertForTokenClassification is a class representing a BERT model for token classification. * @extends BertPreTrainedModel */ -class BertForTokenClassification extends BertPreTrainedModel { +export class BertForTokenClassification extends BertPreTrainedModel { /** * Calls the model on new inputs. * @@ -992,7 +1040,7 @@ class BertForTokenClassification extends BertPreTrainedModel { * BertForQuestionAnswering is a class representing a BERT model for question answering. * @extends BertPreTrainedModel */ -class BertForQuestionAnswering extends BertPreTrainedModel { +export class BertForQuestionAnswering extends BertPreTrainedModel { /** * Calls the model on new inputs. * @@ -1008,14 +1056,14 @@ class BertForQuestionAnswering extends BertPreTrainedModel { ////////////////////////////////////////////////// // DistilBert models -class DistilBertPreTrainedModel extends PreTrainedModel { } -class DistilBertModel extends DistilBertPreTrainedModel { } +export class DistilBertPreTrainedModel extends PreTrainedModel { } +export class DistilBertModel extends DistilBertPreTrainedModel { } /** * DistilBertForSequenceClassification is a class representing a DistilBERT model for sequence classification. * @extends DistilBertPreTrainedModel */ -class DistilBertForSequenceClassification extends DistilBertPreTrainedModel { +export class DistilBertForSequenceClassification extends DistilBertPreTrainedModel { /** * Calls the model on new inputs. * @@ -1032,7 +1080,7 @@ class DistilBertForSequenceClassification extends DistilBertPreTrainedModel { * DistilBertForTokenClassification is a class representing a DistilBERT model for token classification. * @extends DistilBertPreTrainedModel */ -class DistilBertForTokenClassification extends DistilBertPreTrainedModel { +export class DistilBertForTokenClassification extends DistilBertPreTrainedModel { /** * Calls the model on new inputs. * @@ -1050,7 +1098,7 @@ class DistilBertForTokenClassification extends DistilBertPreTrainedModel { * DistilBertForQuestionAnswering is a class representing a DistilBERT model for question answering. * @extends DistilBertPreTrainedModel */ -class DistilBertForQuestionAnswering extends DistilBertPreTrainedModel { +export class DistilBertForQuestionAnswering extends DistilBertPreTrainedModel { /** * Calls the model on new inputs. * @@ -1067,7 +1115,7 @@ class DistilBertForQuestionAnswering extends DistilBertPreTrainedModel { * DistilBertForMaskedLM is a class representing a DistilBERT model for masking task. * @extends DistilBertPreTrainedModel */ -class DistilBertForMaskedLM extends DistilBertPreTrainedModel { +export class DistilBertForMaskedLM extends DistilBertPreTrainedModel { /** * Calls the model on new inputs. * @@ -1084,14 +1132,14 @@ class DistilBertForMaskedLM extends DistilBertPreTrainedModel { ////////////////////////////////////////////////// // MobileBert models -class MobileBertPreTrainedModel extends PreTrainedModel { } -class MobileBertModel extends MobileBertPreTrainedModel { } +export class MobileBertPreTrainedModel extends PreTrainedModel { } +export class MobileBertModel extends MobileBertPreTrainedModel { } /** * MobileBertForMaskedLM is a class representing a MobileBERT model for masking task. * @extends MobileBertPreTrainedModel */ -class MobileBertForMaskedLM extends MobileBertPreTrainedModel { +export class MobileBertForMaskedLM extends MobileBertPreTrainedModel { /** * Calls the model on new inputs. 
* @@ -1107,7 +1155,7 @@ class MobileBertForMaskedLM extends MobileBertPreTrainedModel { /** * @extends MobileBertPreTrainedModel */ -class MobileBertForSequenceClassification extends MobileBertPreTrainedModel { +export class MobileBertForSequenceClassification extends MobileBertPreTrainedModel { /** * Calls the model on new inputs. * @@ -1123,7 +1171,7 @@ class MobileBertForSequenceClassification extends MobileBertPreTrainedModel { /** * @extends MobileBertPreTrainedModel */ -class MobileBertForQuestionAnswering extends MobileBertPreTrainedModel { +export class MobileBertForQuestionAnswering extends MobileBertPreTrainedModel { /** * Calls the model on new inputs. * @@ -1140,9 +1188,9 @@ class MobileBertForQuestionAnswering extends MobileBertPreTrainedModel { ////////////////////////////////////////////////// // SqueezeBert models -class SqueezeBertPreTrainedModel extends PreTrainedModel { } -class SqueezeBertModel extends SqueezeBertPreTrainedModel { } -class SqueezeBertForMaskedLM extends SqueezeBertPreTrainedModel { +export class SqueezeBertPreTrainedModel extends PreTrainedModel { } +export class SqueezeBertModel extends SqueezeBertPreTrainedModel { } +export class SqueezeBertForMaskedLM extends SqueezeBertPreTrainedModel { /** * Calls the model on new inputs. * @@ -1154,7 +1202,7 @@ class SqueezeBertForMaskedLM extends SqueezeBertPreTrainedModel { return new MaskedLMOutput(logits) } } -class SqueezeBertForSequenceClassification extends SqueezeBertPreTrainedModel { +export class SqueezeBertForSequenceClassification extends SqueezeBertPreTrainedModel { /** * Calls the model on new inputs. * @@ -1166,7 +1214,7 @@ class SqueezeBertForSequenceClassification extends SqueezeBertPreTrainedModel { return new SequenceClassifierOutput(logits) } } -class SqueezeBertForQuestionAnswering extends SqueezeBertPreTrainedModel { +export class SqueezeBertForQuestionAnswering extends SqueezeBertPreTrainedModel { /** * Calls the model on new inputs. * @@ -1183,9 +1231,9 @@ class SqueezeBertForQuestionAnswering extends SqueezeBertPreTrainedModel { ////////////////////////////////////////////////// // Albert models -class AlbertPreTrainedModel extends PreTrainedModel { } -class AlbertModel extends AlbertPreTrainedModel { } -class AlbertForSequenceClassification extends AlbertPreTrainedModel { +export class AlbertPreTrainedModel extends PreTrainedModel { } +export class AlbertModel extends AlbertPreTrainedModel { } +export class AlbertForSequenceClassification extends AlbertPreTrainedModel { /** * Calls the model on new inputs. * @@ -1197,7 +1245,7 @@ class AlbertForSequenceClassification extends AlbertPreTrainedModel { return new SequenceClassifierOutput(logits) } } -class AlbertForQuestionAnswering extends AlbertPreTrainedModel { +export class AlbertForQuestionAnswering extends AlbertPreTrainedModel { /** * Calls the model on new inputs. * @@ -1209,7 +1257,7 @@ class AlbertForQuestionAnswering extends AlbertPreTrainedModel { return new QuestionAnsweringModelOutput(outputs.start_logits, outputs.end_logits); } } -class AlbertForMaskedLM extends AlbertPreTrainedModel { +export class AlbertForMaskedLM extends AlbertPreTrainedModel { /** * Calls the model on new inputs. 
* @@ -1226,9 +1274,9 @@ class AlbertForMaskedLM extends AlbertPreTrainedModel { ////////////////////////////////////////////////// // T5 models -class T5PreTrainedModel extends PreTrainedModel { }; +export class T5PreTrainedModel extends PreTrainedModel { }; -class T5Model extends T5PreTrainedModel { +export class T5Model extends T5PreTrainedModel { /** * Generates text based on the provided arguments. * @throws {Error} - Throws an error as the current model class (T5Model) is not compatible with `.generate()`. @@ -1246,7 +1294,7 @@ class T5Model extends T5PreTrainedModel { * T5Model is a class representing a T5 model for conditional generation. * @extends T5PreTrainedModel */ -class T5ForConditionalGeneration extends T5PreTrainedModel { +export class T5ForConditionalGeneration extends T5PreTrainedModel { /** * Creates a new instance of the `T5ForConditionalGeneration` class. * @param {object} config - The model configuration. @@ -1336,9 +1384,9 @@ class T5ForConditionalGeneration extends T5PreTrainedModel { ////////////////////////////////////////////////// // MT5 models -class MT5PreTrainedModel extends PreTrainedModel { }; +export class MT5PreTrainedModel extends PreTrainedModel { }; -class MT5Model extends MT5PreTrainedModel { +export class MT5Model extends MT5PreTrainedModel { /** * * @param {...any} args @@ -1357,7 +1405,7 @@ class MT5Model extends MT5PreTrainedModel { * * @extends MT5PreTrainedModel */ -class MT5ForConditionalGeneration extends MT5PreTrainedModel { +export class MT5ForConditionalGeneration extends MT5PreTrainedModel { /** * Creates a new instance of the `MT5ForConditionalGeneration` class. * @param {any} config - The model configuration. @@ -1449,7 +1497,7 @@ class MT5ForConditionalGeneration extends MT5PreTrainedModel { ////////////////////////////////////////////////// // Bart models -class BartPretrainedModel extends PreTrainedModel { }; +export class BartPretrainedModel extends PreTrainedModel { }; /** * BART encoder and decoder model. @@ -1457,7 +1505,7 @@ class BartPretrainedModel extends PreTrainedModel { }; * @hideconstructor * @extends BartPretrainedModel */ -class BartModel extends BartPretrainedModel { +export class BartModel extends BartPretrainedModel { /** * Throws an error because the current model class (BartModel) is not compatible with `.generate()`. * @@ -1475,7 +1523,7 @@ class BartModel extends BartPretrainedModel { * BART model with a language model head for conditional generation. * @extends BartPretrainedModel */ -class BartForConditionalGeneration extends BartPretrainedModel { +export class BartForConditionalGeneration extends BartPretrainedModel { /** * Creates a new instance of the `BartForConditionalGeneration` class. * @param {object} config - The configuration object for the Bart model. @@ -1562,7 +1610,7 @@ class BartForConditionalGeneration extends BartPretrainedModel { } } -class BartForSequenceClassification extends BartPretrainedModel { +export class BartForSequenceClassification extends BartPretrainedModel { /** * Calls the model on new inputs. * @@ -1579,14 +1627,14 @@ class BartForSequenceClassification extends BartPretrainedModel { ////////////////////////////////////////////////// // Roberta models -class RobertaPreTrainedModel extends PreTrainedModel { } -class RobertaModel extends RobertaPreTrainedModel { } +export class RobertaPreTrainedModel extends PreTrainedModel { } +export class RobertaModel extends RobertaPreTrainedModel { } /** * RobertaForMaskedLM class for performing masked language modeling on Roberta models. 
* @extends RobertaPreTrainedModel */ -class RobertaForMaskedLM extends RobertaPreTrainedModel { +export class RobertaForMaskedLM extends RobertaPreTrainedModel { /** * Calls the model on new inputs. * @@ -1603,7 +1651,7 @@ class RobertaForMaskedLM extends RobertaPreTrainedModel { * RobertaForSequenceClassification class for performing sequence classification on Roberta models. * @extends RobertaPreTrainedModel */ -class RobertaForSequenceClassification extends RobertaPreTrainedModel { +export class RobertaForSequenceClassification extends RobertaPreTrainedModel { /** * Calls the model on new inputs. * @@ -1620,7 +1668,7 @@ class RobertaForSequenceClassification extends RobertaPreTrainedModel { * RobertaForQuestionAnswering class for performing question answering on Roberta models. * @extends RobertaPreTrainedModel */ -class RobertaForQuestionAnswering extends RobertaPreTrainedModel { +export class RobertaForQuestionAnswering extends RobertaPreTrainedModel { /** * Calls the model on new inputs. * @@ -1636,13 +1684,13 @@ class RobertaForQuestionAnswering extends RobertaPreTrainedModel { ////////////////////////////////////////////////// // T5 models -class WhisperPreTrainedModel extends PreTrainedModel { }; +export class WhisperPreTrainedModel extends PreTrainedModel { }; /** * WhisperModel class for training Whisper models without a language model head. * @extends WhisperPreTrainedModel */ -class WhisperModel extends WhisperPreTrainedModel { +export class WhisperModel extends WhisperPreTrainedModel { /** * Throws an error when attempting to generate output since this model doesn't have a language model head. * @throws Error @@ -1660,7 +1708,7 @@ class WhisperModel extends WhisperPreTrainedModel { * WhisperForConditionalGeneration class for generating conditional outputs from Whisper models. * @extends WhisperPreTrainedModel */ -class WhisperForConditionalGeneration extends WhisperPreTrainedModel { +export class WhisperForConditionalGeneration extends WhisperPreTrainedModel { /** * Creates a new instance of the `WhisperForConditionalGeneration` class. * @param {Object} config - Configuration object for the model. @@ -1788,7 +1836,7 @@ class WhisperForConditionalGeneration extends WhisperPreTrainedModel { * Vision Encoder-Decoder model based on OpenAI's GPT architecture for image captioning and other vision tasks * @extends PreTrainedModel */ -class VisionEncoderDecoderModel extends PreTrainedModel { +export class VisionEncoderDecoderModel extends PreTrainedModel { /** * Creates a new instance of the `VisionEncoderDecoderModel` class. * @param {object} config - The configuration object specifying the hyperparameters and other model settings. @@ -1881,8 +1929,8 @@ class VisionEncoderDecoderModel extends PreTrainedModel { ////////////////////////////////////////////////// // CLIP models -class CLIPPreTrainedModel extends PreTrainedModel { } -class CLIPModel extends CLIPPreTrainedModel { +export class CLIPPreTrainedModel extends PreTrainedModel { } +export class CLIPModel extends CLIPPreTrainedModel { } @@ -1890,12 +1938,12 @@ class CLIPModel extends CLIPPreTrainedModel { ////////////////////////////////////////////////// // GPT2 models -class GPT2PreTrainedModel extends PreTrainedModel { } +export class GPT2PreTrainedModel extends PreTrainedModel { } /** * GPT2Model is not compatible with `.generate()`, as it doesn't have a language model head. 
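+ * For text generation, use the `GPT2LMHeadModel` class (defined below) instead.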
* @extends GPT2PreTrainedModel */ -class GPT2Model extends GPT2PreTrainedModel { +export class GPT2Model extends GPT2PreTrainedModel { /** * * @param {...any} args @@ -1913,7 +1961,7 @@ class GPT2Model extends GPT2PreTrainedModel { * GPT-2 language model head on top of the GPT-2 base model. This model is suitable for text generation tasks. * @extends GPT2PreTrainedModel */ -class GPT2LMHeadModel extends GPT2PreTrainedModel { +export class GPT2LMHeadModel extends GPT2PreTrainedModel { /** * Creates a new instance of the `GPT2LMHeadModel` class. * @param {object} config - The configuration of the model. @@ -1969,12 +2017,12 @@ class GPT2LMHeadModel extends GPT2PreTrainedModel { } } -// class GPT2ForSequenceClassification extends GPT2PreTrainedModel { +// export class GPT2ForSequenceClassification extends GPT2PreTrainedModel { // TODO // } ////////////////////////////////////////////////// -class GPTNeoPreTrainedModel extends PreTrainedModel { } -class GPTNeoModel extends GPTNeoPreTrainedModel { +export class GPTNeoPreTrainedModel extends PreTrainedModel { } +export class GPTNeoModel extends GPTNeoPreTrainedModel { /** * * @param {...any} args @@ -1988,7 +2036,7 @@ class GPTNeoModel extends GPTNeoPreTrainedModel { } } -class GPTNeoForCausalLM extends GPTNeoPreTrainedModel { +export class GPTNeoForCausalLM extends GPTNeoPreTrainedModel { /** * Creates a new instance of the `GPTNeoForCausalLM` class. * @param {object} config - The configuration of the model. @@ -2046,13 +2094,13 @@ class GPTNeoForCausalLM extends GPTNeoPreTrainedModel { ////////////////////////////////////////////////// // CodeGen models -class CodeGenPreTrainedModel extends PreTrainedModel { } +export class CodeGenPreTrainedModel extends PreTrainedModel { } /** * CodeGenModel is a class representing a code generation model without a language model head. * * @extends CodeGenPreTrainedModel */ -class CodeGenModel extends CodeGenPreTrainedModel { +export class CodeGenModel extends CodeGenPreTrainedModel { /** * Throws an error indicating that the current model class is not compatible with `.generate()`, * as it doesn't have a language model head. @@ -2073,7 +2121,7 @@ class CodeGenModel extends CodeGenPreTrainedModel { * CodeGenForCausalLM is a class that represents a code generation model based on the GPT-2 architecture. It extends the `CodeGenPreTrainedModel` class. * @extends CodeGenPreTrainedModel */ -class CodeGenForCausalLM extends CodeGenPreTrainedModel { +export class CodeGenForCausalLM extends CodeGenPreTrainedModel { /** * Creates a new instance of the `CodeGenForCausalLM` class. * @param {object} config The model configuration object. 
@@ -2132,8 +2180,8 @@ class CodeGenForCausalLM extends CodeGenPreTrainedModel { ////////////////////////////////////////////////// ////////////////////////////////////////////////// -class ViTPreTrainedModel extends PreTrainedModel { } -class ViTForImageClassification extends ViTPreTrainedModel { +export class ViTPreTrainedModel extends PreTrainedModel { } +export class ViTForImageClassification extends ViTPreTrainedModel { /** * @param {any} model_inputs */ @@ -2145,8 +2193,8 @@ class ViTForImageClassification extends ViTPreTrainedModel { ////////////////////////////////////////////////// ////////////////////////////////////////////////// -class DetrPreTrainedModel extends PreTrainedModel { } -class DetrForObjectDetection extends DetrPreTrainedModel { +export class DetrPreTrainedModel extends PreTrainedModel { } +export class DetrForObjectDetection extends DetrPreTrainedModel { /** * @param {any} model_inputs */ @@ -2156,7 +2204,7 @@ class DetrForObjectDetection extends DetrPreTrainedModel { } } -class DetrForSegmentation extends DetrPreTrainedModel { +export class DetrForSegmentation extends DetrPreTrainedModel { /** * Runs the model with the provided inputs * @param {Object} model_inputs - Model inputs @@ -2168,7 +2216,7 @@ class DetrForSegmentation extends DetrPreTrainedModel { } } -class DetrObjectDetectionOutput extends ModelOutput { +export class DetrObjectDetectionOutput extends ModelOutput { /** * @param {any} logits * @param {any} pred_boxes @@ -2180,7 +2228,7 @@ class DetrObjectDetectionOutput extends ModelOutput { } } -class DetrSegmentationOutput extends ModelOutput { +export class DetrSegmentationOutput extends ModelOutput { /** * @param {Tensor} logits - The output logits of the model. @@ -2199,9 +2247,9 @@ class DetrSegmentationOutput extends ModelOutput { ////////////////////////////////////////////////// // MarianMT models -class MarianPreTrainedModel extends PreTrainedModel { }; +export class MarianPreTrainedModel extends PreTrainedModel { }; -class MarianModel extends MarianPreTrainedModel { +export class MarianModel extends MarianPreTrainedModel { /** * * @param {...any} args @@ -2215,7 +2263,7 @@ class MarianModel extends MarianPreTrainedModel { } } -class MarianMTModel extends MarianPreTrainedModel { +export class MarianMTModel extends MarianPreTrainedModel { /** * Creates a new instance of the `MarianMTModel` class. * @param {object} config The model configuration object. @@ -2304,9 +2352,9 @@ class MarianMTModel extends MarianPreTrainedModel { ////////////////////////////////////////////////// // M2M100 models -class M2M100PreTrainedModel extends PreTrainedModel { }; +export class M2M100PreTrainedModel extends PreTrainedModel { }; -class M2M100Model extends M2M100PreTrainedModel { +export class M2M100Model extends M2M100PreTrainedModel { /** * * @param {...any} args @@ -2320,7 +2368,7 @@ class M2M100Model extends M2M100PreTrainedModel { } } -class M2M100ForConditionalGeneration extends M2M100PreTrainedModel { +export class M2M100ForConditionalGeneration extends M2M100PreTrainedModel { /** * Creates a new instance of the `M2M100ForConditionalGeneration` class. * @param {object} config The model configuration object. @@ -2416,7 +2464,7 @@ class M2M100ForConditionalGeneration extends M2M100PreTrainedModel { * Base class of all AutoModels. Contains the `from_pretrained` function * which is used to instantiate pretrained models. */ -class PretrainedMixin { +export class PretrainedMixin { /** * Mapping from model type to model class. 
*/ @@ -2508,6 +2556,7 @@ export class AutoModel extends PretrainedMixin { 'mobilebert': MobileBertModel, 'squeezebert': SqueezeBertModel, 'marian': MarianModel, + 'm2m_100': M2M100Model, } } @@ -2678,7 +2727,7 @@ export class AutoModelForObjectDetection extends PretrainedMixin { ////////////////////////////////////////////////// ////////////////////////////////////////////////// -class Seq2SeqLMOutput extends ModelOutput { +export class Seq2SeqLMOutput extends ModelOutput { /** * @param {Tensor} logits - The output logits of the model. * @param {Array} past_key_values - An array of key/value pairs that represent the previous state of the model. @@ -2692,7 +2741,7 @@ } } -class SequenceClassifierOutput extends ModelOutput { +export class SequenceClassifierOutput extends ModelOutput { /** * @param {Tensor} logits */ @@ -2702,7 +2751,7 @@ } } -class TokenClassifierOutput extends ModelOutput { +export class TokenClassifierOutput extends ModelOutput { /** * @param {Tensor} logits */ @@ -2713,7 +2762,7 @@ } -class MaskedLMOutput extends ModelOutput { +export class MaskedLMOutput extends ModelOutput { /** * @param {Tensor} logits */ @@ -2723,7 +2772,7 @@ } } -class QuestionAnsweringModelOutput extends ModelOutput { +export class QuestionAnsweringModelOutput extends ModelOutput { /** * @param {Float32Array} start_logits - The logits for start positions of the answer. * @param {Float32Array} end_logits - The logits for end positions of the answer. diff --git a/src/pipelines.js b/src/pipelines.js index 32918f8..3a8e71c 100644 --- a/src/pipelines.js +++ b/src/pipelines.js @@ -1,3 +1,18 @@ +/** + * @file Pipelines provide a high-level, easy-to-use API for running machine learning models. + * + * **Example:** Instantiate a pipeline using the `pipeline` function. + * ```javascript + * import { pipeline } from '@xenova/transformers'; + * + * let classifier = await pipeline('sentiment-analysis'); + * let result = await classifier('I love transformers!'); + * // [{'label': 'POSITIVE', 'score': 0.999817686}] + * ``` + * + * @module pipelines + */ + import { Callable, isString, @@ -10,10 +25,11 @@ import { dot } from './math_utils.js'; import { - getFile, -} from './utils/hub.js'; + read_audio +} from './audio_utils.js'; import { - AutoTokenizer + AutoTokenizer, + PreTrainedTokenizer, } from './tokenizers.js'; import { AutoModel, @@ -26,7 +42,8 @@ import { AutoModelForVision2Seq, AutoModelForImageClassification, AutoModelForImageSegmentation, - AutoModelForObjectDetection + AutoModelForObjectDetection, + PreTrainedModel, } from './models.js'; import { AutoProcessor, @@ -40,7 +57,7 @@ import { CustomImage } from './image_utils.js'; * Prepare images for further tasks. * @param {any[]} images - images to prepare. * @returns {Promise} - returns processed images. - * @async + * @private */ async function prepareImages(images) { if (!Array.isArray(images)) { @@ -56,12 +73,12 @@ * Pipeline class for executing a natural language processing task. * @extends Callable */ -class Pipeline extends Callable { +export class Pipeline extends Callable { /** * Creates a new instance of Pipeline. * @param {string} task - The natural language processing task to be performed. - * @param {object} tokenizer - The tokenizer object to be used for tokenizing input texts.
- * @param {object} model - The model object to be used for processing input texts. + * @param {PreTrainedTokenizer} tokenizer - The tokenizer object to be used for tokenizing input texts. + * @param {PreTrainedModel} model - The model object to be used for processing input texts. */ constructor(task, tokenizer, model) { super(); @@ -101,7 +118,7 @@ class Pipeline extends Callable { * TextClassificationPipeline class for executing a text classification task. * @extends Pipeline */ -class TextClassificationPipeline extends Pipeline { +export class TextClassificationPipeline extends Pipeline { /** * Executes the text classification task. * @param {any} texts - The input texts to be classified. @@ -142,7 +159,7 @@ class TextClassificationPipeline extends Pipeline { * TokenClassificationPipeline class for executing a token classification task. * @extends Pipeline */ -class TokenClassificationPipeline extends Pipeline { +export class TokenClassificationPipeline extends Pipeline { /** * Executes the token classification task. * @param {any} texts - The input texts to be classified. @@ -211,7 +228,7 @@ class TokenClassificationPipeline extends Pipeline { * QuestionAnsweringPipeline class for executing a question answering task. * @extends Pipeline */ -class QuestionAnsweringPipeline extends Pipeline { +export class QuestionAnsweringPipeline extends Pipeline { /** * Executes the question answering task. * @param {string|string[]} question - The question(s) to be answered. @@ -275,7 +292,7 @@ class QuestionAnsweringPipeline extends Pipeline { * Class representing a fill-mask pipeline for natural language processing. * @extends Pipeline */ -class FillMaskPipeline extends Pipeline { +export class FillMaskPipeline extends Pipeline { /** * @param {any} texts */ @@ -328,12 +345,11 @@ class FillMaskPipeline extends Pipeline { * Text2TextGenerationPipeline class for generating text using a model that performs text-to-text generation tasks. * @extends Pipeline */ -class Text2TextGenerationPipeline extends Pipeline { +export class Text2TextGenerationPipeline extends Pipeline { _key = null; /** * Fill the masked token in the text(s) given as inputs. - * @async * @param {string|string[]} texts - The text or array of texts to be processed. * @param {Object} [options={}] - Options for the fill-mask pipeline. * @param {number} [options.topk=5] - The number of top-k predictions to return. @@ -399,7 +415,7 @@ class Text2TextGenerationPipeline extends Pipeline { * A pipeline for summarization tasks, inheriting from Text2TextGenerationPipeline. * @extends Text2TextGenerationPipeline */ -class SummarizationPipeline extends Text2TextGenerationPipeline { +export class SummarizationPipeline extends Text2TextGenerationPipeline { _key = 'summary_text'; } @@ -407,7 +423,7 @@ class SummarizationPipeline extends Text2TextGenerationPipeline { * TranslationPipeline class to translate text from one language to another using the provided model and tokenizer. * @extends Text2TextGenerationPipeline */ -class TranslationPipeline extends Text2TextGenerationPipeline { +export class TranslationPipeline extends Text2TextGenerationPipeline { _key = 'translation_text'; } @@ -415,10 +431,9 @@ class TranslationPipeline extends Text2TextGenerationPipeline { * A pipeline for generating text based on an input prompt. * @extends Pipeline */ -class TextGenerationPipeline extends Pipeline { +export class TextGenerationPipeline extends Pipeline { /** * Generates text based on an input prompt. 
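+ *
+ * **Example:** A minimal usage sketch (assumes the task's default model is used;
+ * the generated text will vary by model and sampling settings):
+ * ```javascript
+ * import { pipeline } from '@xenova/transformers';
+ *
+ * let generator = await pipeline('text-generation');
+ * let output = await generator('Once upon a time,');
+ * ```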
- * @async * @param {any} texts - The input prompt or prompts to generate text from. * @param {object} [generate_kwargs={}] - Additional arguments for text generation. * @returns {Promise} - The generated text or texts. @@ -466,12 +481,12 @@ class TextGenerationPipeline extends Pipeline { * Class representing an Zero Shot Classification Pipeline that should only be used with zero shot classification tasks. * @extends Pipeline */ -class ZeroShotClassificationPipeline extends Pipeline { +export class ZeroShotClassificationPipeline extends Pipeline { /** * @param {string} task - * @param {any} tokenizer - * @param {any} model + * @param {PreTrainedTokenizer} tokenizer + * @param {PreTrainedModel} model */ constructor(task, tokenizer, model) { super(task, tokenizer, model); @@ -577,7 +592,7 @@ class ZeroShotClassificationPipeline extends Pipeline { * * @todo Make sure this works for other models than `sentence-transformers`. */ -class FeatureExtractionPipeline extends Pipeline { +export class FeatureExtractionPipeline extends Pipeline { /** * Private method to perform mean pooling of the last hidden state followed by a normalization step. * @param {Tensor} last_hidden_state - Tensor of shape [batchSize, seqLength, embedDim] @@ -669,14 +684,14 @@ class FeatureExtractionPipeline extends Pipeline { * A class representing an automatic speech recognition pipeline. * @extends Pipeline */ -class AutomaticSpeechRecognitionPipeline extends Pipeline { +export class AutomaticSpeechRecognitionPipeline extends Pipeline { /** * Creates an instance of AutomaticSpeechRecognitionPipeline. - * @param {string} task - The type of the task for this pipeline. Currently only "asr" is supported. - * @param {object} tokenizer - The tokenizer to be used for pre-processing inputs. - * @param {object} model - The model to be used for the task. - * @param {object} processor - The processor to be used for pre-processing audio inputs. + * @param {string} task - The type of the task for this pipeline. + * @param {PreTrainedTokenizer} tokenizer - The tokenizer to be used for pre-processing inputs. + * @param {PreTrainedModel} model - The model to be used for the task. + * @param {Processor} processor - The processor to be used for pre-processing audio inputs. */ constructor(task, tokenizer, model, processor) { super(task, tokenizer, model); @@ -692,52 +707,7 @@ class AutomaticSpeechRecognitionPipeline extends Pipeline { */ async _preprocess(audio, sampling_rate) { if (isString(audio)) { - // Attempting to load from path - - if (typeof AudioContext === 'undefined') { - // Running in node or an environment without AudioContext - throw Error( - "Unable to load audio from path/URL since `AudioContext` is not available in your environment. " + - "As a result, audio data must be passed directly to the processor. " + - "If you are running in node.js, you can use an external library (e.g., https://github.com/audiojs/web-audio-api) to do this." - ) - } - const response = await (await getFile(audio)).arrayBuffer(); - const audioCTX = new AudioContext({ sampleRate: sampling_rate }); - const decoded = await audioCTX.decodeAudioData(response); - - // We now replicate HuggingFace's `ffmpeg_read` method: - - if (decoded.numberOfChannels === 2) { - // When downmixing a stereo audio file to mono using the -ac 1 option in FFmpeg, - // the audio signal is summed across both channels to create a single mono channel. - // However, if the audio is at full scale (i.e. 
the highest possible volume level), - // the summing of the two channels can cause the audio signal to clip or distort. - - // To prevent this clipping, FFmpeg applies a scaling factor of 1/sqrt(2) (~ 0.707) - // to the audio signal before summing the two channels. This scaling factor ensures - // that the combined audio signal will not exceed the maximum possible level, even - // if both channels are at full scale. - - // After applying this scaling factor, the audio signal from both channels is summed - // to create a single mono channel. It's worth noting that this scaling factor is - // only applied when downmixing stereo audio to mono using the -ac 1 option in FFmpeg. - // If you're using a different downmixing method, or if you're not downmixing the - // audio at all, this scaling factor may not be needed. - const SCALING_FACTOR = Math.sqrt(2); - - let left = decoded.getChannelData(0); - let right = decoded.getChannelData(1); - - audio = new Float32Array(left.length); - for (let i = 0; i < decoded.length; i++) { - audio[i] = SCALING_FACTOR * (left[i] + right[i]) / 2; - } - - } else { - // If the audio is not stereo, we can just use the first channel: - audio = decoded.getChannelData(0); - } + audio = await read_audio(audio, sampling_rate); } return audio; @@ -854,13 +824,13 @@ class AutomaticSpeechRecognitionPipeline extends Pipeline { * A pipeline for performing image-to-text tasks. * @extends Pipeline */ -class ImageToTextPipeline extends Pipeline { +export class ImageToTextPipeline extends Pipeline { /** * Create an instance of ImageToTextPipeline. * @param {string} task - The task name. - * @param {object} tokenizer - The tokenizer to use. - * @param {object} model - The generator model to use. - * @param {object} processor - The image processor to use. + * @param {PreTrainedTokenizer} tokenizer - The tokenizer to use. + * @param {PreTrainedModel} model - The generator model to use. + * @param {Processor} processor - The image processor to use. */ constructor(task, tokenizer, model, processor) { super(task, tokenizer, model); @@ -897,12 +867,12 @@ class ImageToTextPipeline extends Pipeline { * A class representing an image classification pipeline. * @extends Pipeline */ -class ImageClassificationPipeline extends Pipeline { +export class ImageClassificationPipeline extends Pipeline { /** * Create a new ImageClassificationPipeline. * @param {string} task - The task of the pipeline. - * @param {Object} model - The model to use for classification. - * @param {Function} processor - The function to preprocess images. + * @param {PreTrainedModel} model - The model to use for classification. + * @param {Processor} processor - The function to preprocess images. */ constructor(task, model, processor) { super(task, null, model); // TODO tokenizer @@ -911,7 +881,6 @@ class ImageClassificationPipeline extends Pipeline { /** * Classify the given images. - * @async * @param {any} images - The images to classify. * @param {Object} options - The options to use for classification. * @param {number} [options.topk=1] - The number of top results to return. @@ -953,11 +922,11 @@ class ImageClassificationPipeline extends Pipeline { * ImageSegmentationPipeline class for executing an image-segmentation task. * @extends Pipeline */ -class ImageSegmentationPipeline extends Pipeline { +export class ImageSegmentationPipeline extends Pipeline { /** * Create a new ImageSegmentationPipeline. - * @param {string} task - The task of the pipeline. - * @param {Object} model - The model to use for classification. 
+ * @param {string} task - The task of the pipeline. + * @param {PreTrainedModel} model - The model to use for classification. * @param {Processor} processor - The function to preprocess images. */ constructor(task, model, processor) { @@ -1067,14 +1036,14 @@ * Class representing a zero-shot image classification pipeline. * @extends Pipeline */ -class ZeroShotImageClassificationPipeline extends Pipeline { +export class ZeroShotImageClassificationPipeline extends Pipeline { /** * Create a zero-shot image classification pipeline. * @param {string} task - The task of the pipeline. - * @param {Object} tokenizer - The tokenizer to use. - * @param {Object} model - The model to use. - * @param {Function} processor - The image processing function. + * @param {PreTrainedTokenizer} tokenizer - The tokenizer to use. + * @param {PreTrainedModel} model - The model to use. + * @param {Processor} processor - The image processing function. */ constructor(task, tokenizer, model, processor) { super(task, tokenizer, model); @@ -1129,11 +1098,11 @@ } -class ObjectDetectionPipeline extends Pipeline { +export class ObjectDetectionPipeline extends Pipeline { /** - * @param {string} task - * @param {any} model - * @param {any} processor + * @param {string} task + * @param {PreTrainedModel} model + * @param {Processor} processor */ constructor(task, model, processor) { super(task, null, model); // TODO tokenizer @@ -1379,11 +1348,9 @@ const TASK_ALIASES = { */ /** - * Constructs a pipeline for a specified task with optional model and progress callback. + * Utility factory method to build a [`Pipeline`] object. * - * @async - * @function - * @param {string} task - The task to perform, e.g. "text-generation". + * @param {string} task - The task defining which pipeline will be returned. * @param {string} [model=null] - The name of the pre-trained model to use. If not specified, the default model for the task will be used. * @param {PretrainedOptions} [options] - Optional parameters for the pipeline. * @returns {Promise} A Pipeline object for the specified task. @@ -1461,6 +1428,7 @@ export async function pipeline( * Compute the Cartesian product of given arrays * @param {...Array} a - Arrays to compute the product * @returns {Array} - Returns the computed Cartesian product as an array + * @private */ function product(...a) { // Cartesian product of items diff --git a/src/processors.js b/src/processors.js index 24a1168..fee26b9 100644 --- a/src/processors.js +++ b/src/processors.js @@ -1,4 +1,24 @@ +/** + * @file Processors are used to prepare non-textual inputs (e.g., image or audio) for a model. + * + * **Example:** Using a `WhisperProcessor` to prepare an audio input for a model.
+ * ```javascript + * import { AutoProcessor, read_audio } from '@xenova/transformers'; + * + * let processor = await AutoProcessor.from_pretrained('openai/whisper-tiny.en'); + * let audio = await read_audio('https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/mlk.flac'); + * let { input_features } = await processor(audio); + * // Tensor { + * // data: Float32Array(240000) [0.4752984642982483, 0.5597258806228638, 0.56434166431427, ...], + * // dims: [1, 80, 3000], + * // type: 'float32', + * // size: 240000, + * // } + * ``` + * + * @module processors + */ import { Callable, } from './utils.js'; @@ -24,7 +44,7 @@ import { CustomImage } from './image_utils.js'; * * @extends Callable */ -class FeatureExtractor extends Callable { +export class FeatureExtractor extends Callable { /** * Constructs a new FeatureExtractor instance. * @@ -41,7 +61,7 @@ class FeatureExtractor extends Callable { * * @extends FeatureExtractor */ -class ImageFeatureExtractor extends FeatureExtractor { +export class ImageFeatureExtractor extends FeatureExtractor { /** * Constructs a new ViTFeatureExtractor instance. @@ -221,14 +241,14 @@ class ImageFeatureExtractor extends FeatureExtractor { } -class ViTFeatureExtractor extends ImageFeatureExtractor { } +export class ViTFeatureExtractor extends ImageFeatureExtractor { } /** * Detr Feature Extractor. * * @extends ImageFeatureExtractor */ -class DetrFeatureExtractor extends ImageFeatureExtractor { +export class DetrFeatureExtractor extends ImageFeatureExtractor { /** * Calls the feature extraction process on an array of image * URLs, preprocesses each image, and concatenates the resulting @@ -605,7 +625,7 @@ class DetrFeatureExtractor extends ImageFeatureExtractor { } -class WhisperFeatureExtractor extends FeatureExtractor { +export class WhisperFeatureExtractor extends FeatureExtractor { /** * Calculates the index offset for a given index and window size. @@ -921,7 +941,6 @@ class WhisperFeatureExtractor extends FeatureExtractor { * Asynchronously extracts features from a given audio using the provided configuration. * @param {Float32Array} audio - The audio data as a Float32Array. * @returns {Promise<{ input_features: Tensor }>} - A Promise resolving to an object containing the extracted input features as a Tensor. - * @async */ async _call(audio) { // audio is a float32array @@ -933,7 +952,7 @@ class WhisperFeatureExtractor extends FeatureExtractor { "remember to specify `chunk_length_s` and/or `stride_length_s`." ); } - let waveform = audio.slice(0, this.config.n_samples) + let waveform = audio.slice(0, this.config.n_samples); let features = this._extract_fbank_features(waveform); @@ -965,7 +984,6 @@ export class Processor extends Callable { * Calls the feature_extractor function with the given input. * @param {any} input - The input to extract features from. * @returns {Promise} A Promise that resolves with the extracted features. - * @async */ async _call(input) { return await this.feature_extractor(input); @@ -976,12 +994,11 @@ export class Processor extends Callable { * Represents a WhisperProcessor that extracts features from an audio input. * @extends Processor */ -class WhisperProcessor extends Processor { +export class WhisperProcessor extends Processor { /** * Calls the feature_extractor function with the given audio input. * @param {any} audio - The audio input to extract features from. * @returns {Promise} A Promise that resolves with the extracted features. 
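+ * (Internally, this forwards the audio to the processor's feature extractor — for Whisper, a `WhisperFeatureExtractor`.)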
- * @async */ async _call(audio) { return await this.feature_extractor(audio) } diff --git a/src/samplers.js b/src/samplers.js index 0c28a73..feb4774 100644 --- a/src/samplers.js +++ b/src/samplers.js @@ -1,3 +1,12 @@ +/** + * @file Helper module for sampling from a model that can generate (i.e., has `.generate()`). + * + * These classes are only used internally, meaning an end-user + * shouldn't need to access anything here. + * + * @module samplers + */ + import { Callable, } from "./utils.js"; diff --git a/src/tensor_utils.js b/src/tensor_utils.js index a0b7e26..f07a968 100644 --- a/src/tensor_utils.js +++ b/src/tensor_utils.js @@ -1,7 +1,16 @@ +/** + * @file Helper module for `Tensor` processing. + * + * These functions and classes are only used internally, + * meaning an end-user shouldn't need to access anything here. + * + * @module tensor_utils + */ + import { ONNX } from './backends/onnx.js'; import { - interpolate as interpolate_data, + interpolate_data, transpose_data } from './math_utils.js'; import { const ONNXTensor = ONNX.Tensor; -// TODO: fix error below export class Tensor extends ONNXTensor { /** * Create a new Tensor or copy an existing Tensor. diff --git a/src/tokenizers.js b/src/tokenizers.js index ee1aa19..0c918b0 100644 --- a/src/tokenizers.js +++ b/src/tokenizers.js @@ -1,3 +1,25 @@ + +/** + * @file Tokenizers are used to prepare textual inputs for a model. + * + * **Example:** Create an `AutoTokenizer` and use it to tokenize a sentence. + * This will automatically detect the tokenizer type based on the tokenizer class defined in `tokenizer.json`. + * ```javascript + * import { AutoTokenizer } from '@xenova/transformers'; + * + * let tokenizer = await AutoTokenizer.from_pretrained('bert-base-uncased'); + * let { input_ids } = await tokenizer('I love transformers!'); + * // Tensor { + * // data: BigInt64Array(6) [101n, 1045n, 2293n, 19081n, 999n, 102n], + * // dims: [1, 6], + * // type: 'int64', + * // size: 6, + * // } + * ``` + * + * @module tokenizers + */ + import { Callable, reverseDictionary, @@ -54,7 +76,7 @@ function createPattern(pattern) { * * @extends Callable */ -class TokenizerModel extends Callable { +export class TokenizerModel extends Callable { /** * Creates a new instance of TokenizerModel. * @param {object} config - The configuration object for the TokenizerModel. @@ -1474,7 +1496,7 @@ class WhitespaceSplit extends PreTokenizer { } } -class PreTrainedTokenizer extends Callable { +export class PreTrainedTokenizer extends Callable { /** * Create a new PreTrainedTokenizer instance. * @param {Object} tokenizerJSON - The JSON of the tokenizer. @@ -1996,9 +2018,9 @@ export class GPT2Tokenizer extends PreTrainedTokenizer { } export class BartTokenizer extends PreTrainedTokenizer { } export class RobertaTokenizer extends PreTrainedTokenizer { } -class BloomTokenizer extends PreTrainedTokenizer { } +export class BloomTokenizer extends PreTrainedTokenizer { } -class NllbTokenizer extends PreTrainedTokenizer { +export class NllbTokenizer extends PreTrainedTokenizer { constructor(tokenizerJSON, tokenizerConfig) { super(tokenizerJSON, tokenizerConfig); diff --git a/src/transformers.js b/src/transformers.js index 5fac75a..c36449a 100644 --- a/src/transformers.js +++ b/src/transformers.js @@ -1,41 +1,25 @@ -// Tokenizers -export { - AutoTokenizer, - BertTokenizer, - DistilBertTokenizer, - T5Tokenizer, - GPT2Tokenizer -} from './tokenizers.js'; +/** + * @file Entry point for the Transformers.js library.
Only the exports from this file + * are available to the end user, and are grouped as follows: + * + * 1. Pipelines + * 2. Environment variables + * 3. Models + * 4. Tokenizers + * 5. Processors + * + * @todo Add a link to the documentation for each export. + * + * @module transformers + */ -// Models -export { - // Auto classes - AutoModel, - AutoModelForSequenceClassification, - AutoModelForTokenClassification, - AutoModelForSeq2SeqLM, - AutoModelForCausalLM, - AutoModelForMaskedLM, - AutoModelForQuestionAnswering, - AutoModelForVision2Seq, - AutoModelForImageClassification, - AutoModelForObjectDetection, - - // Other - PreTrainedModel, -} from './models.js'; +export * from './pipelines.js'; +export * from './env.js'; +export * from './models.js'; +export * from './tokenizers.js'; +export * from './processors.js'; -// Processors -export { - AutoProcessor -} from './processors.js'; - -// environment variables -export { env } from './env.js'; - -// other -export { - pipeline -} from './pipelines.js'; -export { Tensor } from './tensor_utils.js'; +export * from './audio_utils.js'; +export * from './tensor_utils.js'; +export * from './math_utils.js'; diff --git a/src/utils.js b/src/utils.js index f6997b9..15964f2 100644 --- a/src/utils.js +++ b/src/utils.js @@ -1,4 +1,12 @@ +/** + * @file Utility functions/classes for Transformers.js. + * + * These are only used internally, meaning an end-user shouldn't + * need to access anything here. + * + * @module utils + */ /** * Helper function to dispatch progress callbacks. @@ -6,6 +14,7 @@ * @param {function} progress_callback - The progress callback function to dispatch. * @param {any} data - The data to pass to the progress callback function. * @returns {void} + * @private */ export function dispatchCallback(progress_callback, data) { if (progress_callback !== null) progress_callback(data); @@ -77,6 +86,19 @@ export function isString(text) { return typeof text === 'string' || text instanceof String } + +/** + * Check if a value is a typed array. + * @param {*} val - The value to check. + * @returns {boolean} - True if the value is a `TypedArray`, false otherwise. + * + * Adapted from https://stackoverflow.com/a/71091338/13989043 + */ +export function isTypedArray(val) { + return val?.prototype?.__proto__?.constructor?.name === 'TypedArray'; +} + + /** * Check if a value is an integer. * @param {*} x - The value to check. diff --git a/src/utils/hub.js b/src/utils/hub.js index 3210faf..29cb98e 100644 --- a/src/utils/hub.js +++ b/src/utils/hub.js @@ -1,6 +1,10 @@ -// Utility functions to interact with the Hugging Face Hub (https://huggingface.co/models) -// const path = require('file-system-cache'); +/** + * @file Utility functions to interact with the Hugging Face Hub (https://huggingface.co/models) + * + * @module utils/hub + */ + import { env } from '../env.js'; import fs from 'fs';