From aa910b40984e6bd6c90cff0e68618659f13a025c Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Sat, 6 May 2023 04:15:12 +0200 Subject: [PATCH] Update JSDocs --- src/audio_utils.js | 63 +++++++++++++ src/backends/onnx.js | 32 ++++--- src/env.js | 75 +++++++++------ src/generation.js | 8 ++ src/image_utils.js | 9 ++ src/math_utils.js | 13 ++- src/models.js | 211 ++++++++++++++++++++++++++----------------- src/pipelines.js | 160 +++++++++++++------------------- src/processors.js | 37 ++++++-- src/samplers.js | 9 ++ src/tensor_utils.js | 12 ++- src/tokenizers.js | 30 +++++- src/transformers.js | 60 +++++------- src/utils.js | 22 +++++ src/utils/hub.js | 8 +- 15 files changed, 477 insertions(+), 272 deletions(-) create mode 100644 src/audio_utils.js diff --git a/src/audio_utils.js b/src/audio_utils.js new file mode 100644 index 0000000..46905a0 --- /dev/null +++ b/src/audio_utils.js @@ -0,0 +1,63 @@ +/** + * @file Helper module for audio processing. + * + * These functions and classes are only used internally, + * meaning an end-user shouldn't need to access anything here. + * + * @module audio_utils + */ + +import { + getFile, +} from './utils/hub.js'; + +export async function read_audio(url, sampling_rate) { + // Attempting to load from path/url + + if (typeof AudioContext === 'undefined') { + // Running in node or an environment without AudioContext + throw Error( + "Unable to load audio from path/URL since `AudioContext` is not available in your environment. " + + "As a result, audio data must be passed directly to the processor. " + + "If you are running in node.js, you can use an external library (e.g., https://github.com/audiojs/web-audio-api) to do this." + ) + } + const response = await (await getFile(url)).arrayBuffer(); + const audioCTX = new AudioContext({ sampleRate: sampling_rate }); + const decoded = await audioCTX.decodeAudioData(response); + let audio; + + // We now replicate HuggingFace's `ffmpeg_read` method: + if (decoded.numberOfChannels === 2) { + // When downmixing a stereo audio file to mono using the -ac 1 option in FFmpeg, + // the audio signal is summed across both channels to create a single mono channel. + // However, if the audio is at full scale (i.e. the highest possible volume level), + // the summing of the two channels can cause the audio signal to clip or distort. + + // To prevent this clipping, FFmpeg applies a scaling factor of 1/sqrt(2) (~ 0.707) + // to the audio signal before summing the two channels. This scaling factor ensures + // that the combined audio signal will not exceed the maximum possible level, even + // if both channels are at full scale. + + // After applying this scaling factor, the audio signal from both channels is summed + // to create a single mono channel. It's worth noting that this scaling factor is + // only applied when downmixing stereo audio to mono using the -ac 1 option in FFmpeg. + // If you're using a different downmixing method, or if you're not downmixing the + // audio at all, this scaling factor may not be needed. 
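+ // As a quick worked example of the formula below: if both channels hold a
+ // sample of 0.5, the downmixed value is Math.sqrt(2) * (0.5 + 0.5) / 2 ≈ 0.707.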
+ const SCALING_FACTOR = Math.sqrt(2); + + let left = decoded.getChannelData(0); + let right = decoded.getChannelData(1); + + audio = new Float32Array(left.length); + for (let i = 0; i < decoded.length; ++i) { + audio[i] = SCALING_FACTOR * (left[i] + right[i]) / 2; + } + + } else { + // If the audio is not stereo, we can just use the first channel: + audio = decoded.getChannelData(0); + } + + return audio; +} \ No newline at end of file diff --git a/src/backends/onnx.js b/src/backends/onnx.js index bc4deb7..de8879d 100644 --- a/src/backends/onnx.js +++ b/src/backends/onnx.js @@ -1,13 +1,20 @@ -// Handler file for choosing the correct version of ONNX Runtime, based on the environment. -// -// Ideally, we could import the `onnxruntime-web` and `onnxruntime-node` packages only when needed, -// but dynamic imports don't seem to work with the current webpack version and/or configuration. -// This is possibly due to the experimental nature of top-level await statements. -// -// So, we just import both packages, and use the appropriate one based on the environment. -// - When running in node, we use `onnxruntime-node`. -// - When running in the browser, we use `onnxruntime-web` (`onnxruntime-node` is not bundled). - +/** + * @file Handler file for choosing the correct version of ONNX Runtime, based on the environment. + * Ideally, we could import the `onnxruntime-web` and `onnxruntime-node` packages only when needed, + * but dynamic imports don't seem to work with the current webpack version and/or configuration. + * This is possibly due to the experimental nature of top-level await statements. + * So, we just import both packages, and use the appropriate one based on the environment. + * - When running in node, we use `onnxruntime-node`. + * - When running in the browser, we use `onnxruntime-web` (`onnxruntime-node` is not bundled). + * + * This module is not directly exported, but can be accessed through the environment variables: + * ```javascript + * import { env } from '@xenova/transformers'; + * console.log(env.backends.onnx); + * ``` + * + * @module backends/onnx + */ // NOTE: Import order matters here. We need to import `onnxruntime-node` before `onnxruntime-web`. import ONNX_NODE from 'onnxruntime-node'; @@ -15,7 +22,10 @@ import ONNX_WEB from 'onnxruntime-web'; export let ONNX; -export const executionProviders = ['wasm']; +export const executionProviders = [ + // 'webgpu', + 'wasm' +]; if (typeof process !== 'undefined') { // Running in a node-like environment. diff --git a/src/env.js b/src/env.js index 9480787..fbbb37e 100644 --- a/src/env.js +++ b/src/env.js @@ -1,3 +1,27 @@ +/** + * @file Module used to configure Transformers.js. For the full list of possible options, @see {@link env}. + * + * **Example:** Disable remote models. + * ```javascript + * import { env } from '@xenova/transformers'; + * env.allowRemoteModels = false; + * ``` + * + * **Example:** Set local model path. + * ```javascript + * import { env } from '@xenova/transformers'; + * env.localModelPath = '/path/to/local/models/'; + * ``` + * + * **Example:** Set cache directory. 
+ * ```javascript + * import { env } from '@xenova/transformers'; + * env.cacheDir = '/path/to/cache/directory/'; + * ``` + * + * @module env + */ + import fs from 'fs'; import path from 'path'; import url from 'url'; @@ -37,11 +61,23 @@ onnx_env.wasm.wasmPaths = RUNNING_LOCALLY : 'https://cdn.jsdelivr.net/npm/@xenova/transformers/dist/'; -// Global variable used to control exection, with suitable defaults +/** + * Global variable used to control execution, with suitable defaults + * @property {object} backends Expose environment variables of different backends, + * allowing users to set these variables if they want to. + * @property {string} remoteHost Host URL to load models from. Defaults to the Hugging Face Hub. + * @property {string} remotePathTemplate Path template to fill in and append to `remoteHost` when loading models. + * @property {boolean} allowRemoteModels Whether to allow loading of remote files, defaults to `true`. + * If set to `false`, it will have the same effect as setting `local_files_only=true` when loading pipelines, models, tokenizers, processors, etc. + * @property {string} localModelPath Path to load local models from. Defaults to `/models/`. + * @property {boolean} useFS Whether to use the file system to load files. By default, it is `true` if available. + * @property {string} __dirname Directory name of module. Useful for resolving local paths. + * @property {boolean} useBrowserCache Whether to use Cache API to cache models. By default, it is `true` if available. + * @property {boolean} useFSCache Whether to use the file system to cache files. By default, it is `true` if available. + * @property {string} cacheDir The directory to use for caching files with the file system. By default, it is `./.cache`. +*/ export const env = { - // Expose environment variables of different backends, allowing users to set - // these variables if they want to. - // TODO - will be used when we add more backends + /////////////////// Backends settings /////////////////// backends: { // onnxruntime-web/onnxruntime-node onnx: onnx_env, @@ -50,40 +86,27 @@ export const env = { tfjs: {}, }, - // URL to load models from. Defaults to the Hugging Face Hub. - remoteHost: 'https://huggingface.co/', - remotePathTemplate: '{model}/resolve/{revision}/', - - // Whether to allow loading of remote files, defaults to `true`. - // If set to `false`, it will have the same effect as setting `local_files_only=true` - // when loading pipelines, models, tokenizers, processors, etc. - allowRemoteModels: true, - - // Local URL to load models from. - localModelPath: localModelPath, - - // Whether to use the file system to load files. By default, it is true available. - useFS: FS_AVAILABLE, - - // Directory name of module. Useful for resolving local paths. __dirname, + /////////////////// Model settings /////////////////// remoteHost: 'https://huggingface.co/', remotePathTemplate: '{model}/resolve/{revision}/', allowRemoteModels: true, localModelPath: localModelPath, useFS: FS_AVAILABLE, /////////////////// Cache settings /////////////////// - // Whether to use Cache API to cache models. By default, it is true if available. useBrowserCache: WEB_CACHE_AVAILABLE, - - // Whether to use the file system to cache files. By default, it is true available. useFSCache: FS_AVAILABLE, - - // The directory to use for caching files with the file system. By default, it is `./.cache`.
cacheDir: DEFAULT_CACHE_DIR, - ////////////////////////////////////////////////// + ////////////////////////////////////////////////// } /** * @param {object} obj + * @private */ function isEmpty(obj) { return Object.keys(obj).length === 0; diff --git a/src/generation.js b/src/generation.js index 9e93132..d55af08 100644 --- a/src/generation.js +++ b/src/generation.js @@ -1,3 +1,11 @@ + +/** + * @file Classes, functions, and utilities for generation. + * + * @todo Describe how to create a custom `GenerationConfig`. + * + * @module generation + */ import { Tensor } from './tensor_utils.js'; import { Callable, diff --git a/src/image_utils.js b/src/image_utils.js index d24ab02..8f59797 100644 --- a/src/image_utils.js +++ b/src/image_utils.js @@ -1,4 +1,13 @@ +/** + * @file Helper module for image processing. + * + * These functions and classes are only used internally, + * meaning an end-user shouldn't need to access anything here. + * + * @module image_utils + */ + import fs from 'fs'; import { isString } from './utils.js'; import { env } from './env.js'; diff --git a/src/math_utils.js b/src/math_utils.js index 5ac012f..0ba907b 100644 --- a/src/math_utils.js +++ b/src/math_utils.js @@ -1,4 +1,13 @@ +/** + * @file Helper module for mathematical processing. + * + * These functions and classes are only used internally, + * meaning an end-user shouldn't need to access anything here. + * + * @module math_utils + */ + /** * @typedef {Int8Array | Uint8Array | Uint8ClampedArray | Int16Array | Uint16Array | Int32Array | Uint32Array | Float32Array | Float64Array} TypedArray * @typedef {BigInt64Array | BigUint64Array} BigTypedArray @@ -8,7 +17,7 @@ /** * @param {TypedArray} input */ -export function interpolate(input, [in_channels, in_height, in_width], [out_height, out_width], mode = 'bilinear', align_corners = false) { +export function interpolate_data(input, [in_channels, in_height, in_width], [out_height, out_width], mode = 'bilinear', align_corners = false) { // TODO use mode and align_corners // Output image dimensions @@ -79,7 +88,7 @@ export function interpolate(input, [in_channels, in_height, in_width], [out_heig /** - * Helper method to transpose a AnyTypedArray directly + * Helper method to transpose an `AnyTypedArray` directly * @param {T} array * @template {AnyTypedArray} T * @param {number[]} dims diff --git a/src/models.js b/src/models.js index cc7b09d..bedfd12 100644 --- a/src/models.js +++ b/src/models.js @@ -1,6 +1,47 @@ + +/** + * @file Definitions of all models available in Transformers.js. + * + * **Example:** Load and run an `AutoModel`. + * + * ```javascript + * import { AutoModel, AutoTokenizer } from '@xenova/transformers'; + * + * let tokenizer = await AutoTokenizer.from_pretrained('Xenova/bert-base-uncased'); + * let model = await AutoModel.from_pretrained('Xenova/bert-base-uncased'); + * + * let inputs = await tokenizer('I love transformers!'); + * let { logits } = await model(inputs); + * // Tensor { + * // data: Float32Array(183132) [-7.117443084716797, -7.107812881469727, -7.092104911804199, ...] + * // dims: (3) [1, 6, 30522], + * // type: "float32", + * // size: 183132, + * // } + * ``` + * + * We also provide other `AutoModel`s (listed below), which you can use in the same way as the Python library. For example: + * + * **Example:** Load and run an `AutoModelForSeq2SeqLM`.
+ * ```javascript + * import { AutoModelForSeq2SeqLM, AutoTokenizer } from '@xenova/transformers'; + * + * let tokenizer = await AutoTokenizer.from_pretrained('t5-small'); + * let model = await AutoModelForSeq2SeqLM.from_pretrained('t5-small'); + * + * let { input_ids } = await tokenizer('translate English to German: I love transformers!'); + * let outputs = await model.generate(input_ids); + * let decoded = await tokenizer.decode(outputs[0][0], { skip_special_tokens: true }); + * // 'Ich liebe Transformatoren!' + * ``` + * + * @module models + */ + import { Callable, isIntegralNumber, + isTypedArray, } from './utils.js'; import { @@ -34,6 +75,7 @@ const { InferenceSession, Tensor: ONNXTensor } = ONNX; /** * @typedef {import('./utils/hub.js').PretrainedOptions} PretrainedOptions */ + ////////////////////////////////////////////////// // Helper functions /** @@ -741,6 +783,10 @@ export class PreTrainedModel extends Callable { } = {}, ) { + if (!(inputs instanceof Tensor) && !isTypedArray(inputs) && !Array.isArray(inputs)) { + throw Error(`\`inputs\` must be a Tensor, TypedArray, or Array, but is "${inputs.constructor.name}".`); + } + if (inputs.length === 0) { throw Error("Must supply a non-empty array of input token ids.") } @@ -796,7 +842,7 @@ export class PreTrainedModel extends Callable { extractedLogits.push(lastLogits) } let logits = cat(extractedLogits); - logits_processor(beam.output_token_ids, logits) + logits_processor(beam.output_token_ids, logits); let sampledTokens = sampler(logits); for (let [newTokenId, logProb] of sampledTokens) { @@ -819,8 +865,8 @@ export class PreTrainedModel extends Callable { // Next, we get the best beams, per ID newest_beams = this.groupBeams(newest_beams).map( group => group - .sort((a, b) => b.score - a.score) // sort based on score - .slice(0, generation_config.num_beams) // remove outside beam width + .sort((a, b) => b.score - a.score) // sort based on score + .slice(0, generation_config.num_beams) // remove outside beam width ); // Flatten beams @@ -832,6 +878,8 @@ export class PreTrainedModel extends Callable { } } + // TODO - Ensure that we can return non-batched outputs + return this.groupBeams(beams).map( batch => { if (generation_config.num_return_sequences > 1) { @@ -929,19 +977,19 @@ export class PreTrainedModel extends Callable { } ////////////////////////////////////////////////// // Base model output class -class ModelOutput { } +export class ModelOutput { } ////////////////////////////////////////////////// // Bert models -class BertPreTrainedModel extends PreTrainedModel { } -class BertModel extends BertPreTrainedModel { } +export class BertPreTrainedModel extends PreTrainedModel { } +export class BertModel extends BertPreTrainedModel { } /** * BertForMaskedLM is a class representing a BERT model for masked language modeling. * @extends BertPreTrainedModel */ -class BertForMaskedLM extends BertPreTrainedModel { +export class BertForMaskedLM extends BertPreTrainedModel { /** * Calls the model on new inputs. * @@ -958,7 +1006,7 @@ class BertForMaskedLM extends BertPreTrainedModel { * BertForSequenceClassification is a class representing a BERT model for sequence classification. * @extends BertPreTrainedModel */ -class BertForSequenceClassification extends BertPreTrainedModel { +export class BertForSequenceClassification extends BertPreTrainedModel { /** * Calls the model on new inputs. 
* @@ -975,7 +1023,7 @@ class BertForSequenceClassification extends BertPreTrainedModel { * BertForTokenClassification is a class representing a BERT model for token classification. * @extends BertPreTrainedModel */ -class BertForTokenClassification extends BertPreTrainedModel { +export class BertForTokenClassification extends BertPreTrainedModel { /** * Calls the model on new inputs. * @@ -992,7 +1040,7 @@ class BertForTokenClassification extends BertPreTrainedModel { * BertForQuestionAnswering is a class representing a BERT model for question answering. * @extends BertPreTrainedModel */ -class BertForQuestionAnswering extends BertPreTrainedModel { +export class BertForQuestionAnswering extends BertPreTrainedModel { /** * Calls the model on new inputs. * @@ -1008,14 +1056,14 @@ class BertForQuestionAnswering extends BertPreTrainedModel { ////////////////////////////////////////////////// // DistilBert models -class DistilBertPreTrainedModel extends PreTrainedModel { } -class DistilBertModel extends DistilBertPreTrainedModel { } +export class DistilBertPreTrainedModel extends PreTrainedModel { } +export class DistilBertModel extends DistilBertPreTrainedModel { } /** * DistilBertForSequenceClassification is a class representing a DistilBERT model for sequence classification. * @extends DistilBertPreTrainedModel */ -class DistilBertForSequenceClassification extends DistilBertPreTrainedModel { +export class DistilBertForSequenceClassification extends DistilBertPreTrainedModel { /** * Calls the model on new inputs. * @@ -1032,7 +1080,7 @@ class DistilBertForSequenceClassification extends DistilBertPreTrainedModel { * DistilBertForTokenClassification is a class representing a DistilBERT model for token classification. * @extends DistilBertPreTrainedModel */ -class DistilBertForTokenClassification extends DistilBertPreTrainedModel { +export class DistilBertForTokenClassification extends DistilBertPreTrainedModel { /** * Calls the model on new inputs. * @@ -1050,7 +1098,7 @@ class DistilBertForTokenClassification extends DistilBertPreTrainedModel { * DistilBertForQuestionAnswering is a class representing a DistilBERT model for question answering. * @extends DistilBertPreTrainedModel */ -class DistilBertForQuestionAnswering extends DistilBertPreTrainedModel { +export class DistilBertForQuestionAnswering extends DistilBertPreTrainedModel { /** * Calls the model on new inputs. * @@ -1067,7 +1115,7 @@ class DistilBertForQuestionAnswering extends DistilBertPreTrainedModel { * DistilBertForMaskedLM is a class representing a DistilBERT model for masking task. * @extends DistilBertPreTrainedModel */ -class DistilBertForMaskedLM extends DistilBertPreTrainedModel { +export class DistilBertForMaskedLM extends DistilBertPreTrainedModel { /** * Calls the model on new inputs. * @@ -1084,14 +1132,14 @@ class DistilBertForMaskedLM extends DistilBertPreTrainedModel { ////////////////////////////////////////////////// // MobileBert models -class MobileBertPreTrainedModel extends PreTrainedModel { } -class MobileBertModel extends MobileBertPreTrainedModel { } +export class MobileBertPreTrainedModel extends PreTrainedModel { } +export class MobileBertModel extends MobileBertPreTrainedModel { } /** * MobileBertForMaskedLM is a class representing a MobileBERT model for masking task. * @extends MobileBertPreTrainedModel */ -class MobileBertForMaskedLM extends MobileBertPreTrainedModel { +export class MobileBertForMaskedLM extends MobileBertPreTrainedModel { /** * Calls the model on new inputs. 
* @@ -1107,7 +1155,7 @@ class MobileBertForMaskedLM extends MobileBertPreTrainedModel { /** * @extends MobileBertPreTrainedModel */ -class MobileBertForSequenceClassification extends MobileBertPreTrainedModel { +export class MobileBertForSequenceClassification extends MobileBertPreTrainedModel { /** * Calls the model on new inputs. * @@ -1123,7 +1171,7 @@ class MobileBertForSequenceClassification extends MobileBertPreTrainedModel { /** * @extends MobileBertPreTrainedModel */ -class MobileBertForQuestionAnswering extends MobileBertPreTrainedModel { +export class MobileBertForQuestionAnswering extends MobileBertPreTrainedModel { /** * Calls the model on new inputs. * @@ -1140,9 +1188,9 @@ class MobileBertForQuestionAnswering extends MobileBertPreTrainedModel { ////////////////////////////////////////////////// // SqueezeBert models -class SqueezeBertPreTrainedModel extends PreTrainedModel { } -class SqueezeBertModel extends SqueezeBertPreTrainedModel { } -class SqueezeBertForMaskedLM extends SqueezeBertPreTrainedModel { +export class SqueezeBertPreTrainedModel extends PreTrainedModel { } +export class SqueezeBertModel extends SqueezeBertPreTrainedModel { } +export class SqueezeBertForMaskedLM extends SqueezeBertPreTrainedModel { /** * Calls the model on new inputs. * @@ -1154,7 +1202,7 @@ class SqueezeBertForMaskedLM extends SqueezeBertPreTrainedModel { return new MaskedLMOutput(logits) } } -class SqueezeBertForSequenceClassification extends SqueezeBertPreTrainedModel { +export class SqueezeBertForSequenceClassification extends SqueezeBertPreTrainedModel { /** * Calls the model on new inputs. * @@ -1166,7 +1214,7 @@ class SqueezeBertForSequenceClassification extends SqueezeBertPreTrainedModel { return new SequenceClassifierOutput(logits) } } -class SqueezeBertForQuestionAnswering extends SqueezeBertPreTrainedModel { +export class SqueezeBertForQuestionAnswering extends SqueezeBertPreTrainedModel { /** * Calls the model on new inputs. * @@ -1183,9 +1231,9 @@ class SqueezeBertForQuestionAnswering extends SqueezeBertPreTrainedModel { ////////////////////////////////////////////////// // Albert models -class AlbertPreTrainedModel extends PreTrainedModel { } -class AlbertModel extends AlbertPreTrainedModel { } -class AlbertForSequenceClassification extends AlbertPreTrainedModel { +export class AlbertPreTrainedModel extends PreTrainedModel { } +export class AlbertModel extends AlbertPreTrainedModel { } +export class AlbertForSequenceClassification extends AlbertPreTrainedModel { /** * Calls the model on new inputs. * @@ -1197,7 +1245,7 @@ class AlbertForSequenceClassification extends AlbertPreTrainedModel { return new SequenceClassifierOutput(logits) } } -class AlbertForQuestionAnswering extends AlbertPreTrainedModel { +export class AlbertForQuestionAnswering extends AlbertPreTrainedModel { /** * Calls the model on new inputs. * @@ -1209,7 +1257,7 @@ class AlbertForQuestionAnswering extends AlbertPreTrainedModel { return new QuestionAnsweringModelOutput(outputs.start_logits, outputs.end_logits); } } -class AlbertForMaskedLM extends AlbertPreTrainedModel { +export class AlbertForMaskedLM extends AlbertPreTrainedModel { /** * Calls the model on new inputs. 
* @@ -1226,9 +1274,9 @@ class AlbertForMaskedLM extends AlbertPreTrainedModel { ////////////////////////////////////////////////// // T5 models -class T5PreTrainedModel extends PreTrainedModel { }; +export class T5PreTrainedModel extends PreTrainedModel { }; -class T5Model extends T5PreTrainedModel { +export class T5Model extends T5PreTrainedModel { /** * Generates text based on the provided arguments. * @throws {Error} - Throws an error as the current model class (T5Model) is not compatible with `.generate()`. @@ -1246,7 +1294,7 @@ class T5Model extends T5PreTrainedModel { * T5Model is a class representing a T5 model for conditional generation. * @extends T5PreTrainedModel */ -class T5ForConditionalGeneration extends T5PreTrainedModel { +export class T5ForConditionalGeneration extends T5PreTrainedModel { /** * Creates a new instance of the `T5ForConditionalGeneration` class. * @param {object} config - The model configuration. @@ -1336,9 +1384,9 @@ class T5ForConditionalGeneration extends T5PreTrainedModel { ////////////////////////////////////////////////// // MT5 models -class MT5PreTrainedModel extends PreTrainedModel { }; +export class MT5PreTrainedModel extends PreTrainedModel { }; -class MT5Model extends MT5PreTrainedModel { +export class MT5Model extends MT5PreTrainedModel { /** * * @param {...any} args @@ -1357,7 +1405,7 @@ class MT5Model extends MT5PreTrainedModel { * * @extends MT5PreTrainedModel */ -class MT5ForConditionalGeneration extends MT5PreTrainedModel { +export class MT5ForConditionalGeneration extends MT5PreTrainedModel { /** * Creates a new instance of the `MT5ForConditionalGeneration` class. * @param {any} config - The model configuration. @@ -1449,7 +1497,7 @@ class MT5ForConditionalGeneration extends MT5PreTrainedModel { ////////////////////////////////////////////////// // Bart models -class BartPretrainedModel extends PreTrainedModel { }; +export class BartPretrainedModel extends PreTrainedModel { }; /** * BART encoder and decoder model. @@ -1457,7 +1505,7 @@ class BartPretrainedModel extends PreTrainedModel { }; * @hideconstructor * @extends BartPretrainedModel */ -class BartModel extends BartPretrainedModel { +export class BartModel extends BartPretrainedModel { /** * Throws an error because the current model class (BartModel) is not compatible with `.generate()`. * @@ -1475,7 +1523,7 @@ class BartModel extends BartPretrainedModel { * BART model with a language model head for conditional generation. * @extends BartPretrainedModel */ -class BartForConditionalGeneration extends BartPretrainedModel { +export class BartForConditionalGeneration extends BartPretrainedModel { /** * Creates a new instance of the `BartForConditionalGeneration` class. * @param {object} config - The configuration object for the Bart model. @@ -1562,7 +1610,7 @@ class BartForConditionalGeneration extends BartPretrainedModel { } } -class BartForSequenceClassification extends BartPretrainedModel { +export class BartForSequenceClassification extends BartPretrainedModel { /** * Calls the model on new inputs. * @@ -1579,14 +1627,14 @@ class BartForSequenceClassification extends BartPretrainedModel { ////////////////////////////////////////////////// // Roberta models -class RobertaPreTrainedModel extends PreTrainedModel { } -class RobertaModel extends RobertaPreTrainedModel { } +export class RobertaPreTrainedModel extends PreTrainedModel { } +export class RobertaModel extends RobertaPreTrainedModel { } /** * RobertaForMaskedLM class for performing masked language modeling on Roberta models. 
* @extends RobertaPreTrainedModel */ -class RobertaForMaskedLM extends RobertaPreTrainedModel { +export class RobertaForMaskedLM extends RobertaPreTrainedModel { /** * Calls the model on new inputs. * @@ -1603,7 +1651,7 @@ class RobertaForMaskedLM extends RobertaPreTrainedModel { * RobertaForSequenceClassification class for performing sequence classification on Roberta models. * @extends RobertaPreTrainedModel */ -class RobertaForSequenceClassification extends RobertaPreTrainedModel { +export class RobertaForSequenceClassification extends RobertaPreTrainedModel { /** * Calls the model on new inputs. * @@ -1620,7 +1668,7 @@ class RobertaForSequenceClassification extends RobertaPreTrainedModel { * RobertaForQuestionAnswering class for performing question answering on Roberta models. * @extends RobertaPreTrainedModel */ -class RobertaForQuestionAnswering extends RobertaPreTrainedModel { +export class RobertaForQuestionAnswering extends RobertaPreTrainedModel { /** * Calls the model on new inputs. * @@ -1636,13 +1684,13 @@ class RobertaForQuestionAnswering extends RobertaPreTrainedModel { ////////////////////////////////////////////////// // T5 models -class WhisperPreTrainedModel extends PreTrainedModel { }; +export class WhisperPreTrainedModel extends PreTrainedModel { }; /** * WhisperModel class for training Whisper models without a language model head. * @extends WhisperPreTrainedModel */ -class WhisperModel extends WhisperPreTrainedModel { +export class WhisperModel extends WhisperPreTrainedModel { /** * Throws an error when attempting to generate output since this model doesn't have a language model head. * @throws Error @@ -1660,7 +1708,7 @@ class WhisperModel extends WhisperPreTrainedModel { * WhisperForConditionalGeneration class for generating conditional outputs from Whisper models. * @extends WhisperPreTrainedModel */ -class WhisperForConditionalGeneration extends WhisperPreTrainedModel { +export class WhisperForConditionalGeneration extends WhisperPreTrainedModel { /** * Creates a new instance of the `WhisperForConditionalGeneration` class. * @param {Object} config - Configuration object for the model. @@ -1788,7 +1836,7 @@ class WhisperForConditionalGeneration extends WhisperPreTrainedModel { * Vision Encoder-Decoder model based on OpenAI's GPT architecture for image captioning and other vision tasks * @extends PreTrainedModel */ -class VisionEncoderDecoderModel extends PreTrainedModel { +export class VisionEncoderDecoderModel extends PreTrainedModel { /** * Creates a new instance of the `VisionEncoderDecoderModel` class. * @param {object} config - The configuration object specifying the hyperparameters and other model settings. @@ -1881,8 +1929,8 @@ class VisionEncoderDecoderModel extends PreTrainedModel { ////////////////////////////////////////////////// // CLIP models -class CLIPPreTrainedModel extends PreTrainedModel { } -class CLIPModel extends CLIPPreTrainedModel { +export class CLIPPreTrainedModel extends PreTrainedModel { } +export class CLIPModel extends CLIPPreTrainedModel { } @@ -1890,12 +1938,12 @@ class CLIPModel extends CLIPPreTrainedModel { ////////////////////////////////////////////////// // GPT2 models -class GPT2PreTrainedModel extends PreTrainedModel { } +export class GPT2PreTrainedModel extends PreTrainedModel { } /** * GPT2Model is not compatible with `.generate()`, as it doesn't have a language model head. 
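+ * For text generation, use the `GPT2LMHeadModel` class (defined below) instead.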
* @extends GPT2PreTrainedModel */ -class GPT2Model extends GPT2PreTrainedModel { +export class GPT2Model extends GPT2PreTrainedModel { /** * * @param {...any} args @@ -1913,7 +1961,7 @@ class GPT2Model extends GPT2PreTrainedModel { * GPT-2 language model head on top of the GPT-2 base model. This model is suitable for text generation tasks. * @extends GPT2PreTrainedModel */ -class GPT2LMHeadModel extends GPT2PreTrainedModel { +export class GPT2LMHeadModel extends GPT2PreTrainedModel { /** * Creates a new instance of the `GPT2LMHeadModel` class. * @param {object} config - The configuration of the model. @@ -1969,12 +2017,12 @@ class GPT2LMHeadModel extends GPT2PreTrainedModel { } } -// class GPT2ForSequenceClassification extends GPT2PreTrainedModel { +// export class GPT2ForSequenceClassification extends GPT2PreTrainedModel { // TODO // } ////////////////////////////////////////////////// -class GPTNeoPreTrainedModel extends PreTrainedModel { } -class GPTNeoModel extends GPTNeoPreTrainedModel { +export class GPTNeoPreTrainedModel extends PreTrainedModel { } +export class GPTNeoModel extends GPTNeoPreTrainedModel { /** * * @param {...any} args @@ -1988,7 +2036,7 @@ class GPTNeoModel extends GPTNeoPreTrainedModel { } } -class GPTNeoForCausalLM extends GPTNeoPreTrainedModel { +export class GPTNeoForCausalLM extends GPTNeoPreTrainedModel { /** * Creates a new instance of the `GPTNeoForCausalLM` class. * @param {object} config - The configuration of the model. @@ -2046,13 +2094,13 @@ class GPTNeoForCausalLM extends GPTNeoPreTrainedModel { ////////////////////////////////////////////////// // CodeGen models -class CodeGenPreTrainedModel extends PreTrainedModel { } +export class CodeGenPreTrainedModel extends PreTrainedModel { } /** * CodeGenModel is a class representing a code generation model without a language model head. * * @extends CodeGenPreTrainedModel */ -class CodeGenModel extends CodeGenPreTrainedModel { +export class CodeGenModel extends CodeGenPreTrainedModel { /** * Throws an error indicating that the current model class is not compatible with `.generate()`, * as it doesn't have a language model head. @@ -2073,7 +2121,7 @@ class CodeGenModel extends CodeGenPreTrainedModel { * CodeGenForCausalLM is a class that represents a code generation model based on the GPT-2 architecture. It extends the `CodeGenPreTrainedModel` class. * @extends CodeGenPreTrainedModel */ -class CodeGenForCausalLM extends CodeGenPreTrainedModel { +export class CodeGenForCausalLM extends CodeGenPreTrainedModel { /** * Creates a new instance of the `CodeGenForCausalLM` class. * @param {object} config The model configuration object. 
@@ -2132,8 +2180,8 @@ class CodeGenForCausalLM extends CodeGenPreTrainedModel { ////////////////////////////////////////////////// ////////////////////////////////////////////////// -class ViTPreTrainedModel extends PreTrainedModel { } -class ViTForImageClassification extends ViTPreTrainedModel { +export class ViTPreTrainedModel extends PreTrainedModel { } +export class ViTForImageClassification extends ViTPreTrainedModel { /** * @param {any} model_inputs */ @@ -2145,8 +2193,8 @@ class ViTForImageClassification extends ViTPreTrainedModel { ////////////////////////////////////////////////// ////////////////////////////////////////////////// -class DetrPreTrainedModel extends PreTrainedModel { } -class DetrForObjectDetection extends DetrPreTrainedModel { +export class DetrPreTrainedModel extends PreTrainedModel { } +export class DetrForObjectDetection extends DetrPreTrainedModel { /** * @param {any} model_inputs */ @@ -2156,7 +2204,7 @@ class DetrForObjectDetection extends DetrPreTrainedModel { } } -class DetrForSegmentation extends DetrPreTrainedModel { +export class DetrForSegmentation extends DetrPreTrainedModel { /** * Runs the model with the provided inputs * @param {Object} model_inputs - Model inputs @@ -2168,7 +2216,7 @@ class DetrForSegmentation extends DetrPreTrainedModel { } } -class DetrObjectDetectionOutput extends ModelOutput { +export class DetrObjectDetectionOutput extends ModelOutput { /** * @param {any} logits * @param {any} pred_boxes @@ -2180,7 +2228,7 @@ class DetrObjectDetectionOutput extends ModelOutput { } } -class DetrSegmentationOutput extends ModelOutput { +export class DetrSegmentationOutput extends ModelOutput { /** * @param {Tensor} logits - The output logits of the model. @@ -2199,9 +2247,9 @@ class DetrSegmentationOutput extends ModelOutput { ////////////////////////////////////////////////// // MarianMT models -class MarianPreTrainedModel extends PreTrainedModel { }; +export class MarianPreTrainedModel extends PreTrainedModel { }; -class MarianModel extends MarianPreTrainedModel { +export class MarianModel extends MarianPreTrainedModel { /** * * @param {...any} args @@ -2215,7 +2263,7 @@ class MarianModel extends MarianPreTrainedModel { } } -class MarianMTModel extends MarianPreTrainedModel { +export class MarianMTModel extends MarianPreTrainedModel { /** * Creates a new instance of the `MarianMTModel` class. * @param {object} config The model configuration object. @@ -2304,9 +2352,9 @@ class MarianMTModel extends MarianPreTrainedModel { ////////////////////////////////////////////////// // M2M100 models -class M2M100PreTrainedModel extends PreTrainedModel { }; +export class M2M100PreTrainedModel extends PreTrainedModel { }; -class M2M100Model extends M2M100PreTrainedModel { +export class M2M100Model extends M2M100PreTrainedModel { /** * * @param {...any} args @@ -2320,7 +2368,7 @@ class M2M100Model extends M2M100PreTrainedModel { } } -class M2M100ForConditionalGeneration extends M2M100PreTrainedModel { +export class M2M100ForConditionalGeneration extends M2M100PreTrainedModel { /** * Creates a new instance of the `M2M100ForConditionalGeneration` class. * @param {object} config The model configuration object. @@ -2416,7 +2464,7 @@ class M2M100ForConditionalGeneration extends M2M100PreTrainedModel { * Base class of all AutoModels. Contains the `from_pretrained` function * which is used to instantiate pretrained models. */ -class PretrainedMixin { +export class PretrainedMixin { /** * Mapping from model type to model class. 
*/ @@ -2508,6 +2556,7 @@ export class AutoModel extends PretrainedMixin { 'mobilebert': MobileBertModel, 'squeezebert': SqueezeBertModel, 'marian': MarianModel, + 'm2m_100': M2M100Model, } } @@ -2678,7 +2727,7 @@ export class AutoModelForObjectDetection extends PretrainedMixin { ////////////////////////////////////////////////// ////////////////////////////////////////////////// -class Seq2SeqLMOutput extends ModelOutput { +export class Seq2SeqLMOutput extends ModelOutput { /** * @param {Tensor} logits - The output logits of the model. * @param {Array} past_key_values - An array of key/value pairs that represent the previous state of the model. @@ -2692,7 +2741,7 @@ } } -class SequenceClassifierOutput extends ModelOutput { +export class SequenceClassifierOutput extends ModelOutput { /** * @param {Tensor} logits */ @@ -2702,7 +2751,7 @@ } } -class TokenClassifierOutput extends ModelOutput { +export class TokenClassifierOutput extends ModelOutput { /** * @param {Tensor} logits */ @@ -2713,7 +2762,7 @@ } -class MaskedLMOutput extends ModelOutput { +export class MaskedLMOutput extends ModelOutput { /** * @param {Tensor} logits */ @@ -2723,7 +2772,7 @@ } } -class QuestionAnsweringModelOutput extends ModelOutput { +export class QuestionAnsweringModelOutput extends ModelOutput { /** * @param {Float32Array} start_logits - The logits for start positions of the answer. * @param {Float32Array} end_logits - The logits for end positions of the answer. diff --git a/src/pipelines.js b/src/pipelines.js index 32918f8..3a8e71c 100644 --- a/src/pipelines.js +++ b/src/pipelines.js @@ -1,3 +1,18 @@ +/** + * @file Pipelines provide a high-level, easy-to-use API for running machine learning models. + * + * **Example:** Instantiate a pipeline using the `pipeline` function. + * ```javascript + * import { pipeline } from '@xenova/transformers'; + * + * let classifier = await pipeline('sentiment-analysis'); + * let result = await classifier('I love transformers!'); + * // [{'label': 'POSITIVE', 'score': 0.999817686}] + * ``` + * + * @module pipelines + */ + import { Callable, isString, @@ -10,10 +25,11 @@ import { dot } from './math_utils.js'; import { - getFile, -} from './utils/hub.js'; + read_audio +} from './audio_utils.js'; import { - AutoTokenizer + AutoTokenizer, + PreTrainedTokenizer, } from './tokenizers.js'; import { AutoModel, @@ -26,7 +42,8 @@ import { AutoModelForVision2Seq, AutoModelForImageClassification, AutoModelForImageSegmentation, - AutoModelForObjectDetection + AutoModelForObjectDetection, + PreTrainedModel, } from './models.js'; import { AutoProcessor, @@ -40,7 +57,7 @@ import { CustomImage } from './image_utils.js'; * Prepare images for further tasks. * @param {any[]} images - images to prepare. * @returns {Promise} - returns processed images. - * @async + * @private */ async function prepareImages(images) { if (!Array.isArray(images)) { @@ -56,12 +73,12 @@ * Pipeline class for executing a natural language processing task. * @extends Callable */ -class Pipeline extends Callable { +export class Pipeline extends Callable { /** * Creates a new instance of Pipeline. * @param {string} task - The natural language processing task to be performed. - * @param {object} tokenizer - The tokenizer object to be used for tokenizing input texts.
- * @param {object} model - The model object to be used for processing input texts. + * @param {PreTrainedTokenizer} tokenizer - The tokenizer object to be used for tokenizing input texts. + * @param {PreTrainedModel} model - The model object to be used for processing input texts. */ constructor(task, tokenizer, model) { super(); @@ -101,7 +118,7 @@ class Pipeline extends Callable { * TextClassificationPipeline class for executing a text classification task. * @extends Pipeline */ -class TextClassificationPipeline extends Pipeline { +export class TextClassificationPipeline extends Pipeline { /** * Executes the text classification task. * @param {any} texts - The input texts to be classified. @@ -142,7 +159,7 @@ class TextClassificationPipeline extends Pipeline { * TokenClassificationPipeline class for executing a token classification task. * @extends Pipeline */ -class TokenClassificationPipeline extends Pipeline { +export class TokenClassificationPipeline extends Pipeline { /** * Executes the token classification task. * @param {any} texts - The input texts to be classified. @@ -211,7 +228,7 @@ class TokenClassificationPipeline extends Pipeline { * QuestionAnsweringPipeline class for executing a question answering task. * @extends Pipeline */ -class QuestionAnsweringPipeline extends Pipeline { +export class QuestionAnsweringPipeline extends Pipeline { /** * Executes the question answering task. * @param {string|string[]} question - The question(s) to be answered. @@ -275,7 +292,7 @@ class QuestionAnsweringPipeline extends Pipeline { * Class representing a fill-mask pipeline for natural language processing. * @extends Pipeline */ -class FillMaskPipeline extends Pipeline { +export class FillMaskPipeline extends Pipeline { /** * @param {any} texts */ @@ -328,12 +345,11 @@ class FillMaskPipeline extends Pipeline { * Text2TextGenerationPipeline class for generating text using a model that performs text-to-text generation tasks. * @extends Pipeline */ -class Text2TextGenerationPipeline extends Pipeline { +export class Text2TextGenerationPipeline extends Pipeline { _key = null; /** * Fill the masked token in the text(s) given as inputs. - * @async * @param {string|string[]} texts - The text or array of texts to be processed. * @param {Object} [options={}] - Options for the fill-mask pipeline. * @param {number} [options.topk=5] - The number of top-k predictions to return. @@ -399,7 +415,7 @@ class Text2TextGenerationPipeline extends Pipeline { * A pipeline for summarization tasks, inheriting from Text2TextGenerationPipeline. * @extends Text2TextGenerationPipeline */ -class SummarizationPipeline extends Text2TextGenerationPipeline { +export class SummarizationPipeline extends Text2TextGenerationPipeline { _key = 'summary_text'; } @@ -407,7 +423,7 @@ class SummarizationPipeline extends Text2TextGenerationPipeline { * TranslationPipeline class to translate text from one language to another using the provided model and tokenizer. * @extends Text2TextGenerationPipeline */ -class TranslationPipeline extends Text2TextGenerationPipeline { +export class TranslationPipeline extends Text2TextGenerationPipeline { _key = 'translation_text'; } @@ -415,10 +431,9 @@ class TranslationPipeline extends Text2TextGenerationPipeline { * A pipeline for generating text based on an input prompt. * @extends Pipeline */ -class TextGenerationPipeline extends Pipeline { +export class TextGenerationPipeline extends Pipeline { /** * Generates text based on an input prompt. 
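+ *
+ * **Example:** A minimal usage sketch (assumes the task's default model is used;
+ * the generated text will vary by model and sampling settings):
+ * ```javascript
+ * import { pipeline } from '@xenova/transformers';
+ *
+ * let generator = await pipeline('text-generation');
+ * let output = await generator('Once upon a time,');
+ * ```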
- * @async * @param {any} texts - The input prompt or prompts to generate text from. * @param {object} [generate_kwargs={}] - Additional arguments for text generation. * @returns {Promise} - The generated text or texts. @@ -466,12 +481,12 @@ class TextGenerationPipeline extends Pipeline { * Class representing an Zero Shot Classification Pipeline that should only be used with zero shot classification tasks. * @extends Pipeline */ -class ZeroShotClassificationPipeline extends Pipeline { +export class ZeroShotClassificationPipeline extends Pipeline { /** * @param {string} task - * @param {any} tokenizer - * @param {any} model + * @param {PreTrainedTokenizer} tokenizer + * @param {PreTrainedModel} model */ constructor(task, tokenizer, model) { super(task, tokenizer, model); @@ -577,7 +592,7 @@ class ZeroShotClassificationPipeline extends Pipeline { * * @todo Make sure this works for other models than `sentence-transformers`. */ -class FeatureExtractionPipeline extends Pipeline { +export class FeatureExtractionPipeline extends Pipeline { /** * Private method to perform mean pooling of the last hidden state followed by a normalization step. * @param {Tensor} last_hidden_state - Tensor of shape [batchSize, seqLength, embedDim] @@ -669,14 +684,14 @@ class FeatureExtractionPipeline extends Pipeline { * A class representing an automatic speech recognition pipeline. * @extends Pipeline */ -class AutomaticSpeechRecognitionPipeline extends Pipeline { +export class AutomaticSpeechRecognitionPipeline extends Pipeline { /** * Creates an instance of AutomaticSpeechRecognitionPipeline. - * @param {string} task - The type of the task for this pipeline. Currently only "asr" is supported. - * @param {object} tokenizer - The tokenizer to be used for pre-processing inputs. - * @param {object} model - The model to be used for the task. - * @param {object} processor - The processor to be used for pre-processing audio inputs. + * @param {string} task - The type of the task for this pipeline. + * @param {PreTrainedTokenizer} tokenizer - The tokenizer to be used for pre-processing inputs. + * @param {PreTrainedModel} model - The model to be used for the task. + * @param {Processor} processor - The processor to be used for pre-processing audio inputs. */ constructor(task, tokenizer, model, processor) { super(task, tokenizer, model); @@ -692,52 +707,7 @@ class AutomaticSpeechRecognitionPipeline extends Pipeline { */ async _preprocess(audio, sampling_rate) { if (isString(audio)) { - // Attempting to load from path - - if (typeof AudioContext === 'undefined') { - // Running in node or an environment without AudioContext - throw Error( - "Unable to load audio from path/URL since `AudioContext` is not available in your environment. " + - "As a result, audio data must be passed directly to the processor. " + - "If you are running in node.js, you can use an external library (e.g., https://github.com/audiojs/web-audio-api) to do this." - ) - } - const response = await (await getFile(audio)).arrayBuffer(); - const audioCTX = new AudioContext({ sampleRate: sampling_rate }); - const decoded = await audioCTX.decodeAudioData(response); - - // We now replicate HuggingFace's `ffmpeg_read` method: - - if (decoded.numberOfChannels === 2) { - // When downmixing a stereo audio file to mono using the -ac 1 option in FFmpeg, - // the audio signal is summed across both channels to create a single mono channel. - // However, if the audio is at full scale (i.e. 
the highest possible volume level), - // the summing of the two channels can cause the audio signal to clip or distort. - - // To prevent this clipping, FFmpeg applies a scaling factor of 1/sqrt(2) (~ 0.707) - // to the audio signal before summing the two channels. This scaling factor ensures - // that the combined audio signal will not exceed the maximum possible level, even - // if both channels are at full scale. - - // After applying this scaling factor, the audio signal from both channels is summed - // to create a single mono channel. It's worth noting that this scaling factor is - // only applied when downmixing stereo audio to mono using the -ac 1 option in FFmpeg. - // If you're using a different downmixing method, or if you're not downmixing the - // audio at all, this scaling factor may not be needed. - const SCALING_FACTOR = Math.sqrt(2); - - let left = decoded.getChannelData(0); - let right = decoded.getChannelData(1); - - audio = new Float32Array(left.length); - for (let i = 0; i < decoded.length; i++) { - audio[i] = SCALING_FACTOR * (left[i] + right[i]) / 2; - } - - } else { - // If the audio is not stereo, we can just use the first channel: - audio = decoded.getChannelData(0); - } + audio = await read_audio(audio, sampling_rate); } return audio; @@ -854,13 +824,13 @@ class AutomaticSpeechRecognitionPipeline extends Pipeline { * A pipeline for performing image-to-text tasks. * @extends Pipeline */ -class ImageToTextPipeline extends Pipeline { +export class ImageToTextPipeline extends Pipeline { /** * Create an instance of ImageToTextPipeline. * @param {string} task - The task name. - * @param {object} tokenizer - The tokenizer to use. - * @param {object} model - The generator model to use. - * @param {object} processor - The image processor to use. + * @param {PreTrainedTokenizer} tokenizer - The tokenizer to use. + * @param {PreTrainedModel} model - The generator model to use. + * @param {Processor} processor - The image processor to use. */ constructor(task, tokenizer, model, processor) { super(task, tokenizer, model); @@ -897,12 +867,12 @@ class ImageToTextPipeline extends Pipeline { * A class representing an image classification pipeline. * @extends Pipeline */ -class ImageClassificationPipeline extends Pipeline { +export class ImageClassificationPipeline extends Pipeline { /** * Create a new ImageClassificationPipeline. * @param {string} task - The task of the pipeline. - * @param {Object} model - The model to use for classification. - * @param {Function} processor - The function to preprocess images. + * @param {PreTrainedModel} model - The model to use for classification. + * @param {Processor} processor - The function to preprocess images. */ constructor(task, model, processor) { super(task, null, model); // TODO tokenizer @@ -911,7 +881,6 @@ class ImageClassificationPipeline extends Pipeline { /** * Classify the given images. - * @async * @param {any} images - The images to classify. * @param {Object} options - The options to use for classification. * @param {number} [options.topk=1] - The number of top results to return. @@ -953,11 +922,11 @@ class ImageClassificationPipeline extends Pipeline { * ImageSegmentationPipeline class for executing an image-segmentation task. * @extends Pipeline */ -class ImageSegmentationPipeline extends Pipeline { +export class ImageSegmentationPipeline extends Pipeline { /** * Create a new ImageSegmentationPipeline. - * @param {string} task - The task of the pipeline. - * @param {Object} model - The model to use for classification. 
+ * @param {string} task - The task of the pipeline. + * @param {PreTrainedModel} model - The model to use for classification. * @param {Processor} processor - The function to preprocess images. */ constructor(task, model, processor) { @@ -1067,14 +1036,14 @@ * Class representing a zero-shot image classification pipeline. * @extends Pipeline */ -class ZeroShotImageClassificationPipeline extends Pipeline { +export class ZeroShotImageClassificationPipeline extends Pipeline { /** * Create a zero-shot image classification pipeline. * @param {string} task - The task of the pipeline. - * @param {Object} tokenizer - The tokenizer to use. - * @param {Object} model - The model to use. - * @param {Function} processor - The image processing function. + * @param {PreTrainedTokenizer} tokenizer - The tokenizer to use. + * @param {PreTrainedModel} model - The model to use. + * @param {Processor} processor - The image processing function. */ constructor(task, tokenizer, model, processor) { super(task, tokenizer, model); @@ -1129,11 +1098,11 @@ } -class ObjectDetectionPipeline extends Pipeline { +export class ObjectDetectionPipeline extends Pipeline { /** - * @param {string} task - * @param {any} model - * @param {any} processor + * @param {string} task + * @param {PreTrainedModel} model + * @param {Processor} processor */ constructor(task, model, processor) { super(task, null, model); // TODO tokenizer @@ -1379,11 +1348,9 @@ const TASK_ALIASES = { */ /** - * Constructs a pipeline for a specified task with optional model and progress callback. + * Utility factory method to build a [`Pipeline`] object. * - * @async - * @function - * @param {string} task - The task to perform, e.g. "text-generation". + * @param {string} task - The task defining which pipeline will be returned. * @param {string} [model=null] - The name of the pre-trained model to use. If not specified, the default model for the task will be used. * @param {PretrainedOptions} [options] - Optional parameters for the pipeline. * @returns {Promise} A Pipeline object for the specified task. @@ -1461,6 +1428,7 @@ export async function pipeline( * Compute the Cartesian product of given arrays * @param {...Array} a - Arrays to compute the product * @returns {Array} - Returns the computed Cartesian product as an array + * @private */ function product(...a) { // Cartesian product of items diff --git a/src/processors.js b/src/processors.js index 24a1168..fee26b9 100644 --- a/src/processors.js +++ b/src/processors.js @@ -1,4 +1,24 @@ +/** + * @file Processors are used to prepare non-textual inputs (e.g., image or audio) for a model. + * + * **Example:** Using a `WhisperProcessor` to prepare an audio input for a model.
+ * ```javascript + * import { AutoProcessor, read_audio } from '@xenova/transformers'; + * + * let processor = await AutoProcessor.from_pretrained('openai/whisper-tiny.en'); + * let audio = await read_audio('https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/mlk.flac'); + * let { input_features } = await processor(audio); + * // Tensor { + * // data: Float32Array(240000) [0.4752984642982483, 0.5597258806228638, 0.56434166431427, ...], + * // dims: [1, 80, 3000], + * // type: 'float32', + * // size: 240000, + * // } + * ``` + * + * @module processors + */ import { Callable, } from './utils.js'; @@ -24,7 +44,7 @@ import { CustomImage } from './image_utils.js'; * * @extends Callable */ -class FeatureExtractor extends Callable { +export class FeatureExtractor extends Callable { /** * Constructs a new FeatureExtractor instance. * @@ -41,7 +61,7 @@ class FeatureExtractor extends Callable { * * @extends FeatureExtractor */ -class ImageFeatureExtractor extends FeatureExtractor { +export class ImageFeatureExtractor extends FeatureExtractor { /** * Constructs a new ViTFeatureExtractor instance. @@ -221,14 +241,14 @@ class ImageFeatureExtractor extends FeatureExtractor { } -class ViTFeatureExtractor extends ImageFeatureExtractor { } +export class ViTFeatureExtractor extends ImageFeatureExtractor { } /** * Detr Feature Extractor. * * @extends ImageFeatureExtractor */ -class DetrFeatureExtractor extends ImageFeatureExtractor { +export class DetrFeatureExtractor extends ImageFeatureExtractor { /** * Calls the feature extraction process on an array of image * URLs, preprocesses each image, and concatenates the resulting @@ -605,7 +625,7 @@ class DetrFeatureExtractor extends ImageFeatureExtractor { } -class WhisperFeatureExtractor extends FeatureExtractor { +export class WhisperFeatureExtractor extends FeatureExtractor { /** * Calculates the index offset for a given index and window size. @@ -921,7 +941,6 @@ class WhisperFeatureExtractor extends FeatureExtractor { * Asynchronously extracts features from a given audio using the provided configuration. * @param {Float32Array} audio - The audio data as a Float32Array. * @returns {Promise<{ input_features: Tensor }>} - A Promise resolving to an object containing the extracted input features as a Tensor. - * @async */ async _call(audio) { // audio is a float32array @@ -933,7 +952,7 @@ class WhisperFeatureExtractor extends FeatureExtractor { "remember to specify `chunk_length_s` and/or `stride_length_s`." ); } - let waveform = audio.slice(0, this.config.n_samples) + let waveform = audio.slice(0, this.config.n_samples); let features = this._extract_fbank_features(waveform); @@ -965,7 +984,6 @@ export class Processor extends Callable { * Calls the feature_extractor function with the given input. * @param {any} input - The input to extract features from. * @returns {Promise} A Promise that resolves with the extracted features. - * @async */ async _call(input) { return await this.feature_extractor(input); @@ -976,12 +994,11 @@ export class Processor extends Callable { * Represents a WhisperProcessor that extracts features from an audio input. * @extends Processor */ -class WhisperProcessor extends Processor { +export class WhisperProcessor extends Processor { /** * Calls the feature_extractor function with the given audio input. * @param {any} audio - The audio input to extract features from. * @returns {Promise} A Promise that resolves with the extracted features. 
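+ * (Internally, this forwards the audio to the processor's feature extractor — for Whisper, a `WhisperFeatureExtractor`.)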
- * @async */ async _call(audio) { return await this.feature_extractor(audio) } diff --git a/src/samplers.js b/src/samplers.js index 0c28a73..feb4774 100644 --- a/src/samplers.js +++ b/src/samplers.js @@ -1,3 +1,12 @@ +/** + * @file Helper module for sampling from a model that can generate (i.e., has `.generate()`). + * + * These classes are only used internally, meaning an end-user + * shouldn't need to access anything here. + * + * @module samplers + */ + import { Callable, } from "./utils.js"; diff --git a/src/tensor_utils.js b/src/tensor_utils.js index a0b7e26..f07a968 100644 --- a/src/tensor_utils.js +++ b/src/tensor_utils.js @@ -1,7 +1,16 @@ +/** + * @file Helper module for `Tensor` processing. + * + * These functions and classes are only used internally, + * meaning an end-user shouldn't need to access anything here. + * + * @module tensor_utils + */ + import { ONNX } from './backends/onnx.js'; import { - interpolate as interpolate_data, + interpolate_data, transpose_data } from './math_utils.js'; import { const ONNXTensor = ONNX.Tensor; -// TODO: fix error below export class Tensor extends ONNXTensor { /** * Create a new Tensor or copy an existing Tensor. diff --git a/src/tokenizers.js b/src/tokenizers.js index ee1aa19..0c918b0 100644 --- a/src/tokenizers.js +++ b/src/tokenizers.js @@ -1,3 +1,25 @@ + +/** + * @file Tokenizers are used to prepare textual inputs for a model. + * + * **Example:** Create an `AutoTokenizer` and use it to tokenize a sentence. + * This will automatically detect the tokenizer type based on the tokenizer class defined in `tokenizer.json`. + * ```javascript + * import { AutoTokenizer } from '@xenova/transformers'; + * + * let tokenizer = await AutoTokenizer.from_pretrained('bert-base-uncased'); + * let { input_ids } = await tokenizer('I love transformers!'); + * // Tensor { + * // data: BigInt64Array(6) [101n, 1045n, 2293n, 19081n, 999n, 102n], + * // dims: [1, 6], + * // type: 'int64', + * // size: 6, + * // } + * ``` + * + * @module tokenizers + */ + import { Callable, reverseDictionary, @@ -54,7 +76,7 @@ function createPattern(pattern) { * * @extends Callable */ -class TokenizerModel extends Callable { +export class TokenizerModel extends Callable { /** * Creates a new instance of TokenizerModel. * @param {object} config - The configuration object for the TokenizerModel. @@ -1474,7 +1496,7 @@ class WhitespaceSplit extends PreTokenizer { } } -class PreTrainedTokenizer extends Callable { +export class PreTrainedTokenizer extends Callable { /** * Create a new PreTrainedTokenizer instance. * @param {Object} tokenizerJSON - The JSON of the tokenizer. @@ -1996,9 +2018,9 @@ export class GPT2Tokenizer extends PreTrainedTokenizer { } export class BartTokenizer extends PreTrainedTokenizer { } export class RobertaTokenizer extends PreTrainedTokenizer { } -class BloomTokenizer extends PreTrainedTokenizer { } +export class BloomTokenizer extends PreTrainedTokenizer { } -class NllbTokenizer extends PreTrainedTokenizer { +export class NllbTokenizer extends PreTrainedTokenizer { constructor(tokenizerJSON, tokenizerConfig) { super(tokenizerJSON, tokenizerConfig); diff --git a/src/transformers.js b/src/transformers.js index 5fac75a..c36449a 100644 --- a/src/transformers.js +++ b/src/transformers.js @@ -1,41 +1,25 @@ -// Tokenizers -export { - AutoTokenizer, - BertTokenizer, - DistilBertTokenizer, - T5Tokenizer, - GPT2Tokenizer -} from './tokenizers.js'; +/** + * @file Entry point for the Transformers.js library.
Only the exports from this file + * are available to the end user, and are grouped as follows: + * + * 1. Pipelines + * 2. Environment variables + * 3. Models + * 4. Tokenizers + * 5. Processors + * + * @todo Add a link to the documentation for each export. + * + * @module transformers + */ -// Models -export { - // Auto classes - AutoModel, - AutoModelForSequenceClassification, - AutoModelForTokenClassification, - AutoModelForSeq2SeqLM, - AutoModelForCausalLM, - AutoModelForMaskedLM, - AutoModelForQuestionAnswering, - AutoModelForVision2Seq, - AutoModelForImageClassification, - AutoModelForObjectDetection, - - // Other - PreTrainedModel, -} from './models.js'; +export * from './pipelines.js'; +export * from './env.js'; +export * from './models.js'; +export * from './tokenizers.js'; +export * from './processors.js'; -// Processors -export { - AutoProcessor -} from './processors.js'; - -// environment variables -export { env } from './env.js'; - -// other -export { - pipeline -} from './pipelines.js'; -export { Tensor } from './tensor_utils.js'; +export * from './audio_utils.js'; +export * from './tensor_utils.js'; +export * from './math_utils.js'; diff --git a/src/utils.js b/src/utils.js index f6997b9..15964f2 100644 --- a/src/utils.js +++ b/src/utils.js @@ -1,4 +1,12 @@ +/** + * @file Utility functions/classes for Transformers.js. + * + * These are only used internally, meaning an end-user shouldn't + * need to access anything here. + * + * @module utils + */ /** * Helper function to dispatch progress callbacks. @@ -6,6 +14,7 @@ * @param {function} progress_callback - The progress callback function to dispatch. * @param {any} data - The data to pass to the progress callback function. * @returns {void} + * @private */ export function dispatchCallback(progress_callback, data) { if (progress_callback !== null) progress_callback(data); @@ -77,6 +86,19 @@ export function isString(text) { return typeof text === 'string' || text instanceof String } + +/** + * Check if a value is a typed array. + * @param {*} val - The value to check. + * @returns {boolean} - True if the value is a `TypedArray`, false otherwise. + * + * Adapted from https://stackoverflow.com/a/71091338/13989043 + */ +export function isTypedArray(val) { + return val?.prototype?.__proto__?.constructor?.name === 'TypedArray'; +} + + /** * Check if a value is an integer. * @param {*} x - The value to check. diff --git a/src/utils/hub.js b/src/utils/hub.js index 3210faf..29cb98e 100644 --- a/src/utils/hub.js +++ b/src/utils/hub.js @@ -1,6 +1,10 @@ -// Utility functions to interact with the Hugging Face Hub (https://huggingface.co/models) -// const path = require('file-system-cache'); +/** + * @file Utility functions to interact with the Hugging Face Hub (https://huggingface.co/models) + * + * @module utils/hub + */ + import { env } from '../env.js'; import fs from 'fs';