Update JSDocs

Joshua Lochner 2023-05-06 04:15:12 +02:00
parent 91b91e1e22
commit aa910b4098
15 changed files with 477 additions and 272 deletions

src/audio_utils.js Normal file
View File

@ -0,0 +1,63 @@
/**
* @file Helper module for audio processing.
*
* These functions and classes are only used internally,
* meaning an end-user shouldn't need to access anything here.
*
* @module audio_utils
*/
import {
getFile,
} from './utils/hub.js';
export async function read_audio(url, sampling_rate) {
// Attempting to load from path/url
if (typeof AudioContext === 'undefined') {
// Running in node or an environment without AudioContext
throw Error(
"Unable to load audio from path/URL since `AudioContext` is not available in your environment. " +
"As a result, audio data must be passed directly to the processor. " +
"If you are running in node.js, you can use an external library (e.g., https://github.com/audiojs/web-audio-api) to do this."
)
}
const response = await (await getFile(url)).arrayBuffer();
const audioCTX = new AudioContext({ sampleRate: sampling_rate });
const decoded = await audioCTX.decodeAudioData(response);
let audio;
// We now replicate HuggingFace's `ffmpeg_read` method:
if (decoded.numberOfChannels === 2) {
// When downmixing a stereo audio file to mono using the -ac 1 option in FFmpeg,
// the audio signal is summed across both channels to create a single mono channel.
// However, if the audio is at full scale (i.e. the highest possible volume level),
// the summing of the two channels can cause the audio signal to clip or distort.
// To prevent this clipping, FFmpeg applies a scaling factor of 1/sqrt(2) (~ 0.707)
// to the audio signal before summing the two channels. This scaling factor ensures
// that the combined audio signal will not exceed the maximum possible level, even
// if both channels are at full scale.
// After applying this scaling factor, the audio signal from both channels is summed
// to create a single mono channel. It's worth noting that this scaling factor is
// only applied when downmixing stereo audio to mono using the -ac 1 option in FFmpeg.
// If you're using a different downmixing method, or if you're not downmixing the
// audio at all, this scaling factor may not be needed.
const SCALING_FACTOR = Math.sqrt(2);
let left = decoded.getChannelData(0);
let right = decoded.getChannelData(1);
audio = new Float32Array(left.length);
for (let i = 0; i < decoded.length; ++i) {
audio[i] = SCALING_FACTOR * (left[i] + right[i]) / 2;
}
} else {
// If the audio is not stereo, we can just use the first channel:
audio = decoded.getChannelData(0);
}
return audio;
}
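For reference, a minimal usage sketch of the new helper (the URL is the same sample used elsewhere in these docs; the 16000 Hz target rate is an assumption matching what Whisper checkpoints expect):

```javascript
import { read_audio } from '@xenova/transformers';

// Fetches, decodes, and downmixes the audio into a mono Float32Array.
let audio = await read_audio(
    'https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/mlk.flac',
    16000, // target sampling rate
);
```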

src/backends/onnx.js
View File

@ -1,13 +1,20 @@
// Handler file for choosing the correct version of ONNX Runtime, based on the environment.
//
// Ideally, we could import the `onnxruntime-web` and `onnxruntime-node` packages only when needed,
// but dynamic imports don't seem to work with the current webpack version and/or configuration.
// This is possibly due to the experimental nature of top-level await statements.
//
// So, we just import both packages, and use the appropriate one based on the environment.
// - When running in node, we use `onnxruntime-node`.
// - When running in the browser, we use `onnxruntime-web` (`onnxruntime-node` is not bundled).
/**
* @file Handler file for choosing the correct version of ONNX Runtime, based on the environment.
* Ideally, we could import the `onnxruntime-web` and `onnxruntime-node` packages only when needed,
* but dynamic imports don't seem to work with the current webpack version and/or configuration.
* This is possibly due to the experimental nature of top-level await statements.
* So, we just import both packages, and use the appropriate one based on the environment.
* - When running in node, we use `onnxruntime-node`.
* - When running in the browser, we use `onnxruntime-web` (`onnxruntime-node` is not bundled).
*
* This module is not directly exported, but can be accessed through the environment variables:
* ```javascript
* import { env } from '@xenova/transformers';
* console.log(env.backends.onnx);
* ```
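*
* One can also tweak the settings of the selected backend through this object. A hedged
* sketch (`wasm.numThreads` is an onnxruntime-web option, not something defined in this module):
* ```javascript
* import { env } from '@xenova/transformers';
* // Limit the number of threads used by the WASM backend:
* env.backends.onnx.wasm.numThreads = 1;
* ```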
*
* @module backends/onnx
*/
// NOTE: Import order matters here. We need to import `onnxruntime-node` before `onnxruntime-web`.
import ONNX_NODE from 'onnxruntime-node';
@ -15,7 +22,10 @@ import ONNX_WEB from 'onnxruntime-web';
export let ONNX;
export const executionProviders = ['wasm'];
export const executionProviders = [
// 'webgpu',
'wasm'
];
if (typeof process !== 'undefined') {
// Running in a node-like environment.

src/env.js
View File

@ -1,3 +1,27 @@
/**
* @file Module used to configure Transformers.js. For the full list of possible options, see {@link env}.
*
* **Example:** Disable remote models.
* ```javascript
* import { env } from '@xenova/transformers';
* env.allowRemoteModels = false;
* ```
*
* **Example:** Set local model path.
* ```javascript
* import { env } from '@xenova/transformers';
* env.localModelPath = '/path/to/local/models/';
* ```
*
* **Example:** Set cache directory.
* ```javascript
* import { env } from '@xenova/transformers';
* env.cacheDir = '/path/to/cache/directory/';
* ```
*
* @module env
*/
import fs from 'fs';
import path from 'path';
import url from 'url';
@ -37,11 +61,23 @@ onnx_env.wasm.wasmPaths = RUNNING_LOCALLY
: 'https://cdn.jsdelivr.net/npm/@xenova/transformers/dist/';
// Global variable used to control execution, with suitable defaults
/**
* Global variable used to control execution, with suitable defaults
* @property {object} backends Expose environment variables of different backends,
* allowing users to set these variables if they want to.
* @property {string} remoteHost Host URL to load models from. Defaults to the Hugging Face Hub.
* @property {string} remotePathTemplate Path template to fill in and append to `remoteHost` when loading models.
* @property {boolean} allowRemoteModels Whether to allow loading of remote files, defaults to `true`.
* If set to `false`, it will have the same effect as setting `local_files_only=true` when loading pipelines, models, tokenizers, processors, etc.
* @property {string} localModelPath Path to load local models from. Defaults to `/models/`.
* @property {boolean} useFS Whether to use the file system to load files. By default, it is true if available.
* @property {string} __dirname Directory name of module. Useful for resolving local paths.
* @property {boolean} useBrowserCache Whether to use Cache API to cache models. By default, it is true if available.
* @property {boolean} useFSCache Whether to use the file system to cache files. By default, it is true if available.
* @property {string} cacheDir The directory to use for caching files with the file system. By default, it is `./.cache`.
*/
export const env = {
// Expose environment variables of different backends, allowing users to set
// these variables if they want to.
// TODO - will be used when we add more backends
/////////////////// Backends settings ///////////////////
backends: {
// onnxruntime-web/onnxruntime-node
onnx: onnx_env,
@ -50,40 +86,27 @@ export const env = {
tfjs: {},
},
// URL to load models from. Defaults to the Hugging Face Hub.
remoteHost: 'https://huggingface.co/',
remotePathTemplate: '{model}/resolve/{revision}/',
// Whether to allow loading of remote files, defaults to `true`.
// If set to `false`, it will have the same effect as setting `local_files_only=true`
// when loading pipelines, models, tokenizers, processors, etc.
allowRemoteModels: true,
// Local URL to load models from.
localModelPath: localModelPath,
// Whether to use the file system to load files. By default, it is true if available.
useFS: FS_AVAILABLE,
// Directory name of module. Useful for resolving local paths.
__dirname,
/////////////////// Model settings ///////////////////
remoteHost: 'https://huggingface.co/',
remotePathTemplate: '{model}/resolve/{revision}/',
allowRemoteModels: true,
localModelPath: localModelPath,
useFS: FS_AVAILABLE,
/////////////////// Cache settings ///////////////////
// Whether to use Cache API to cache models. By default, it is true if available.
useBrowserCache: WEB_CACHE_AVAILABLE,
// Whether to use the file system to cache files. By default, it is true if available.
useFSCache: FS_AVAILABLE,
// The directory to use for caching files with the file system. By default, it is `./.cache`.
cacheDir: DEFAULT_CACHE_DIR,
//////////////////////////////////////////////////////
//////////////////////////////////////////////////////
}
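To illustrate how these settings combine, here is a sketch of forcing fully local, file-system-cached operation (the paths are placeholders):

```javascript
import { env } from '@xenova/transformers';

// Never hit the Hugging Face Hub; resolve all models from a local folder.
env.allowRemoteModels = false;
env.localModelPath = '/path/to/local/models/';

// Cache files on the file system in a custom directory.
env.useFSCache = true;
env.cacheDir = '/path/to/cache/directory/';
```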
/**
* @param {object} obj
* @private
*/
function isEmpty(obj) {
return Object.keys(obj).length === 0;

src/generation.js
View File

@ -1,3 +1,11 @@
/**
* @file Classes, functions, and utilities for generation.
*
* @todo Describe how to create a custom `GenerationConfig`.
*
* @module generation
*/
import { Tensor } from './tensor_utils.js';
import {
Callable,

src/image_utils.js
View File

@ -1,4 +1,13 @@
/**
* @file Helper module for image processing.
*
* These functions and classes are only used internally,
* meaning an end-user shouldn't need to access anything here.
*
* @module image_utils
*/
import fs from 'fs';
import { isString } from './utils.js';
import { env } from './env.js';

src/math_utils.js
View File

@ -1,4 +1,13 @@
/**
* @file Helper module for mathematical processing.
*
* These functions and classes are only used internally,
* meaning an end-user shouldn't need to access anything here.
*
* @module math_utils
*/
/**
* @typedef {Int8Array | Uint8Array | Uint8ClampedArray | Int16Array | Uint16Array | Int32Array | Uint32Array | Float32Array | Float64Array} TypedArray
* @typedef {BigInt64Array | BigUint64Array} BigTypedArray
@ -8,7 +17,7 @@
/**
* @param {TypedArray} input
*/
export function interpolate(input, [in_channels, in_height, in_width], [out_height, out_width], mode = 'bilinear', align_corners = false) {
export function interpolate_data(input, [in_channels, in_height, in_width], [out_height, out_width], mode = 'bilinear', align_corners = false) {
// TODO use mode and align_corners
// Output image dimensions
@ -79,7 +88,7 @@ export function interpolate(input, [in_channels, in_height, in_width], [out_heig
/**
* Helper method to transpose an AnyTypedArray directly
* Helper method to transpose an `AnyTypedArray` directly
* @param {T} array
* @template {AnyTypedArray} T
* @param {number[]} dims

src/models.js
View File

@ -1,6 +1,47 @@
/**
* @file Definitions of all models available in Transformers.js.
*
* **Example:** Load and run an `AutoModel`.
*
* ```javascript
* import { AutoModel, AutoTokenizer } from '@xenova/transformers';
*
* let tokenizer = await AutoTokenizer.from_pretrained('Xenova/bert-base-uncased');
* let model = await AutoModel.from_pretrained('Xenova/bert-base-uncased');
*
* let inputs = await tokenizer('I love transformers!');
* let { logits } = await model(inputs);
* // Tensor {
* // data: Float32Array(183132) [-7.117443084716797, -7.107812881469727, -7.092104911804199, ...]
* // dims: (3) [1, 6, 30522],
* // type: "float32",
* // size: 183132,
* // }
* ```
*
* We also provide other `AutoModel`s (listed below), which you can use in the same way as the Python library. For example:
*
* **Example:** Load and run an `AutoModelForSeq2SeqLM`.
* ```javascript
* import { AutoModelForSeq2SeqLM, AutoTokenizer } from '@xenova/transformers';
*
* let tokenizer = await AutoTokenizer.from_pretrained('t5-small');
* let model = await AutoModelForSeq2SeqLM.from_pretrained('t5-small');
*
* let { input_ids } = await tokenizer('translate English to German: I love transformers!');
* let outputs = await model.generate(input_ids);
* let decoded = await tokenizer.decode(outputs[0][0], { skip_special_tokens: true });
* // 'Ich liebe Transformatoren!'
* ```
*
* @module models
*/
import {
Callable,
isIntegralNumber,
isTypedArray,
} from './utils.js';
import {
@ -34,6 +75,7 @@ const { InferenceSession, Tensor: ONNXTensor } = ONNX;
/**
* @typedef {import('./utils/hub.js').PretrainedOptions} PretrainedOptions
*/
//////////////////////////////////////////////////
// Helper functions
/**
@ -741,6 +783,10 @@ export class PreTrainedModel extends Callable {
} = {},
) {
if (!(inputs instanceof Tensor) && !isTypedArray(inputs) && !Array.isArray(inputs)) {
throw Error(`\`inputs\` must be a Tensor, TypedArray, or Array, but is "${inputs.constructor.name}".`);
}
if (inputs.length === 0) {
throw Error("Must supply a non-empty array of input token ids.")
}
@ -796,7 +842,7 @@ export class PreTrainedModel extends Callable {
extractedLogits.push(lastLogits)
}
let logits = cat(extractedLogits);
logits_processor(beam.output_token_ids, logits)
logits_processor(beam.output_token_ids, logits);
let sampledTokens = sampler(logits);
for (let [newTokenId, logProb] of sampledTokens) {
@ -819,8 +865,8 @@ export class PreTrainedModel extends Callable {
// Next, we get the best beams, per ID
newest_beams = this.groupBeams(newest_beams).map(
group => group
.sort((a, b) => b.score - a.score) // sort based on score
.slice(0, generation_config.num_beams) // remove outside beam width
.sort((a, b) => b.score - a.score) // sort based on score
.slice(0, generation_config.num_beams) // remove outside beam width
);
// Flatten beams
@ -832,6 +878,8 @@ export class PreTrainedModel extends Callable {
}
}
// TODO - Ensure that we can return non-batched outputs
return this.groupBeams(beams).map(
batch => {
if (generation_config.num_return_sequences > 1) {
@ -929,19 +977,19 @@ export class PreTrainedModel extends Callable {
}
//////////////////////////////////////////////////
// Base model output class
class ModelOutput { }
export class ModelOutput { }
//////////////////////////////////////////////////
// Bert models
class BertPreTrainedModel extends PreTrainedModel { }
class BertModel extends BertPreTrainedModel { }
export class BertPreTrainedModel extends PreTrainedModel { }
export class BertModel extends BertPreTrainedModel { }
/**
* BertForMaskedLM is a class representing a BERT model for masked language modeling.
* @extends BertPreTrainedModel
*/
class BertForMaskedLM extends BertPreTrainedModel {
export class BertForMaskedLM extends BertPreTrainedModel {
/**
* Calls the model on new inputs.
*
@ -958,7 +1006,7 @@ class BertForMaskedLM extends BertPreTrainedModel {
* BertForSequenceClassification is a class representing a BERT model for sequence classification.
* @extends BertPreTrainedModel
*/
class BertForSequenceClassification extends BertPreTrainedModel {
export class BertForSequenceClassification extends BertPreTrainedModel {
/**
* Calls the model on new inputs.
*
@ -975,7 +1023,7 @@ class BertForSequenceClassification extends BertPreTrainedModel {
* BertForTokenClassification is a class representing a BERT model for token classification.
* @extends BertPreTrainedModel
*/
class BertForTokenClassification extends BertPreTrainedModel {
export class BertForTokenClassification extends BertPreTrainedModel {
/**
* Calls the model on new inputs.
*
@ -992,7 +1040,7 @@ class BertForTokenClassification extends BertPreTrainedModel {
* BertForQuestionAnswering is a class representing a BERT model for question answering.
* @extends BertPreTrainedModel
*/
class BertForQuestionAnswering extends BertPreTrainedModel {
export class BertForQuestionAnswering extends BertPreTrainedModel {
/**
* Calls the model on new inputs.
*
@ -1008,14 +1056,14 @@ class BertForQuestionAnswering extends BertPreTrainedModel {
//////////////////////////////////////////////////
// DistilBert models
class DistilBertPreTrainedModel extends PreTrainedModel { }
class DistilBertModel extends DistilBertPreTrainedModel { }
export class DistilBertPreTrainedModel extends PreTrainedModel { }
export class DistilBertModel extends DistilBertPreTrainedModel { }
/**
* DistilBertForSequenceClassification is a class representing a DistilBERT model for sequence classification.
* @extends DistilBertPreTrainedModel
*/
class DistilBertForSequenceClassification extends DistilBertPreTrainedModel {
export class DistilBertForSequenceClassification extends DistilBertPreTrainedModel {
/**
* Calls the model on new inputs.
*
@ -1032,7 +1080,7 @@ class DistilBertForSequenceClassification extends DistilBertPreTrainedModel {
* DistilBertForTokenClassification is a class representing a DistilBERT model for token classification.
* @extends DistilBertPreTrainedModel
*/
class DistilBertForTokenClassification extends DistilBertPreTrainedModel {
export class DistilBertForTokenClassification extends DistilBertPreTrainedModel {
/**
* Calls the model on new inputs.
*
@ -1050,7 +1098,7 @@ class DistilBertForTokenClassification extends DistilBertPreTrainedModel {
* DistilBertForQuestionAnswering is a class representing a DistilBERT model for question answering.
* @extends DistilBertPreTrainedModel
*/
class DistilBertForQuestionAnswering extends DistilBertPreTrainedModel {
export class DistilBertForQuestionAnswering extends DistilBertPreTrainedModel {
/**
* Calls the model on new inputs.
*
@ -1067,7 +1115,7 @@ class DistilBertForQuestionAnswering extends DistilBertPreTrainedModel {
* DistilBertForMaskedLM is a class representing a DistilBERT model for masking task.
* @extends DistilBertPreTrainedModel
*/
class DistilBertForMaskedLM extends DistilBertPreTrainedModel {
export class DistilBertForMaskedLM extends DistilBertPreTrainedModel {
/**
* Calls the model on new inputs.
*
@ -1084,14 +1132,14 @@ class DistilBertForMaskedLM extends DistilBertPreTrainedModel {
//////////////////////////////////////////////////
// MobileBert models
class MobileBertPreTrainedModel extends PreTrainedModel { }
class MobileBertModel extends MobileBertPreTrainedModel { }
export class MobileBertPreTrainedModel extends PreTrainedModel { }
export class MobileBertModel extends MobileBertPreTrainedModel { }
/**
* MobileBertForMaskedLM is a class representing a MobileBERT model for masking task.
* @extends MobileBertPreTrainedModel
*/
class MobileBertForMaskedLM extends MobileBertPreTrainedModel {
export class MobileBertForMaskedLM extends MobileBertPreTrainedModel {
/**
* Calls the model on new inputs.
*
@ -1107,7 +1155,7 @@ class MobileBertForMaskedLM extends MobileBertPreTrainedModel {
/**
* @extends MobileBertPreTrainedModel
*/
class MobileBertForSequenceClassification extends MobileBertPreTrainedModel {
export class MobileBertForSequenceClassification extends MobileBertPreTrainedModel {
/**
* Calls the model on new inputs.
*
@ -1123,7 +1171,7 @@ class MobileBertForSequenceClassification extends MobileBertPreTrainedModel {
/**
* @extends MobileBertPreTrainedModel
*/
class MobileBertForQuestionAnswering extends MobileBertPreTrainedModel {
export class MobileBertForQuestionAnswering extends MobileBertPreTrainedModel {
/**
* Calls the model on new inputs.
*
@ -1140,9 +1188,9 @@ class MobileBertForQuestionAnswering extends MobileBertPreTrainedModel {
//////////////////////////////////////////////////
// SqueezeBert models
class SqueezeBertPreTrainedModel extends PreTrainedModel { }
class SqueezeBertModel extends SqueezeBertPreTrainedModel { }
class SqueezeBertForMaskedLM extends SqueezeBertPreTrainedModel {
export class SqueezeBertPreTrainedModel extends PreTrainedModel { }
export class SqueezeBertModel extends SqueezeBertPreTrainedModel { }
export class SqueezeBertForMaskedLM extends SqueezeBertPreTrainedModel {
/**
* Calls the model on new inputs.
*
@ -1154,7 +1202,7 @@ class SqueezeBertForMaskedLM extends SqueezeBertPreTrainedModel {
return new MaskedLMOutput(logits)
}
}
class SqueezeBertForSequenceClassification extends SqueezeBertPreTrainedModel {
export class SqueezeBertForSequenceClassification extends SqueezeBertPreTrainedModel {
/**
* Calls the model on new inputs.
*
@ -1166,7 +1214,7 @@ class SqueezeBertForSequenceClassification extends SqueezeBertPreTrainedModel {
return new SequenceClassifierOutput(logits)
}
}
class SqueezeBertForQuestionAnswering extends SqueezeBertPreTrainedModel {
export class SqueezeBertForQuestionAnswering extends SqueezeBertPreTrainedModel {
/**
* Calls the model on new inputs.
*
@ -1183,9 +1231,9 @@ class SqueezeBertForQuestionAnswering extends SqueezeBertPreTrainedModel {
//////////////////////////////////////////////////
// Albert models
class AlbertPreTrainedModel extends PreTrainedModel { }
class AlbertModel extends AlbertPreTrainedModel { }
class AlbertForSequenceClassification extends AlbertPreTrainedModel {
export class AlbertPreTrainedModel extends PreTrainedModel { }
export class AlbertModel extends AlbertPreTrainedModel { }
export class AlbertForSequenceClassification extends AlbertPreTrainedModel {
/**
* Calls the model on new inputs.
*
@ -1197,7 +1245,7 @@ class AlbertForSequenceClassification extends AlbertPreTrainedModel {
return new SequenceClassifierOutput(logits)
}
}
class AlbertForQuestionAnswering extends AlbertPreTrainedModel {
export class AlbertForQuestionAnswering extends AlbertPreTrainedModel {
/**
* Calls the model on new inputs.
*
@ -1209,7 +1257,7 @@ class AlbertForQuestionAnswering extends AlbertPreTrainedModel {
return new QuestionAnsweringModelOutput(outputs.start_logits, outputs.end_logits);
}
}
class AlbertForMaskedLM extends AlbertPreTrainedModel {
export class AlbertForMaskedLM extends AlbertPreTrainedModel {
/**
* Calls the model on new inputs.
*
@ -1226,9 +1274,9 @@ class AlbertForMaskedLM extends AlbertPreTrainedModel {
//////////////////////////////////////////////////
// T5 models
class T5PreTrainedModel extends PreTrainedModel { };
export class T5PreTrainedModel extends PreTrainedModel { };
class T5Model extends T5PreTrainedModel {
export class T5Model extends T5PreTrainedModel {
/**
* Generates text based on the provided arguments.
* @throws {Error} - Throws an error as the current model class (T5Model) is not compatible with `.generate()`.
@ -1246,7 +1294,7 @@ class T5Model extends T5PreTrainedModel {
* T5ForConditionalGeneration is a class representing a T5 model for conditional generation.
* @extends T5PreTrainedModel
*/
class T5ForConditionalGeneration extends T5PreTrainedModel {
export class T5ForConditionalGeneration extends T5PreTrainedModel {
/**
* Creates a new instance of the `T5ForConditionalGeneration` class.
* @param {object} config - The model configuration.
@ -1336,9 +1384,9 @@ class T5ForConditionalGeneration extends T5PreTrainedModel {
//////////////////////////////////////////////////
// MT5 models
class MT5PreTrainedModel extends PreTrainedModel { };
export class MT5PreTrainedModel extends PreTrainedModel { };
class MT5Model extends MT5PreTrainedModel {
export class MT5Model extends MT5PreTrainedModel {
/**
*
* @param {...any} args
@ -1357,7 +1405,7 @@ class MT5Model extends MT5PreTrainedModel {
*
* @extends MT5PreTrainedModel
*/
class MT5ForConditionalGeneration extends MT5PreTrainedModel {
export class MT5ForConditionalGeneration extends MT5PreTrainedModel {
/**
* Creates a new instance of the `MT5ForConditionalGeneration` class.
* @param {any} config - The model configuration.
@ -1449,7 +1497,7 @@ class MT5ForConditionalGeneration extends MT5PreTrainedModel {
//////////////////////////////////////////////////
// Bart models
class BartPretrainedModel extends PreTrainedModel { };
export class BartPretrainedModel extends PreTrainedModel { };
/**
* BART encoder and decoder model.
@ -1457,7 +1505,7 @@ class BartPretrainedModel extends PreTrainedModel { };
* @hideconstructor
* @extends BartPretrainedModel
*/
class BartModel extends BartPretrainedModel {
export class BartModel extends BartPretrainedModel {
/**
* Throws an error because the current model class (BartModel) is not compatible with `.generate()`.
*
@ -1475,7 +1523,7 @@ class BartModel extends BartPretrainedModel {
* BART model with a language model head for conditional generation.
* @extends BartPretrainedModel
*/
class BartForConditionalGeneration extends BartPretrainedModel {
export class BartForConditionalGeneration extends BartPretrainedModel {
/**
* Creates a new instance of the `BartForConditionalGeneration` class.
* @param {object} config - The configuration object for the Bart model.
@ -1562,7 +1610,7 @@ class BartForConditionalGeneration extends BartPretrainedModel {
}
}
class BartForSequenceClassification extends BartPretrainedModel {
export class BartForSequenceClassification extends BartPretrainedModel {
/**
* Calls the model on new inputs.
*
@ -1579,14 +1627,14 @@ class BartForSequenceClassification extends BartPretrainedModel {
//////////////////////////////////////////////////
// Roberta models
class RobertaPreTrainedModel extends PreTrainedModel { }
class RobertaModel extends RobertaPreTrainedModel { }
export class RobertaPreTrainedModel extends PreTrainedModel { }
export class RobertaModel extends RobertaPreTrainedModel { }
/**
* RobertaForMaskedLM class for performing masked language modeling on Roberta models.
* @extends RobertaPreTrainedModel
*/
class RobertaForMaskedLM extends RobertaPreTrainedModel {
export class RobertaForMaskedLM extends RobertaPreTrainedModel {
/**
* Calls the model on new inputs.
*
@ -1603,7 +1651,7 @@ class RobertaForMaskedLM extends RobertaPreTrainedModel {
* RobertaForSequenceClassification class for performing sequence classification on Roberta models.
* @extends RobertaPreTrainedModel
*/
class RobertaForSequenceClassification extends RobertaPreTrainedModel {
export class RobertaForSequenceClassification extends RobertaPreTrainedModel {
/**
* Calls the model on new inputs.
*
@ -1620,7 +1668,7 @@ class RobertaForSequenceClassification extends RobertaPreTrainedModel {
* RobertaForQuestionAnswering class for performing question answering on Roberta models.
* @extends RobertaPreTrainedModel
*/
class RobertaForQuestionAnswering extends RobertaPreTrainedModel {
export class RobertaForQuestionAnswering extends RobertaPreTrainedModel {
/**
* Calls the model on new inputs.
*
@ -1636,13 +1684,13 @@ class RobertaForQuestionAnswering extends RobertaPreTrainedModel {
//////////////////////////////////////////////////
// Whisper models
class WhisperPreTrainedModel extends PreTrainedModel { };
export class WhisperPreTrainedModel extends PreTrainedModel { };
/**
* WhisperModel class for using Whisper models without a language model head.
* @extends WhisperPreTrainedModel
*/
class WhisperModel extends WhisperPreTrainedModel {
export class WhisperModel extends WhisperPreTrainedModel {
/**
* Throws an error when attempting to generate output since this model doesn't have a language model head.
* @throws Error
@ -1660,7 +1708,7 @@ class WhisperModel extends WhisperPreTrainedModel {
* WhisperForConditionalGeneration class for generating conditional outputs from Whisper models.
* @extends WhisperPreTrainedModel
*/
class WhisperForConditionalGeneration extends WhisperPreTrainedModel {
export class WhisperForConditionalGeneration extends WhisperPreTrainedModel {
/**
* Creates a new instance of the `WhisperForConditionalGeneration` class.
* @param {Object} config - Configuration object for the model.
@ -1788,7 +1836,7 @@ class WhisperForConditionalGeneration extends WhisperPreTrainedModel {
* Vision Encoder-Decoder model based on OpenAI's GPT architecture for image captioning and other vision tasks
* @extends PreTrainedModel
*/
class VisionEncoderDecoderModel extends PreTrainedModel {
export class VisionEncoderDecoderModel extends PreTrainedModel {
/**
* Creates a new instance of the `VisionEncoderDecoderModel` class.
* @param {object} config - The configuration object specifying the hyperparameters and other model settings.
@ -1881,8 +1929,8 @@ class VisionEncoderDecoderModel extends PreTrainedModel {
//////////////////////////////////////////////////
// CLIP models
class CLIPPreTrainedModel extends PreTrainedModel { }
class CLIPModel extends CLIPPreTrainedModel {
export class CLIPPreTrainedModel extends PreTrainedModel { }
export class CLIPModel extends CLIPPreTrainedModel {
}
@ -1890,12 +1938,12 @@ class CLIPModel extends CLIPPreTrainedModel {
//////////////////////////////////////////////////
// GPT2 models
class GPT2PreTrainedModel extends PreTrainedModel { }
export class GPT2PreTrainedModel extends PreTrainedModel { }
/**
* GPT2Model is not compatible with `.generate()`, as it doesn't have a language model head.
* @extends GPT2PreTrainedModel
*/
class GPT2Model extends GPT2PreTrainedModel {
export class GPT2Model extends GPT2PreTrainedModel {
/**
*
* @param {...any} args
@ -1913,7 +1961,7 @@ class GPT2Model extends GPT2PreTrainedModel {
* GPT-2 language model head on top of the GPT-2 base model. This model is suitable for text generation tasks.
* @extends GPT2PreTrainedModel
*/
class GPT2LMHeadModel extends GPT2PreTrainedModel {
export class GPT2LMHeadModel extends GPT2PreTrainedModel {
/**
* Creates a new instance of the `GPT2LMHeadModel` class.
* @param {object} config - The configuration of the model.
@ -1969,12 +2017,12 @@ class GPT2LMHeadModel extends GPT2PreTrainedModel {
}
}
// class GPT2ForSequenceClassification extends GPT2PreTrainedModel {
// export class GPT2ForSequenceClassification extends GPT2PreTrainedModel {
// TODO
// }
//////////////////////////////////////////////////
class GPTNeoPreTrainedModel extends PreTrainedModel { }
class GPTNeoModel extends GPTNeoPreTrainedModel {
export class GPTNeoPreTrainedModel extends PreTrainedModel { }
export class GPTNeoModel extends GPTNeoPreTrainedModel {
/**
*
* @param {...any} args
@ -1988,7 +2036,7 @@ class GPTNeoModel extends GPTNeoPreTrainedModel {
}
}
class GPTNeoForCausalLM extends GPTNeoPreTrainedModel {
export class GPTNeoForCausalLM extends GPTNeoPreTrainedModel {
/**
* Creates a new instance of the `GPTNeoForCausalLM` class.
* @param {object} config - The configuration of the model.
@ -2046,13 +2094,13 @@ class GPTNeoForCausalLM extends GPTNeoPreTrainedModel {
//////////////////////////////////////////////////
// CodeGen models
class CodeGenPreTrainedModel extends PreTrainedModel { }
export class CodeGenPreTrainedModel extends PreTrainedModel { }
/**
* CodeGenModel is a class representing a code generation model without a language model head.
*
* @extends CodeGenPreTrainedModel
*/
class CodeGenModel extends CodeGenPreTrainedModel {
export class CodeGenModel extends CodeGenPreTrainedModel {
/**
* Throws an error indicating that the current model class is not compatible with `.generate()`,
* as it doesn't have a language model head.
@ -2073,7 +2121,7 @@ class CodeGenModel extends CodeGenPreTrainedModel {
* CodeGenForCausalLM is a class that represents a code generation model based on the GPT-2 architecture. It extends the `CodeGenPreTrainedModel` class.
* @extends CodeGenPreTrainedModel
*/
class CodeGenForCausalLM extends CodeGenPreTrainedModel {
export class CodeGenForCausalLM extends CodeGenPreTrainedModel {
/**
* Creates a new instance of the `CodeGenForCausalLM` class.
* @param {object} config The model configuration object.
@ -2132,8 +2180,8 @@ class CodeGenForCausalLM extends CodeGenPreTrainedModel {
//////////////////////////////////////////////////
//////////////////////////////////////////////////
class ViTPreTrainedModel extends PreTrainedModel { }
class ViTForImageClassification extends ViTPreTrainedModel {
export class ViTPreTrainedModel extends PreTrainedModel { }
export class ViTForImageClassification extends ViTPreTrainedModel {
/**
* @param {any} model_inputs
*/
@ -2145,8 +2193,8 @@ class ViTForImageClassification extends ViTPreTrainedModel {
//////////////////////////////////////////////////
//////////////////////////////////////////////////
class DetrPreTrainedModel extends PreTrainedModel { }
class DetrForObjectDetection extends DetrPreTrainedModel {
export class DetrPreTrainedModel extends PreTrainedModel { }
export class DetrForObjectDetection extends DetrPreTrainedModel {
/**
* @param {any} model_inputs
*/
@ -2156,7 +2204,7 @@ class DetrForObjectDetection extends DetrPreTrainedModel {
}
}
class DetrForSegmentation extends DetrPreTrainedModel {
export class DetrForSegmentation extends DetrPreTrainedModel {
/**
* Runs the model with the provided inputs
* @param {Object} model_inputs - Model inputs
@ -2168,7 +2216,7 @@ class DetrForSegmentation extends DetrPreTrainedModel {
}
}
class DetrObjectDetectionOutput extends ModelOutput {
export class DetrObjectDetectionOutput extends ModelOutput {
/**
* @param {any} logits
* @param {any} pred_boxes
@ -2180,7 +2228,7 @@ class DetrObjectDetectionOutput extends ModelOutput {
}
}
class DetrSegmentationOutput extends ModelOutput {
export class DetrSegmentationOutput extends ModelOutput {
/**
* @param {Tensor} logits - The output logits of the model.
@ -2199,9 +2247,9 @@ class DetrSegmentationOutput extends ModelOutput {
//////////////////////////////////////////////////
// MarianMT models
class MarianPreTrainedModel extends PreTrainedModel { };
export class MarianPreTrainedModel extends PreTrainedModel { };
class MarianModel extends MarianPreTrainedModel {
export class MarianModel extends MarianPreTrainedModel {
/**
*
* @param {...any} args
@ -2215,7 +2263,7 @@ class MarianModel extends MarianPreTrainedModel {
}
}
class MarianMTModel extends MarianPreTrainedModel {
export class MarianMTModel extends MarianPreTrainedModel {
/**
* Creates a new instance of the `MarianMTModel` class.
* @param {object} config The model configuration object.
@ -2304,9 +2352,9 @@ class MarianMTModel extends MarianPreTrainedModel {
//////////////////////////////////////////////////
// M2M100 models
class M2M100PreTrainedModel extends PreTrainedModel { };
export class M2M100PreTrainedModel extends PreTrainedModel { };
class M2M100Model extends M2M100PreTrainedModel {
export class M2M100Model extends M2M100PreTrainedModel {
/**
*
* @param {...any} args
@ -2320,7 +2368,7 @@ class M2M100Model extends M2M100PreTrainedModel {
}
}
class M2M100ForConditionalGeneration extends M2M100PreTrainedModel {
export class M2M100ForConditionalGeneration extends M2M100PreTrainedModel {
/**
* Creates a new instance of the `M2M100ForConditionalGeneration` class.
* @param {object} config The model configuration object.
@ -2416,7 +2464,7 @@ class M2M100ForConditionalGeneration extends M2M100PreTrainedModel {
* Base class of all AutoModels. Contains the `from_pretrained` function
* which is used to instantiate pretrained models.
*/
class PretrainedMixin {
export class PretrainedMixin {
/**
* Mapping from model type to model class.
*/
@ -2508,6 +2556,7 @@ export class AutoModel extends PretrainedMixin {
'mobilebert': MobileBertModel,
'squeezebert': SqueezeBertModel,
'marian': MarianModel,
'm2m_100': M2M100Model,
}
}
@ -2678,7 +2727,7 @@ export class AutoModelForObjectDetection extends PretrainedMixin {
//////////////////////////////////////////////////
//////////////////////////////////////////////////
class Seq2SeqLMOutput extends ModelOutput {
export class Seq2SeqLMOutput extends ModelOutput {
/**
* @param {Tensor} logits - The output logits of the model.
* @param {Array} past_key_values - An array of key/value pairs that represent the previous state of the model.
@ -2692,7 +2741,7 @@ class Seq2SeqLMOutput extends ModelOutput {
}
}
class SequenceClassifierOutput extends ModelOutput {
export class SequenceClassifierOutput extends ModelOutput {
/**
* @param {Tensor} logits
*/
@ -2702,7 +2751,7 @@ class SequenceClassifierOutput extends ModelOutput {
}
}
class TokenClassifierOutput extends ModelOutput {
export class TokenClassifierOutput extends ModelOutput {
/**
* @param {Tensor} logits
*/
@ -2713,7 +2762,7 @@ class TokenClassifierOutput extends ModelOutput {
}
class MaskedLMOutput extends ModelOutput {
export class MaskedLMOutput extends ModelOutput {
/**
* @param {Tensor} logits
*/
@ -2723,7 +2772,7 @@ class MaskedLMOutput extends ModelOutput {
}
}
class QuestionAnsweringModelOutput extends ModelOutput {
export class QuestionAnsweringModelOutput extends ModelOutput {
/**
* @param {Float32Array} start_logits - The logits for start positions of the answer.
* @param {Float32Array} end_logits - The logits for end positions of the answer.

src/pipelines.js
View File

@ -1,3 +1,18 @@
/**
* @file Pipelines provide a high-level, easy-to-use API for running machine learning models.
*
* **Example:** Instantiate pipeline using the `pipeline` function.
* ```javascript
* import { pipeline } from '@xenova/transformers';
*
* let classifier = await pipeline('sentiment-analysis');
* let result = await classifier('I love transformers!');
* // [{'label': 'POSITIVE', 'score': 0.999817686}]
* ```
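*
* **Example:** Specify a model to use (a sketch; the model id is illustrative, and is passed
* as the optional second argument documented on the `pipeline` function below).
* ```javascript
* import { pipeline } from '@xenova/transformers';
*
* let classifier = await pipeline('sentiment-analysis', 'Xenova/distilbert-base-uncased-finetuned-sst-2-english');
* let result = await classifier('I love transformers!');
* ```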
*
* @module pipelines
*/
import {
Callable,
isString,
@ -10,10 +25,11 @@ import {
dot
} from './math_utils.js';
import {
getFile,
} from './utils/hub.js';
read_audio
} from './audio_utils.js';
import {
AutoTokenizer
AutoTokenizer,
PreTrainedTokenizer,
} from './tokenizers.js';
import {
AutoModel,
@ -26,7 +42,8 @@ import {
AutoModelForVision2Seq,
AutoModelForImageClassification,
AutoModelForImageSegmentation,
AutoModelForObjectDetection
AutoModelForObjectDetection,
PreTrainedModel,
} from './models.js';
import {
AutoProcessor,
@ -40,7 +57,7 @@ import { CustomImage } from './image_utils.js';
* Prepare images for further tasks.
* @param {any[]} images - images to prepare.
* @returns {Promise<any[]>} - returns processed images.
* @async
* @private
*/
async function prepareImages(images) {
if (!Array.isArray(images)) {
@ -56,12 +73,12 @@ async function prepareImages(images) {
* Pipeline class for executing a natural language processing task.
* @extends Callable
*/
class Pipeline extends Callable {
export class Pipeline extends Callable {
/**
* Creates a new instance of Pipeline.
* @param {string} task - The natural language processing task to be performed.
* @param {object} tokenizer - The tokenizer object to be used for tokenizing input texts.
* @param {object} model - The model object to be used for processing input texts.
* @param {PreTrainedTokenizer} tokenizer - The tokenizer object to be used for tokenizing input texts.
* @param {PreTrainedModel} model - The model object to be used for processing input texts.
*/
constructor(task, tokenizer, model) {
super();
@ -101,7 +118,7 @@ class Pipeline extends Callable {
* TextClassificationPipeline class for executing a text classification task.
* @extends Pipeline
*/
class TextClassificationPipeline extends Pipeline {
export class TextClassificationPipeline extends Pipeline {
/**
* Executes the text classification task.
* @param {any} texts - The input texts to be classified.
@ -142,7 +159,7 @@ class TextClassificationPipeline extends Pipeline {
* TokenClassificationPipeline class for executing a token classification task.
* @extends Pipeline
*/
class TokenClassificationPipeline extends Pipeline {
export class TokenClassificationPipeline extends Pipeline {
/**
* Executes the token classification task.
* @param {any} texts - The input texts to be classified.
@ -211,7 +228,7 @@ class TokenClassificationPipeline extends Pipeline {
* QuestionAnsweringPipeline class for executing a question answering task.
* @extends Pipeline
*/
class QuestionAnsweringPipeline extends Pipeline {
export class QuestionAnsweringPipeline extends Pipeline {
/**
* Executes the question answering task.
* @param {string|string[]} question - The question(s) to be answered.
@ -275,7 +292,7 @@ class QuestionAnsweringPipeline extends Pipeline {
* Class representing a fill-mask pipeline for natural language processing.
* @extends Pipeline
*/
class FillMaskPipeline extends Pipeline {
export class FillMaskPipeline extends Pipeline {
/**
* @param {any} texts
*/
@ -328,12 +345,11 @@ class FillMaskPipeline extends Pipeline {
* Text2TextGenerationPipeline class for generating text using a model that performs text-to-text generation tasks.
* @extends Pipeline
*/
class Text2TextGenerationPipeline extends Pipeline {
export class Text2TextGenerationPipeline extends Pipeline {
_key = null;
/**
* Processes the input text(s) and generates output text.
* @async
* @param {string|string[]} texts - The text or array of texts to be processed.
* @param {Object} [options={}] - Options for the text-to-text generation pipeline.
* @param {number} [options.topk=5] - The number of top-k predictions to return.
@ -399,7 +415,7 @@ class Text2TextGenerationPipeline extends Pipeline {
* A pipeline for summarization tasks, inheriting from Text2TextGenerationPipeline.
* @extends Text2TextGenerationPipeline
*/
class SummarizationPipeline extends Text2TextGenerationPipeline {
export class SummarizationPipeline extends Text2TextGenerationPipeline {
_key = 'summary_text';
}
@ -407,7 +423,7 @@ class SummarizationPipeline extends Text2TextGenerationPipeline {
* TranslationPipeline class to translate text from one language to another using the provided model and tokenizer.
* @extends Text2TextGenerationPipeline
*/
class TranslationPipeline extends Text2TextGenerationPipeline {
export class TranslationPipeline extends Text2TextGenerationPipeline {
_key = 'translation_text';
}
@ -415,10 +431,9 @@ class TranslationPipeline extends Text2TextGenerationPipeline {
* A pipeline for generating text based on an input prompt.
* @extends Pipeline
*/
class TextGenerationPipeline extends Pipeline {
export class TextGenerationPipeline extends Pipeline {
/**
* Generates text based on an input prompt.
* @async
* @param {any} texts - The input prompt or prompts to generate text from.
* @param {object} [generate_kwargs={}] - Additional arguments for text generation.
* @returns {Promise<any>} - The generated text or texts.
@ -466,12 +481,12 @@ class TextGenerationPipeline extends Pipeline {
* Class representing a Zero Shot Classification Pipeline that should only be used with zero shot classification tasks.
* @extends Pipeline
*/
class ZeroShotClassificationPipeline extends Pipeline {
export class ZeroShotClassificationPipeline extends Pipeline {
/**
* @param {string} task
* @param {any} tokenizer
* @param {any} model
* @param {PreTrainedTokenizer} tokenizer
* @param {PreTrainedModel} model
*/
constructor(task, tokenizer, model) {
super(task, tokenizer, model);
@ -577,7 +592,7 @@ class ZeroShotClassificationPipeline extends Pipeline {
*
* @todo Make sure this works for models other than `sentence-transformers`.
*/
class FeatureExtractionPipeline extends Pipeline {
export class FeatureExtractionPipeline extends Pipeline {
/**
* Private method to perform mean pooling of the last hidden state followed by a normalization step.
* @param {Tensor} last_hidden_state - Tensor of shape [batchSize, seqLength, embedDim]
@ -669,14 +684,14 @@ class FeatureExtractionPipeline extends Pipeline {
* A class representing an automatic speech recognition pipeline.
* @extends Pipeline
*/
class AutomaticSpeechRecognitionPipeline extends Pipeline {
export class AutomaticSpeechRecognitionPipeline extends Pipeline {
/**
* Creates an instance of AutomaticSpeechRecognitionPipeline.
* @param {string} task - The type of the task for this pipeline. Currently only "asr" is supported.
* @param {object} tokenizer - The tokenizer to be used for pre-processing inputs.
* @param {object} model - The model to be used for the task.
* @param {object} processor - The processor to be used for pre-processing audio inputs.
* @param {string} task - The type of the task for this pipeline.
* @param {PreTrainedTokenizer} tokenizer - The tokenizer to be used for pre-processing inputs.
* @param {PreTrainedModel} model - The model to be used for the task.
* @param {Processor} processor - The processor to be used for pre-processing audio inputs.
*/
constructor(task, tokenizer, model, processor) {
super(task, tokenizer, model);
@ -692,52 +707,7 @@ class AutomaticSpeechRecognitionPipeline extends Pipeline {
*/
async _preprocess(audio, sampling_rate) {
if (isString(audio)) {
// Attempting to load from path
if (typeof AudioContext === 'undefined') {
// Running in node or an environment without AudioContext
throw Error(
"Unable to load audio from path/URL since `AudioContext` is not available in your environment. " +
"As a result, audio data must be passed directly to the processor. " +
"If you are running in node.js, you can use an external library (e.g., https://github.com/audiojs/web-audio-api) to do this."
)
}
const response = await (await getFile(audio)).arrayBuffer();
const audioCTX = new AudioContext({ sampleRate: sampling_rate });
const decoded = await audioCTX.decodeAudioData(response);
// We now replicate HuggingFace's `ffmpeg_read` method:
if (decoded.numberOfChannels === 2) {
// When downmixing a stereo audio file to mono using the -ac 1 option in FFmpeg,
// the audio signal is summed across both channels to create a single mono channel.
// However, if the audio is at full scale (i.e. the highest possible volume level),
// the summing of the two channels can cause the audio signal to clip or distort.
// To prevent this clipping, FFmpeg applies a scaling factor of 1/sqrt(2) (~ 0.707)
// to the audio signal before summing the two channels. This scaling factor ensures
// that the combined audio signal will not exceed the maximum possible level, even
// if both channels are at full scale.
// After applying this scaling factor, the audio signal from both channels is summed
// to create a single mono channel. It's worth noting that this scaling factor is
// only applied when downmixing stereo audio to mono using the -ac 1 option in FFmpeg.
// If you're using a different downmixing method, or if you're not downmixing the
// audio at all, this scaling factor may not be needed.
const SCALING_FACTOR = Math.sqrt(2);
let left = decoded.getChannelData(0);
let right = decoded.getChannelData(1);
audio = new Float32Array(left.length);
for (let i = 0; i < decoded.length; i++) {
audio[i] = SCALING_FACTOR * (left[i] + right[i]) / 2;
}
} else {
// If the audio is not stereo, we can just use the first channel:
audio = decoded.getChannelData(0);
}
audio = await read_audio(audio, sampling_rate);
}
return audio;
@ -854,13 +824,13 @@ class AutomaticSpeechRecognitionPipeline extends Pipeline {
* A pipeline for performing image-to-text tasks.
* @extends Pipeline
*/
class ImageToTextPipeline extends Pipeline {
export class ImageToTextPipeline extends Pipeline {
/**
* Create an instance of ImageToTextPipeline.
* @param {string} task - The task name.
* @param {object} tokenizer - The tokenizer to use.
* @param {object} model - The generator model to use.
* @param {object} processor - The image processor to use.
* @param {PreTrainedTokenizer} tokenizer - The tokenizer to use.
* @param {PreTrainedModel} model - The generator model to use.
* @param {Processor} processor - The image processor to use.
*/
constructor(task, tokenizer, model, processor) {
super(task, tokenizer, model);
@ -897,12 +867,12 @@ class ImageToTextPipeline extends Pipeline {
* A class representing an image classification pipeline.
* @extends Pipeline
*/
class ImageClassificationPipeline extends Pipeline {
export class ImageClassificationPipeline extends Pipeline {
/**
* Create a new ImageClassificationPipeline.
* @param {string} task - The task of the pipeline.
* @param {Object} model - The model to use for classification.
* @param {Function} processor - The function to preprocess images.
* @param {PreTrainedModel} model - The model to use for classification.
* @param {Processor} processor - The processor used to preprocess images.
*/
constructor(task, model, processor) {
super(task, null, model); // TODO tokenizer
@ -911,7 +881,6 @@ class ImageClassificationPipeline extends Pipeline {
/**
* Classify the given images.
* @async
* @param {any} images - The images to classify.
* @param {Object} options - The options to use for classification.
* @param {number} [options.topk=1] - The number of top results to return.
@ -953,11 +922,11 @@ class ImageClassificationPipeline extends Pipeline {
* ImageSegmentationPipeline class for executing an image-segmentation task.
* @extends Pipeline
*/
class ImageSegmentationPipeline extends Pipeline {
export class ImageSegmentationPipeline extends Pipeline {
/**
* Create a new ImageSegmentationPipeline.
* @param {string} task - The task of the pipeline.
* @param {Object} model - The model to use for classification.
* @param {string} task - The task of the pipeline.
* @param {PreTrainedModel} model - The model to use for classification.
* @param {Processor} processor - The processor used to preprocess images.
*/
constructor(task, model, processor) {
@ -1067,14 +1036,14 @@ class ImageSegmentationPipeline extends Pipeline {
* Class representing a zero-shot image classification pipeline.
* @extends Pipeline
*/
class ZeroShotImageClassificationPipeline extends Pipeline {
export class ZeroShotImageClassificationPipeline extends Pipeline {
/**
* Create a zero-shot image classification pipeline.
* @param {string} task - The task of the pipeline.
* @param {Object} tokenizer - The tokenizer to use.
* @param {Object} model - The model to use.
* @param {Function} processor - The image processing function.
* @param {PreTrainedTokenizer} tokenizer - The tokenizer to use.
* @param {PreTrainedModel} model - The model to use.
* @param {Processor} processor - The image processor to use.
*/
constructor(task, tokenizer, model, processor) {
super(task, tokenizer, model);
@ -1129,11 +1098,11 @@ class ZeroShotImageClassificationPipeline extends Pipeline {
}
class ObjectDetectionPipeline extends Pipeline {
export class ObjectDetectionPipeline extends Pipeline {
/**
* @param {string} task
* @param {any} model
* @param {any} processor
* @param {string} task
* @param {PreTrainedModel} model
* @param {Processor} processor
*/
constructor(task, model, processor) {
super(task, null, model); // TODO tokenizer
@ -1379,11 +1348,9 @@ const TASK_ALIASES = {
*/
/**
* Constructs a pipeline for a specified task with optional model and progress callback.
* Utility factory method to build a [`Pipeline`] object.
*
* @async
* @function
* @param {string} task - The task to perform, e.g. "text-generation".
* @param {string} task - The task defining which pipeline will be returned.
* @param {string} [model=null] - The name of the pre-trained model to use. If not specified, the default model for the task will be used.
* @param {PretrainedOptions} [options] - Optional parameters for the pipeline.
* @returns {Promise<Pipeline>} A Pipeline object for the specified task.
@ -1461,6 +1428,7 @@ export async function pipeline(
* Compute the Cartesian product of given arrays
* @param {...Array} a - Arrays to compute the product
* @returns {Array} - Returns the computed Cartesian product as an array
* @private
*/
function product(...a) {
// Cartesian product of items

src/processors.js
View File

@ -1,4 +1,24 @@
/**
* @file Processors are used to prepare non-textual inputs (e.g., image or audio) for a model.
*
* **Example:** Using a `WhisperProcessor` to prepare an audio input for a model.
* ```javascript
* import { AutoProcessor, read_audio } from '@xenova/transformers';
*
* let processor = await AutoProcessor.from_pretrained('openai/whisper-tiny.en');
* let audio = await read_audio('https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/mlk.flac');
* let { input_features } = await processor(audio);
* // Tensor {
* // data: Float32Array(240000) [0.4752984642982483, 0.5597258806228638, 0.56434166431427, ...],
* // dims: [1, 80, 3000],
* // type: 'float32',
* // size: 240000,
* // }
* ```
*
* @module processors
*/
import {
Callable,
} from './utils.js';
@ -24,7 +44,7 @@ import { CustomImage } from './image_utils.js';
*
* @extends Callable
*/
class FeatureExtractor extends Callable {
export class FeatureExtractor extends Callable {
/**
* Constructs a new FeatureExtractor instance.
*
@ -41,7 +61,7 @@ class FeatureExtractor extends Callable {
*
* @extends FeatureExtractor
*/
class ImageFeatureExtractor extends FeatureExtractor {
export class ImageFeatureExtractor extends FeatureExtractor {
/**
* Constructs a new ViTFeatureExtractor instance.
@ -221,14 +241,14 @@ class ImageFeatureExtractor extends FeatureExtractor {
}
class ViTFeatureExtractor extends ImageFeatureExtractor { }
export class ViTFeatureExtractor extends ImageFeatureExtractor { }
/**
* Detr Feature Extractor.
*
* @extends ImageFeatureExtractor
*/
class DetrFeatureExtractor extends ImageFeatureExtractor {
export class DetrFeatureExtractor extends ImageFeatureExtractor {
/**
* Calls the feature extraction process on an array of image
* URLs, preprocesses each image, and concatenates the resulting
@ -605,7 +625,7 @@ class DetrFeatureExtractor extends ImageFeatureExtractor {
}
class WhisperFeatureExtractor extends FeatureExtractor {
export class WhisperFeatureExtractor extends FeatureExtractor {
/**
* Calculates the index offset for a given index and window size.
@ -921,7 +941,6 @@ class WhisperFeatureExtractor extends FeatureExtractor {
* Asynchronously extracts features from a given audio using the provided configuration.
* @param {Float32Array} audio - The audio data as a Float32Array.
* @returns {Promise<{ input_features: Tensor }>} - A Promise resolving to an object containing the extracted input features as a Tensor.
* @async
*/
async _call(audio) {
// audio is a float32array
@ -933,7 +952,7 @@ class WhisperFeatureExtractor extends FeatureExtractor {
"remember to specify `chunk_length_s` and/or `stride_length_s`."
);
}
let waveform = audio.slice(0, this.config.n_samples)
let waveform = audio.slice(0, this.config.n_samples);
let features = this._extract_fbank_features(waveform);
@ -965,7 +984,6 @@ export class Processor extends Callable {
* Calls the feature_extractor function with the given input.
* @param {any} input - The input to extract features from.
* @returns {Promise<any>} A Promise that resolves with the extracted features.
* @async
*/
async _call(input) {
return await this.feature_extractor(input);
@ -976,12 +994,11 @@ export class Processor extends Callable {
* Represents a WhisperProcessor that extracts features from an audio input.
* @extends Processor
*/
class WhisperProcessor extends Processor {
export class WhisperProcessor extends Processor {
/**
* Calls the feature_extractor function with the given audio input.
* @param {any} audio - The audio input to extract features from.
* @returns {Promise<any>} A Promise that resolves with the extracted features.
* @async
*/
async _call(audio) {
return await this.feature_extractor(audio)

src/samplers.js
View File

@ -1,3 +1,12 @@
/**
* @file Helper module for sampling from a model that can generate (i.e., has `.generate()`).
*
* These classes are only used internally, meaning an end-user
* shouldn't need to access anything here.
*
* @module samplers
*/
import {
Callable,
} from "./utils.js";

src/tensor_utils.js
View File

@ -1,7 +1,16 @@
/**
* @file Helper module for `Tensor` processing.
*
* These functions and classes are only used internally,
* meaning an end-user shouldn't need to access anything here.
*
* @module tensor_utils
*/
import { ONNX } from './backends/onnx.js';
import {
interpolate as interpolate_data,
interpolate_data,
transpose_data
} from './math_utils.js';
@ -12,7 +21,6 @@ import {
const ONNXTensor = ONNX.Tensor;
// TODO: fix error below
export class Tensor extends ONNXTensor {
/**
* Create a new Tensor or copy an existing Tensor.

src/tokenizers.js
View File

@ -1,3 +1,25 @@
/**
* @file Tokenizers are used to prepare textual inputs for a model.
*
* **Example:** Create an `AutoTokenizer` and use it to tokenize a sentence.
* This will automatically detect the tokenizer type based on the tokenizer class defined in `tokenizer.json`.
* ```javascript
* import { AutoTokenizer } from '@xenova/transformers';
*
* let tokenizer = await AutoTokenizer.from_pretrained('bert-base-uncased');
* let { input_ids } = await tokenizer('I love transformers!');
* // Tensor {
* // data: BigInt64Array(6) [101n, 1045n, 2293n, 19081n, 999n, 102n],
* // dims: [1, 6],
* // type: 'int64',
* // size: 6,
* // }
* ```
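*
* The resulting ids can be mapped back to text with `decode` (a sketch; note the tensor's
* `BigInt64Array` data is converted to plain numbers first):
* ```javascript
* let decoded = tokenizer.decode(
*     Array.from(input_ids.data, Number),
*     { skip_special_tokens: true },
* );
* // 'i love transformers!'
* ```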
*
* @module tokenizers
*/
import {
Callable,
reverseDictionary,
@ -54,7 +76,7 @@ function createPattern(pattern) {
*
* @extends Callable
*/
class TokenizerModel extends Callable {
export class TokenizerModel extends Callable {
/**
* Creates a new instance of TokenizerModel.
* @param {object} config - The configuration object for the TokenizerModel.
@ -1474,7 +1496,7 @@ class WhitespaceSplit extends PreTokenizer {
}
}
class PreTrainedTokenizer extends Callable {
export class PreTrainedTokenizer extends Callable {
/**
* Create a new PreTrainedTokenizer instance.
* @param {Object} tokenizerJSON - The JSON of the tokenizer.
@ -1996,9 +2018,9 @@ export class GPT2Tokenizer extends PreTrainedTokenizer { }
export class BartTokenizer extends PreTrainedTokenizer { }
export class RobertaTokenizer extends PreTrainedTokenizer { }
class BloomTokenizer extends PreTrainedTokenizer { }
export class BloomTokenizer extends PreTrainedTokenizer { }
class NllbTokenizer extends PreTrainedTokenizer {
export class NllbTokenizer extends PreTrainedTokenizer {
constructor(tokenizerJSON, tokenizerConfig) {
super(tokenizerJSON, tokenizerConfig);

src/transformers.js
View File

@ -1,41 +1,25 @@
// Tokenizers
export {
AutoTokenizer,
BertTokenizer,
DistilBertTokenizer,
T5Tokenizer,
GPT2Tokenizer
} from './tokenizers.js';
/**
* @file Entry point for the Transformers.js library. Only the exports from this file
* are available to the end user, and are grouped as follows:
*
* 1. Pipelines
* 2. Environment variables
* 3. Models
* 4. Tokenizers
* 5. Processors
*
* @todo Add a link to the documentation for each export.
*
* @module transformers
*/
// Models
export {
// Auto classes
AutoModel,
AutoModelForSequenceClassification,
AutoModelForTokenClassification,
AutoModelForSeq2SeqLM,
AutoModelForCausalLM,
AutoModelForMaskedLM,
AutoModelForQuestionAnswering,
AutoModelForVision2Seq,
AutoModelForImageClassification,
AutoModelForObjectDetection,
// Other
PreTrainedModel,
} from './models.js';
export * from './pipelines.js';
export * from './env.js';
export * from './models.js';
export * from './tokenizers.js';
export * from './processors.js';
// Processors
export {
AutoProcessor
} from './processors.js';
// environment variables
export { env } from './env.js';
// other
export {
pipeline
} from './pipelines.js';
export { Tensor } from './tensor_utils.js';
export * from './audio_utils.js';
export * from './tensor_utils.js';
export * from './math_utils.js';
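With the flattened re-exports above, end users can import everything from the package root. A small sketch (each name shown is exported by one of the modules listed above):

```javascript
import {
    pipeline,       // from pipelines.js
    env,            // from env.js
    AutoModel,      // from models.js
    AutoTokenizer,  // from tokenizers.js
    AutoProcessor,  // from processors.js
    Tensor,         // from tensor_utils.js
    read_audio,     // from audio_utils.js
} from '@xenova/transformers';
```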

src/utils.js
View File

@ -1,4 +1,12 @@
/**
* @file Utility functions/classes for Transformers.js.
*
* These are only used internally, meaning an end-user shouldn't
* need to access anything here.
*
* @module utils
*/
/**
* Helper function to dispatch progress callbacks.
@ -6,6 +14,7 @@
* @param {function} progress_callback - The progress callback function to dispatch.
* @param {any} data - The data to pass to the progress callback function.
* @returns {void}
* @private
*/
export function dispatchCallback(progress_callback, data) {
if (progress_callback !== null) progress_callback(data);
@ -77,6 +86,19 @@ export function isString(text) {
return typeof text === 'string' || text instanceof String
}
/**
* Check if a value is a typed array.
* @param {*} val - The value to check.
* @returns {boolean} - True if the value is a `TypedArray`, false otherwise.
*
* Adapted from https://stackoverflow.com/a/71091338/13989043
*/
export function isTypedArray(val) {
return val?.prototype?.__proto__?.constructor?.name === 'TypedArray';
}
/**
* Check if a value is an integer.
* @param {*} x - The value to check.

src/utils/hub.js
View File

@ -1,6 +1,10 @@
// Utility functions to interact with the Hugging Face Hub (https://huggingface.co/models)
// const path = require('file-system-cache');
/**
* @file Utility functions to interact with the Hugging Face Hub (https://huggingface.co/models)
*
* @module utils/hub
*/
import { env } from '../env.js';
import fs from 'fs';