diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml
index a9113c5e74..30bb7fca76 100644
--- a/docs/source/en/_toctree.yml
+++ b/docs/source/en/_toctree.yml
@@ -93,6 +93,8 @@
title: Run training on Amazon SageMaker
- local: serialization
title: Export to ONNX
+ - local: tflite
+ title: Export to TFLite
- local: torchscript
title: Export to TorchScript
- local: benchmarks
diff --git a/docs/source/en/serialization.mdx b/docs/source/en/serialization.mdx
index cc429dea08..022cf460f8 100644
--- a/docs/source/en/serialization.mdx
+++ b/docs/source/en/serialization.mdx
@@ -12,13 +12,20 @@ specific language governing permissions and limitations under the License.
# Export to ONNX
-If you need to deploy 🤗 Transformers models in production environments, we recommend
-exporting them to a serialized format that can be loaded and executed on specialized
-runtimes and hardware. In this guide, we'll show you how to export 🤗 Transformers
-models to [ONNX (Open Neural Network eXchange)](http://onnx.ai).
+Deploying 🤗 Transformers models in production environments often requires, or can benefit from, exporting the models to
+a serialized format that can be loaded and executed on specialized runtimes and hardware.
-ONNX is an open standard that defines a common set of operators and a common file format
-to represent deep learning models in a wide variety of frameworks, including PyTorch and
+🤗 Optimum is an extension of Transformers that enables exporting models from PyTorch or TensorFlow to serialized formats
+such as ONNX and TFLite through its `exporters` module. 🤗 Optimum also provides a set of performance optimization tools to train
+and run models on targeted hardware with maximum efficiency.
+
+This guide demonstrates how you can export 🤗 Transformers models to ONNX with 🤗 Optimum. For the guide on exporting models to TFLite,
+please refer to the [Export to TFLite page](tflite).
+
+## Export to ONNX
+
+[ONNX (Open Neural Network eXchange)](http://onnx.ai) is an open standard that defines a common set of operators and a
+common file format to represent deep learning models in a wide variety of frameworks, including PyTorch and
TensorFlow. When a model is exported to the ONNX format, these operators are used to
construct a computational graph (often called an _intermediate representation_) which
represents the flow of data through the neural network.
@@ -27,172 +34,141 @@ By exposing a graph with standardized operators and data types, ONNX makes it ea
switch between frameworks. For example, a model trained in PyTorch can be exported to
ONNX format and then imported in TensorFlow (and vice versa).
-🤗 Transformers provides a [`transformers.onnx`](main_classes/onnx) package that enables
-you to convert model checkpoints to an ONNX graph by leveraging configuration objects.
-These configuration objects come ready made for a number of model architectures, and are
-designed to be easily extendable to other architectures.
-
-
-
-You can also export 🤗 Transformers models with the [`optimum.exporters.onnx` package](https://huggingface.co/docs/optimum/exporters/onnx/usage_guides/export_a_model)
-from 🤗 Optimum.
-
-Once exported, a model can be:
-
-- Optimized for inference via techniques such as quantization and graph optimization.
-- Run with ONNX Runtime via [`ORTModelForXXX` classes](https://huggingface.co/docs/optimum/onnxruntime/package_reference/modeling_ort),
+Once exported to ONNX format, a model can be:
+- optimized for inference via techniques such as [graph optimization](https://huggingface.co/docs/optimum/onnxruntime/usage_guides/optimization) and [quantization](https://huggingface.co/docs/optimum/onnxruntime/usage_guides/quantization).
+- run with ONNX Runtime via [`ORTModelForXXX` classes](https://huggingface.co/docs/optimum/onnxruntime/package_reference/modeling_ort),
which follow the same `AutoModel` API as the one you are used to in 🤗 Transformers.
-- Run with [optimized inference pipelines](https://huggingface.co/docs/optimum/main/en/onnxruntime/usage_guides/pipelines),
-which has the same API as the [`pipeline`] function in 🤗 Transformers.
+- run with [optimized inference pipelines](https://huggingface.co/docs/optimum/main/en/onnxruntime/usage_guides/pipelines),
+which have the same API as the [`pipeline`] function in 🤗 Transformers.
-To explore all these features, check out the [🤗 Optimum library](https://github.com/huggingface/optimum).
+🤗 Optimum provides support for the ONNX export by leveraging configuration objects. These configuration objects come
+ready-made for a number of model architectures, and are designed to be easily extendable to other architectures.
-
+For the list of ready-made configurations, please refer to [🤗 Optimum documentation](https://huggingface.co/docs/optimum/exporters/onnx/overview).
-Ready-made configurations include the following architectures:
+There are two ways to export a 🤗 Transformers model to ONNX; here we show both:
-
+- export with 🤗 Optimum via CLI.
+- export with 🤗 Optimum via `optimum.onnxruntime`.
-- ALBERT
-- BART
-- BEiT
-- BERT
-- BigBird
-- BigBird-Pegasus
-- Blenderbot
-- BlenderbotSmall
-- BLOOM
-- CamemBERT
-- Chinese-CLIP
-- CLIP
-- CodeGen
-- Conditional DETR
-- ConvBERT
-- ConvNeXT
-- Data2VecText
-- Data2VecVision
-- DeBERTa
-- DeBERTa-v2
-- DeiT
-- DETR
-- DistilBERT
-- EfficientNet
-- ELECTRA
-- ERNIE
-- FlauBERT
-- GPT Neo
-- GPT-J
-- GPT-Sw3
-- GroupViT
-- I-BERT
-- ImageGPT
-- LayoutLM
-- LayoutLMv3
-- LeViT
-- Longformer
-- LongT5
-- M2M100
-- Marian
-- mBART
-- MEGA
-- MobileBERT
-- MobileNetV1
-- MobileNetV2
-- MobileViT
-- MT5
-- OpenAI GPT-2
-- OWL-ViT
-- Perceiver
-- PLBart
-- PoolFormer
-- RemBERT
-- ResNet
-- RoBERTa
-- RoBERTa-PreLayerNorm
-- RoFormer
-- SegFormer
-- SqueezeBERT
-- SwiftFormer
-- Swin Transformer
-- T5
-- Table Transformer
-- Vision Encoder decoder
-- ViT
-- Whisper
-- X-MOD
-- XLM
-- XLM-RoBERTa
-- XLM-RoBERTa-XL
-- YOLOS
+### Exporting a 🤗 Transformers model to ONNX with the CLI
-In the next two sections, we'll show you how to:
-
-* Export a supported model using the `transformers.onnx` package.
-* Export a custom model for an unsupported architecture.
-
-## Exporting a model to ONNX
-
-
-
-The recommended way of exporting a model is now to use
-[`optimum.exporters.onnx`](https://huggingface.co/docs/optimum/main/en/exporters/onnx/usage_guides/export_a_model#exporting-a-model-to-onnx-using-the-cli),
-do not worry it is very similar to `transformers.onnx`!
-
-
-
-To export a 🤗 Transformers model to ONNX, you'll first need to install some extra
-dependencies:
+To export a 🤗 Transformers model to ONNX, first install an extra dependency:
```bash
-pip install transformers[onnx]
+pip install optimum[exporters]
```
-The `transformers.onnx` package can then be used as a Python module:
+To check out all available arguments, refer to the [🤗 Optimum docs](https://huggingface.co/docs/optimum/exporters/onnx/usage_guides/export_a_model#exporting-a-model-to-onnx-using-the-cli),
+or view the help in the command line:
```bash
-python -m transformers.onnx --help
-
-usage: Hugging Face Transformers ONNX exporter [-h] -m MODEL [--feature {causal-lm, ...}] [--opset OPSET] [--atol ATOL] output
-
-positional arguments:
- output Path indicating where to store generated ONNX model.
-
-optional arguments:
- -h, --help show this help message and exit
- -m MODEL, --model MODEL
- Model ID on huggingface.co or path on disk to load model from.
- --feature {causal-lm, ...}
- The type of features to export the model with.
- --opset OPSET ONNX opset version to export the model with.
- --atol ATOL Absolute difference tolerance when validating the model.
+optimum-cli export onnx --help
```
-Exporting a checkpoint using a ready-made configuration can be done as follows:
+To export a model checkpoint from the 🤗 Hub, for example `distilbert-base-uncased-distilled-squad`, run the following command:
```bash
-python -m transformers.onnx --model=distilbert-base-uncased onnx/
+optimum-cli export onnx --model distilbert-base-uncased-distilled-squad distilbert_base_uncased_squad_onnx/
```
-You should see the following logs:
+You should see logs indicating progress and showing where the resulting `model.onnx` is saved, like this:
```bash
-Validating ONNX model...
- -[✓] ONNX model output names match reference model ({'last_hidden_state'})
- - Validating ONNX Model output "last_hidden_state":
- -[✓] (2, 8, 768) matches (2, 8, 768)
- -[✓] all values close (atol: 1e-05)
-All good, model saved at: onnx/model.onnx
+Validating ONNX model distilbert_base_uncased_squad_onnx/model.onnx...
+ -[✓] ONNX model output names match reference model (start_logits, end_logits)
+ - Validating ONNX Model output "start_logits":
+ -[✓] (2, 16) matches (2, 16)
+ -[✓] all values close (atol: 0.0001)
+ - Validating ONNX Model output "end_logits":
+ -[✓] (2, 16) matches (2, 16)
+ -[✓] all values close (atol: 0.0001)
+The ONNX export succeeded and the exported model was saved at: distilbert_base_uncased_squad_onnx
```
-This exports an ONNX graph of the checkpoint defined by the `--model` argument. In this
-example, it is `distilbert-base-uncased`, but it can be any checkpoint on the Hugging
-Face Hub or one that's stored locally.
+The example above illustrates exporting a checkpoint from the 🤗 Hub. When exporting a local model, first make sure that you
+saved both the model's weights and tokenizer files in the same directory (`local_path`). When using the CLI, pass the
+`local_path` to the `model` argument instead of the checkpoint name on the 🤗 Hub and provide the `--task` argument.
+You can review the list of supported tasks in the [🤗 Optimum documentation](https://huggingface.co/docs/optimum/exporters/task_manager).
+If the `--task` argument is not provided, the export will default to the model architecture without any task-specific head.
+
+```bash
+optimum-cli export onnx --model local_path --task question-answering distilbert_base_uncased_squad_onnx/
+```
The resulting `model.onnx` file can then be run on one of the [many
accelerators](https://onnx.ai/supported-tools.html#deployModel) that support the ONNX
standard. For example, we can load and run the model with [ONNX
Runtime](https://onnxruntime.ai/) as follows:
+```python
+>>> from transformers import AutoTokenizer
+>>> from optimum.onnxruntime import ORTModelForQuestionAnswering
+
+>>> tokenizer = AutoTokenizer.from_pretrained("distilbert_base_uncased_squad_onnx")
+>>> model = ORTModelForQuestionAnswering.from_pretrained("distilbert_base_uncased_squad_onnx")
+>>> inputs = tokenizer("What am I using?", "Using DistilBERT with ONNX Runtime!", return_tensors="pt")
+>>> outputs = model(**inputs)
+```
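+
+The same exported model can also be plugged into 🤗 Optimum's [optimized inference pipelines](https://huggingface.co/docs/optimum/main/en/onnxruntime/usage_guides/pipelines),
+which have the same API as the [`pipeline`] function in 🤗 Transformers. A minimal sketch, reusing the `model` and `tokenizer` loaded above:
+
+```python
+>>> from optimum.pipelines import pipeline
+
+>>> # accelerator="ort" runs the pipeline on ONNX Runtime instead of PyTorch
+>>> onnx_qa = pipeline("question-answering", model=model, tokenizer=tokenizer, accelerator="ort")
+>>> pred = onnx_qa(question="What am I using?", context="Using DistilBERT with ONNX Runtime!")
+```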
+
+The process is identical for TensorFlow checkpoints on the Hub. For instance, here's how you would
+export a pure TensorFlow checkpoint from the [Keras organization](https://huggingface.co/keras-io):
+
+```bash
+optimum-cli export onnx --model keras-io/transformers-qa distilbert_base_cased_squad_onnx/
+```
+
+### Exporting a 🤗 Transformers model to ONNX with `optimum.onnxruntime`
+
+As an alternative to the CLI, you can export a 🤗 Transformers model to ONNX programmatically like so:
+
+```python
+>>> from optimum.onnxruntime import ORTModelForSequenceClassification
+>>> from transformers import AutoTokenizer
+
+>>> model_checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
+>>> save_directory = "onnx/"
+
+>>> # Load a model from transformers and export it to ONNX
+>>> ort_model = ORTModelForSequenceClassification.from_pretrained(model_checkpoint, export=True)
+>>> tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
+
+>>> # Save the onnx model and tokenizer
+>>> ort_model.save_pretrained(save_directory)
+>>> tokenizer.save_pretrained(save_directory)
+```
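+
+As mentioned earlier, the exported model can be further optimized for inference, for example with
+[quantization](https://huggingface.co/docs/optimum/onnxruntime/usage_guides/quantization). A minimal sketch using 🤗 Optimum's
+ONNX Runtime tools, assuming dynamic quantization for AVX2 CPUs (the configuration and the `onnx_quantized/` output directory
+are illustrative; pick whatever matches your target hardware):
+
+```python
+>>> from optimum.onnxruntime import ORTQuantizer
+>>> from optimum.onnxruntime.configuration import AutoQuantizationConfig
+
+>>> # Quantize the ONNX model saved in `save_directory` above
+>>> quantizer = ORTQuantizer.from_pretrained(save_directory)
+>>> qconfig = AutoQuantizationConfig.avx2(is_static=False, per_channel=False)
+>>> quantized_path = quantizer.quantize(save_dir="onnx_quantized/", quantization_config=qconfig)
+```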
+
+### Exporting a model for an unsupported architecture
+
+If you wish to contribute by adding support for a model that cannot currently be exported, you should first check if it is
+supported in [`optimum.exporters.onnx`](https://huggingface.co/docs/optimum/exporters/onnx/overview),
+and if it is not, [contribute to 🤗 Optimum](https://huggingface.co/docs/optimum/exporters/onnx/usage_guides/contribute)
+directly.
+
+### Exporting a model with `transformers.onnx`
+
+
+
+`transformers.onnx` is no longer maintained; please export models with 🤗 Optimum as described above. This section will be removed in future versions.
+
+
+
+To export a 🤗 Transformers model to ONNX with `transformers.onnx`, install the extra dependencies:
+
+```bash
+pip install transformers[onnx]
+```
+
+Use the `transformers.onnx` package as a Python module to export a checkpoint using a ready-made configuration:
+
+```bash
+python -m transformers.onnx --model=distilbert-base-uncased onnx/
+```
+
+This exports an ONNX graph of the checkpoint defined by the `--model` argument. Pass any checkpoint on the 🤗 Hub or one that's stored locally.
+The resulting `model.onnx` file can then be run on one of the many accelerators that support the ONNX standard. For example,
+load and run the model with ONNX Runtime as follows:
+
```python
>>> from transformers import AutoTokenizer
>>> from onnxruntime import InferenceSession
@@ -204,8 +180,8 @@ Runtime](https://onnxruntime.ai/) as follows:
>>> outputs = session.run(output_names=["last_hidden_state"], input_feed=dict(inputs))
```
-The required output names (like `["last_hidden_state"]`) can be obtained by taking a
-look at the ONNX configuration of each model. For example, for DistilBERT we have:
+The required output names (like `["last_hidden_state"]`) can be obtained by taking a look at the ONNX configuration of
+each model. For example, for DistilBERT we have:
```python
>>> from transformers.models.distilbert import DistilBertConfig, DistilBertOnnxConfig
@@ -216,327 +192,15 @@ look at the ONNX configuration of each model. For example, for DistilBERT we hav
["last_hidden_state"]
```
-The process is identical for TensorFlow checkpoints on the Hub. For example, we can
-export a pure TensorFlow checkpoint from the [Keras
-organization](https://huggingface.co/keras-io) as follows:
+The process is identical for TensorFlow checkpoints on the Hub. For example, export a pure TensorFlow checkpoint like so:
```bash
python -m transformers.onnx --model=keras-io/transformers-qa onnx/
```
-To export a model that's stored locally, you'll need to have the model's weights and
-tokenizer files stored in a directory. For example, we can load and save a checkpoint as
-follows:
-
-
-```python
->>> from transformers import AutoTokenizer, AutoModelForSequenceClassification
-
->>> # Load tokenizer and PyTorch weights form the Hub
->>> tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
->>> pt_model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased")
->>> # Save to disk
->>> tokenizer.save_pretrained("local-pt-checkpoint")
->>> pt_model.save_pretrained("local-pt-checkpoint")
-```
-
-Once the checkpoint is saved, we can export it to ONNX by pointing the `--model`
-argument of the `transformers.onnx` package to the desired directory:
+To export a model that's stored locally, save the model's weights and tokenizer files in the same directory (e.g. `local-pt-checkpoint`),
+then export it to ONNX by pointing the `--model` argument of the `transformers.onnx` package to the desired directory:
```bash
python -m transformers.onnx --model=local-pt-checkpoint onnx/
-```
-
-```python
->>> from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
-
->>> # Load tokenizer and TensorFlow weights from the Hub
->>> tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
->>> tf_model = TFAutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased")
->>> # Save to disk
->>> tokenizer.save_pretrained("local-tf-checkpoint")
->>> tf_model.save_pretrained("local-tf-checkpoint")
-```
-
-Once the checkpoint is saved, we can export it to ONNX by pointing the `--model`
-argument of the `transformers.onnx` package to the desired directory:
-
-```bash
-python -m transformers.onnx --model=local-tf-checkpoint onnx/
-```
-
-
-## Selecting features for different model tasks
-
-
-
-The recommended way of exporting a model is now to use `optimum.exporters.onnx`.
-You can check the [🤗 Optimum documentation](https://huggingface.co/docs/optimum/main/en/exporters/onnx/usage_guides/export_a_model#selecting-a-task)
-to learn how to select a task.
-
-
-
-Each ready-made configuration comes with a set of _features_ that enable you to export
-models for different types of tasks. As shown in the table below, each feature is
-associated with a different `AutoClass`:
-
-| Feature | Auto Class |
-| ------------------------------------ | ------------------------------------ |
-| `causal-lm`, `causal-lm-with-past` | `AutoModelForCausalLM` |
-| `default`, `default-with-past` | `AutoModel` |
-| `masked-lm` | `AutoModelForMaskedLM` |
-| `question-answering` | `AutoModelForQuestionAnswering` |
-| `seq2seq-lm`, `seq2seq-lm-with-past` | `AutoModelForSeq2SeqLM` |
-| `sequence-classification` | `AutoModelForSequenceClassification` |
-| `token-classification` | `AutoModelForTokenClassification` |
-
-For each configuration, you can find the list of supported features via the
-[`~transformers.onnx.FeaturesManager`]. For example, for DistilBERT we have:
-
-```python
->>> from transformers.onnx.features import FeaturesManager
-
->>> distilbert_features = list(FeaturesManager.get_supported_features_for_model_type("distilbert").keys())
->>> print(distilbert_features)
-["default", "masked-lm", "causal-lm", "sequence-classification", "token-classification", "question-answering"]
-```
-
-You can then pass one of these features to the `--feature` argument in the
-`transformers.onnx` package. For example, to export a text-classification model we can
-pick a fine-tuned model from the Hub and run:
-
-```bash
-python -m transformers.onnx --model=distilbert-base-uncased-finetuned-sst-2-english \
- --feature=sequence-classification onnx/
-```
-
-This displays the following logs:
-
-```bash
-Validating ONNX model...
- -[✓] ONNX model output names match reference model ({'logits'})
- - Validating ONNX Model output "logits":
- -[✓] (2, 2) matches (2, 2)
- -[✓] all values close (atol: 1e-05)
-All good, model saved at: onnx/model.onnx
-```
-
-Notice that in this case, the output names from the fine-tuned model are `logits`
-instead of the `last_hidden_state` we saw with the `distilbert-base-uncased` checkpoint
-earlier. This is expected since the fine-tuned model has a sequence classification head.
-
-
-
-The features that have a `with-past` suffix (like `causal-lm-with-past`) correspond to
-model classes with precomputed hidden states (key and values in the attention blocks)
-that can be used for fast autoregressive decoding.
-
-
-
-
-
-For `VisionEncoderDecoder` type models, the encoder and decoder parts are
-exported separately as two ONNX files named `encoder_model.onnx` and `decoder_model.onnx` respectively.
-
-
-
-
-## Exporting a model for an unsupported architecture
-
-
-
-If you wish to contribute by adding support for a model that cannot be currently exported, you should first check if it is
-supported in [`optimum.exporters.onnx`](https://huggingface.co/docs/optimum/main/en/exporters/onnx/package_reference/configuration#supported-architectures),
-and if it is not, [contribute to 🤗 Optimum](https://huggingface.co/docs/optimum/main/en/exporters/onnx/usage_guides/contribute)
-directly.
-
-
-
-If you wish to export a model whose architecture is not natively supported by the
-library, there are three main steps to follow:
-
-1. Implement a custom ONNX configuration.
-2. Export the model to ONNX.
-3. Validate the outputs of the PyTorch and exported models.
-
-In this section, we'll look at how DistilBERT was implemented to show what's involved
-with each step.
-
-### Implementing a custom ONNX configuration
-
-Let's start with the ONNX configuration object. We provide three abstract classes that
-you should inherit from, depending on the type of model architecture you wish to export:
-
-* Encoder-based models inherit from [`~onnx.config.OnnxConfig`]
-* Decoder-based models inherit from [`~onnx.config.OnnxConfigWithPast`]
-* Encoder-decoder models inherit from [`~onnx.config.OnnxSeq2SeqConfigWithPast`]
-
-
-
-A good way to implement a custom ONNX configuration is to look at the existing
-implementation in the `configuration_.py` file of a similar architecture.
-
-
-
-Since DistilBERT is an encoder-based model, its configuration inherits from
-`OnnxConfig`:
-
-```python
->>> from typing import Mapping, OrderedDict
->>> from transformers.onnx import OnnxConfig
-
-
->>> class DistilBertOnnxConfig(OnnxConfig):
-... @property
-... def inputs(self) -> Mapping[str, Mapping[int, str]]:
-... return OrderedDict(
-... [
-... ("input_ids", {0: "batch", 1: "sequence"}),
-... ("attention_mask", {0: "batch", 1: "sequence"}),
-... ]
-... )
-```
-
-Every configuration object must implement the `inputs` property and return a mapping,
-where each key corresponds to an expected input, and each value indicates the axis of
-that input. For DistilBERT, we can see that two inputs are required: `input_ids` and
-`attention_mask`. These inputs have the same shape of `(batch_size, sequence_length)`
-which is why we see the same axes used in the configuration.
-
-
-
-Notice that `inputs` property for `DistilBertOnnxConfig` returns an `OrderedDict`. This
-ensures that the inputs are matched with their relative position within the
-`PreTrainedModel.forward()` method when tracing the graph. We recommend using an
-`OrderedDict` for the `inputs` and `outputs` properties when implementing custom ONNX
-configurations.
-
-
-
-Once you have implemented an ONNX configuration, you can instantiate it by providing the
-base model's configuration as follows:
-
-```python
->>> from transformers import AutoConfig
-
->>> config = AutoConfig.from_pretrained("distilbert-base-uncased")
->>> onnx_config = DistilBertOnnxConfig(config)
-```
-
-The resulting object has several useful properties. For example, you can view the ONNX
-operator set that will be used during the export:
-
-```python
->>> print(onnx_config.default_onnx_opset)
-11
-```
-
-You can also view the outputs associated with the model as follows:
-
-```python
->>> print(onnx_config.outputs)
-OrderedDict([("last_hidden_state", {0: "batch", 1: "sequence"})])
-```
-
-Notice that the outputs property follows the same structure as the inputs; it returns an
-`OrderedDict` of named outputs and their shapes. The output structure is linked to the
-choice of feature that the configuration is initialised with. By default, the ONNX
-configuration is initialized with the `default` feature that corresponds to exporting a
-model loaded with the `AutoModel` class. If you want to export a model for another task,
-just provide a different feature to the `task` argument when you initialize the ONNX
-configuration. For example, if we wished to export DistilBERT with a sequence
-classification head, we could use:
-
-```python
->>> from transformers import AutoConfig
-
->>> config = AutoConfig.from_pretrained("distilbert-base-uncased")
->>> onnx_config_for_seq_clf = DistilBertOnnxConfig(config, task="sequence-classification")
->>> print(onnx_config_for_seq_clf.outputs)
-OrderedDict([('logits', {0: 'batch'})])
-```
-
-
-
-All of the base properties and methods associated with [`~onnx.config.OnnxConfig`] and
-the other configuration classes can be overridden if needed. Check out [`BartOnnxConfig`]
-for an advanced example.
-
-
-
-### Exporting the model
-
-Once you have implemented the ONNX configuration, the next step is to export the model.
-Here we can use the `export()` function provided by the `transformers.onnx` package.
-This function expects the ONNX configuration, along with the base model and tokenizer,
-and the path to save the exported file:
-
-```python
->>> from pathlib import Path
->>> from transformers.onnx import export
->>> from transformers import AutoTokenizer, AutoModel
-
->>> onnx_path = Path("model.onnx")
->>> model_ckpt = "distilbert-base-uncased"
->>> base_model = AutoModel.from_pretrained(model_ckpt)
->>> tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
-
->>> onnx_inputs, onnx_outputs = export(tokenizer, base_model, onnx_config, onnx_config.default_onnx_opset, onnx_path)
-```
-
-The `onnx_inputs` and `onnx_outputs` returned by the `export()` function are lists of
-the keys defined in the `inputs` and `outputs` properties of the configuration. Once the
-model is exported, you can test that the model is well formed as follows:
-
-```python
->>> import onnx
-
->>> onnx_model = onnx.load("model.onnx")
->>> onnx.checker.check_model(onnx_model)
-```
-
-
-
-If your model is larger than 2GB, you will see that many additional files are created
-during the export. This is _expected_ because ONNX uses [Protocol
-Buffers](https://developers.google.com/protocol-buffers/) to store the model and these
-have a size limit of 2GB. See the [ONNX
-documentation](https://github.com/onnx/onnx/blob/master/docs/ExternalData.md) for
-instructions on how to load models with external data.
-
-
-
-### Validating the model outputs
-
-The final step is to validate that the outputs from the base and exported model agree
-within some absolute tolerance. Here we can use the `validate_model_outputs()` function
-provided by the `transformers.onnx` package as follows:
-
-```python
->>> from transformers.onnx import validate_model_outputs
-
->>> validate_model_outputs(
-... onnx_config, tokenizer, base_model, onnx_path, onnx_outputs, onnx_config.atol_for_validation
-... )
-```
-
-This function uses the [`~transformers.onnx.OnnxConfig.generate_dummy_inputs`] method to
-generate inputs for the base and exported model, and the absolute tolerance can be
-defined in the configuration. We generally find numerical agreement in the 1e-6 to 1e-4
-range, although anything smaller than 1e-3 is likely to be OK.
-
-## Contributing a new configuration to 🤗 Transformers
-
-We are looking to expand the set of ready-made configurations and welcome contributions
-from the community! If you would like to contribute your addition to the library, you
-will need to:
-
-* Implement the ONNX configuration in the corresponding `configuration_.py`
-file
-* Include the model architecture and corresponding features in
- [`~onnx.features.FeatureManager`]
-* Add your model architecture to the tests in `test_onnx_v2.py`
-
-Check out how the configuration for [IBERT was
-contributed](https://github.com/huggingface/transformers/pull/14868/files) to get an
-idea of what's involved.
+```
\ No newline at end of file
diff --git a/docs/source/en/tflite.mdx b/docs/source/en/tflite.mdx
new file mode 100644
index 0000000000..23e08478ba
--- /dev/null
+++ b/docs/source/en/tflite.mdx
@@ -0,0 +1,58 @@
+
+
+# Export to TFLite
+
+[TensorFlow Lite](https://www.tensorflow.org/lite/guide) is a lightweight framework for deploying machine learning models
+on resource-constrained devices, such as mobile phones, embedded systems, and Internet of Things (IoT) devices.
+TFLite is designed to optimize and run models efficiently on these devices, which have limited computational power and
+memory and where power consumption must be kept low.
+A TensorFlow Lite model is represented in a special efficient portable format identified by the `.tflite` file extension.
+
+🤗 Optimum offers functionality to export 🤗 Transformers models to TFLite through the `exporters.tflite` module.
+For the list of supported model architectures, please refer to [🤗 Optimum documentation](https://huggingface.co/docs/optimum/exporters/tflite/overview).
+
+To export a model to TFLite, install the required dependencies:
+
+```bash
+pip install optimum[exporters-tf]
+```
+
+To check out all available arguments, refer to the [🤗 Optimum docs](https://huggingface.co/docs/optimum/main/en/exporters/tflite/usage_guides/export_a_model),
+or view the help in the command line:
+
+```bash
+optimum-cli export tflite --help
+```
+
+To export a model checkpoint from the 🤗 Hub, for example `bert-base-uncased`, run the following command:
+
+```bash
+optimum-cli export tflite --model bert-base-uncased --sequence_length 128 bert_tflite/
+```
+
+You should see logs indicating progress and showing where the resulting `model.tflite` is saved, like this:
+
+```bash
+Validating TFLite model...
+ -[✓] TFLite model output names match reference model (logits)
+ - Validating TFLite Model output "logits":
+ -[✓] (1, 128, 30522) matches (1, 128, 30522)
+ -[x] values not close enough, max diff: 5.817413330078125e-05 (atol: 1e-05)
+The TensorFlow Lite export succeeded with the warning: The maximum absolute difference between the output of the reference model and the TFLite exported model is not within the set tolerance 1e-05:
+- logits: max diff = 5.817413330078125e-05.
+ The exported model was saved at: bert_tflite
+```
+
+The example above illustrates exporting a checkpoint from the 🤗 Hub. When exporting a local model, first make sure that you
+saved both the model's weights and tokenizer files in the same directory (`local_path`). When using the CLI, pass the
+`local_path` to the `model` argument instead of the checkpoint name on the 🤗 Hub.
\ No newline at end of file
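+
+For instance, a local export might look roughly like the following sketch (here `local_path` is a placeholder directory and
+the `--task` value is illustrative; as with the ONNX exporter, you will likely need to specify the task explicitly for a
+local model, since it cannot be inferred from the 🤗 Hub):
+
+```bash
+optimum-cli export tflite --model local_path --task question-answering --sequence_length 128 local_model_tflite/
+```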
diff --git a/utils/check_table.py b/utils/check_table.py
index e7e31cfee3..80593881a3 100644
--- a/utils/check_table.py
+++ b/utils/check_table.py
@@ -173,56 +173,9 @@ def check_model_table(overwrite=False):
)
-def has_onnx(model_type):
- """
- Returns whether `model_type` is supported by ONNX (by checking if there is an ONNX config) or not.
- """
- config_mapping = transformers_module.models.auto.configuration_auto.CONFIG_MAPPING
- if model_type not in config_mapping:
- return False
- config = config_mapping[model_type]
- config_module = config.__module__
- module = transformers_module
- for part in config_module.split(".")[1:]:
- module = getattr(module, part)
- config_name = config.__name__
- onnx_config_name = config_name.replace("Config", "OnnxConfig")
- return hasattr(module, onnx_config_name)
-
-
-def get_onnx_model_list():
- """
- Return the list of models supporting ONNX.
- """
- config_mapping = transformers_module.models.auto.configuration_auto.CONFIG_MAPPING
- model_names = config_mapping = transformers_module.models.auto.configuration_auto.MODEL_NAMES_MAPPING
- onnx_model_types = [model_type for model_type in config_mapping.keys() if has_onnx(model_type)]
- onnx_model_names = [model_names[model_type] for model_type in onnx_model_types]
- onnx_model_names.sort(key=lambda x: x.upper())
- return "\n".join([f"- {name}" for name in onnx_model_names]) + "\n"
-
-
-def check_onnx_model_list(overwrite=False):
- """Check the model list in the serialization.mdx is consistent with the state of the lib and maybe `overwrite`."""
- current_list, start_index, end_index, lines = _find_text_in_file(
- filename=os.path.join(PATH_TO_DOCS, "serialization.mdx"),
- start_prompt="",
- end_prompt="In the next two sections, we'll show you how to:",
- )
- new_list = get_onnx_model_list()
-
- if current_list != new_list:
- if overwrite:
- with open(os.path.join(PATH_TO_DOCS, "serialization.mdx"), "w", encoding="utf-8", newline="\n") as f:
- f.writelines(lines[:start_index] + [new_list] + lines[end_index:])
- else:
- raise ValueError("The list of ONNX-supported models needs an update. Run `make fix-copies` to fix this.")
-
-
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--fix_and_overwrite", action="store_true", help="Whether to fix inconsistencies.")
args = parser.parse_args()
check_model_table(args.fix_and_overwrite)
- check_onnx_model_list(args.fix_and_overwrite)