Add BlenderBot small tokenizer to the init (#13367)

* Add BlenderBot small tokenizer to the init

* Update src/transformers/__init__.py

Co-authored-by: Suraj Patil <surajp815@gmail.com>

* Style

* Bugfix

Co-authored-by: Suraj Patil <surajp815@gmail.com>
Lysandre Debut 2021-09-22 19:00:47 -04:00 committed by GitHub
parent 9e0fd78051
commit 5b57075449
6 changed files with 28 additions and 4 deletions
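The net effect of this change: BlenderbotSmallTokenizerFast is now re-exported from the top-level transformers package (when the tokenizers backend is installed) instead of only living in its submodule. A minimal sketch of what this enables, assuming an environment with the tokenizers backend available:

# Before this commit the class was only reachable via its full submodule path;
# after it, the top-level import resolves to the same object.
from transformers import BlenderbotSmallTokenizerFast
from transformers.models.blenderbot_small.tokenization_blenderbot_small_fast import (
    BlenderbotSmallTokenizerFast as _Direct,
)

assert BlenderbotSmallTokenizerFast is _Direct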

View File

@@ -354,7 +354,7 @@ Flax), PyTorch, and/or TensorFlow.
+-----------------------------+----------------+----------------+-----------------+--------------------+--------------+
| Blenderbot | ✅ | ❌ | ✅ | ✅ | ❌ |
+-----------------------------+----------------+----------------+-----------------+--------------------+--------------+
| BlenderbotSmall | ✅ | ❌ | ✅ | ✅ | ❌ |
| BlenderbotSmall | ✅ | ✅ | ✅ | ✅ | ❌ |
+-----------------------------+----------------+----------------+-----------------+--------------------+--------------+
| CamemBERT | ✅ | ✅ | ✅ | ✅ | ❌ |
+-----------------------------+----------------+----------------+-----------------+--------------------+--------------+

View File

@@ -57,6 +57,13 @@ BlenderbotSmallTokenizer
        create_token_type_ids_from_sequences, save_vocabulary
BlenderbotSmallTokenizerFast
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.BlenderbotSmallTokenizerFast
    :members:
BlenderbotSmallModel
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

View File

@@ -370,6 +370,7 @@ if is_tokenizers_available():
    _import_structure["models.roformer"].append("RoFormerTokenizerFast")
    _import_structure["models.clip"].append("CLIPTokenizerFast")
    _import_structure["models.convbert"].append("ConvBertTokenizerFast")
    _import_structure["models.blenderbot_small"].append("BlenderbotSmallTokenizerFast")
    _import_structure["models.albert"].append("AlbertTokenizerFast")
    _import_structure["models.bart"].append("BartTokenizerFast")
    _import_structure["models.barthez"].append("BarthezTokenizerFast")
@@ -2182,6 +2183,7 @@ if TYPE_CHECKING:
        from .models.barthez import BarthezTokenizerFast
        from .models.bert import BertTokenizerFast
        from .models.big_bird import BigBirdTokenizerFast
        from .models.blenderbot_small import BlenderbotSmallTokenizerFast
        from .models.camembert import CamembertTokenizerFast
        from .models.clip import CLIPTokenizerFast
        from .models.convbert import ConvBertTokenizerFast
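For context, the top-level __init__.py builds _import_structure (a mapping from submodule name to the public names it exports) and hands it to _LazyModule, so appending "BlenderbotSmallTokenizerFast" under the is_tokenizers_available() branch is what makes the name resolvable from transformers without importing the submodule eagerly. A simplified standalone sketch of that pattern (module_map and resolve are illustrative names, not transformers' real internals):

import importlib
import importlib.util

# Map of submodule -> exported names, mirroring _import_structure above.
module_map = {"models.blenderbot_small": ["BlenderbotSmallTokenizer"]}

# Only advertise the fast tokenizer when the `tokenizers` backend is installed.
if importlib.util.find_spec("tokenizers") is not None:
    module_map["models.blenderbot_small"].append("BlenderbotSmallTokenizerFast")

def resolve(name):
    # Import the owning submodule lazily, on first access of the public name.
    for submodule, names in module_map.items():
        if name in names:
            return getattr(importlib.import_module(f"transformers.{submodule}"), name)
    raise AttributeError(name)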

View File

@@ -17,7 +17,7 @@
# limitations under the License.
from typing import TYPE_CHECKING
from ...file_utils import _LazyModule, is_tf_available, is_torch_available
from ...file_utils import _LazyModule, is_tf_available, is_tokenizers_available, is_torch_available
_import_structure = {
@@ -25,6 +25,9 @@ _import_structure = {
    "tokenization_blenderbot_small": ["BlenderbotSmallTokenizer"],
}
if is_tokenizers_available():
    _import_structure["tokenization_blenderbot_small_fast"] = ["BlenderbotSmallTokenizerFast"]
if is_torch_available():
    _import_structure["modeling_blenderbot_small"] = [
        "BLENDERBOT_SMALL_PRETRAINED_MODEL_ARCHIVE_LIST",
@@ -45,6 +48,9 @@ if TYPE_CHECKING:
    from .configuration_blenderbot_small import BLENDERBOT_SMALL_PRETRAINED_CONFIG_ARCHIVE_MAP, BlenderbotSmallConfig
    from .tokenization_blenderbot_small import BlenderbotSmallTokenizer
    if is_tokenizers_available():
        from .tokenization_blenderbot_small_fast import BlenderbotSmallTokenizerFast
    if is_torch_available():
        from .modeling_blenderbot_small import (
            BLENDERBOT_SMALL_PRETRAINED_MODEL_ARCHIVE_LIST,
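The model subpackage mirrors the top-level file: the runtime branch registers the fast tokenizer module in _import_structure only when the backend is present, while the TYPE_CHECKING branch gives static type checkers the real imports. User code can apply the same guard; a sketch using the availability helper (imported from transformers.file_utils, where it lived at the time of this commit):

from transformers.file_utils import is_tokenizers_available

if is_tokenizers_available():
    from transformers.models.blenderbot_small import BlenderbotSmallTokenizerFast
else:
    # Fall back so downstream code can skip fast-tokenizer-only paths.
    BlenderbotSmallTokenizerFast = None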

View File

@@ -74,8 +74,8 @@ class BlenderbotSmallTokenizerFast(PreTrainedTokenizerFast):
    ):
        super().__init__(
            ByteLevelBPETokenizer(
                vocab_file=vocab_file,
                merges_file=merges_file,
                vocab=vocab_file,
                merges=merges_file,
                add_prefix_space=add_prefix_space,
                trim_offsets=trim_offsets,
            ),
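The change in this file is the actual bugfix: recent releases of the tokenizers library name the ByteLevelBPETokenizer constructor arguments vocab and merges rather than vocab_file and merges_file, so the old keywords fail against current versions. A sketch of the corrected call made directly against the tokenizers library (the file paths are placeholders):

from tokenizers import ByteLevelBPETokenizer

# `vocab` / `merges` accept file paths (or in-memory objects) in current releases.
backend_tokenizer = ByteLevelBPETokenizer(
    vocab="vocab.json",    # placeholder path to a BPE vocabulary file
    merges="merges.txt",   # placeholder path to a BPE merges file
    add_prefix_space=False,
    trim_offsets=True,
)
print(backend_tokenizer.encode("hello world").tokens)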

View File

@@ -47,6 +47,15 @@ class BigBirdTokenizerFast:
        requires_backends(cls, ["tokenizers"])
class BlenderbotSmallTokenizerFast:
    def __init__(self, *args, **kwargs):
        requires_backends(self, ["tokenizers"])
    @classmethod
    def from_pretrained(cls, *args, **kwargs):
        requires_backends(cls, ["tokenizers"])
class CamembertTokenizerFast:
    def __init__(self, *args, **kwargs):
        requires_backends(self, ["tokenizers"])
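Finally, the dummy object keeps imports working in environments without the tokenizers backend: the top-level name resolves to this placeholder class, and the error is raised only when someone actually tries to use it. A sketch of that behavior, assuming tokenizers is not installed:

from transformers import BlenderbotSmallTokenizerFast  # resolves to the dummy class

try:
    BlenderbotSmallTokenizerFast()
except ImportError as err:
    # requires_backends raises an ImportError naming the missing backend.
    print(err)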