From ff5974bb6169b7a5f4bce3587bdaf82b6e5cea2a Mon Sep 17 00:00:00 2001
From: Ita Zaporozhets
Date: Fri, 31 May 2024 14:01:04 +0200
Subject: [PATCH] utils update

---
 src/transformers/tokenization_utils_fast.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/transformers/tokenization_utils_fast.py b/src/transformers/tokenization_utils_fast.py
index 602bea300e..ec65aea797 100644
--- a/src/transformers/tokenization_utils_fast.py
+++ b/src/transformers/tokenization_utils_fast.py
@@ -115,14 +115,15 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase):
         elif fast_tokenizer_file is not None: # and not from_slow:
             # We have a serialization from tokenizers which let us directly build the backend
             fast_tokenizer = TokenizerFast.from_file(fast_tokenizer_file)
-        elif slow_tokenizer is not None:
-            # We need to convert a slow tokenizer to build the backend
-            tokenizer_dict = load_gguf_checkpoint(kwargs.get("vocab_file"))["tokenizer"]
-            fast_tokenizer = convert_gguf_tokenizer(tokenizer_dict)
         elif self.slow_tokenizer_class is not None:
             # We need to create and convert a slow tokenizer to build the backend
             slow_tokenizer = self.slow_tokenizer_class(*args, **kwargs)
             fast_tokenizer = convert_slow_tokenizer(slow_tokenizer)
+        elif slow_tokenizer is not None:
+            # We need to convert a slow tokenizer to build the backend
+            tokenizer_dict = load_gguf_checkpoint(kwargs.get("vocab_file"))["tokenizer"]
+            fast_tokenizer = convert_gguf_tokenizer(tokenizer_dict)
+
         else:
             raise ValueError(
                 "Couldn't instantiate the backend tokenizer from one of: \n"
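
For context, a condensed sketch of the fallback order in PreTrainedTokenizerFast.__init__ that this hunk produces (branches above `fast_tokenizer_file` and the full error message are elided; this is an illustration of the post-patch state, not applied code):

        elif fast_tokenizer_file is not None:  # and not from_slow:
            # A serialized tokenizers file lets us build the backend directly.
            fast_tokenizer = TokenizerFast.from_file(fast_tokenizer_file)
        elif self.slow_tokenizer_class is not None:
            # A slow tokenizer class is registered: instantiate and convert it.
            # With this patch, this branch now takes precedence over the GGUF path.
            slow_tokenizer = self.slow_tokenizer_class(*args, **kwargs)
            fast_tokenizer = convert_slow_tokenizer(slow_tokenizer)
        elif slow_tokenizer is not None:
            # GGUF path: read the tokenizer section of the checkpoint passed as
            # `vocab_file` and convert it into a fast tokenizer.
            tokenizer_dict = load_gguf_checkpoint(kwargs.get("vocab_file"))["tokenizer"]
            fast_tokenizer = convert_gguf_tokenizer(tokenizer_dict)
        else:
            raise ValueError("Couldn't instantiate the backend tokenizer from one of: ...")

In short, the GGUF conversion now only runs when a slow tokenizer was passed in but no slow_tokenizer_class is registered for the model.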