utils update

This commit is contained in:
Ita Zaporozhets 2024-05-31 14:01:04 +02:00
parent fdb63e21db
commit ff5974bb61
1 changed file with 5 additions and 4 deletions

View File

@@ -115,14 +115,15 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase):
elif fast_tokenizer_file is not None: # and not from_slow:
# We have a serialization from tokenizers which let us directly build the backend
fast_tokenizer = TokenizerFast.from_file(fast_tokenizer_file)
elif slow_tokenizer is not None:
# We need to convert a slow tokenizer to build the backend
tokenizer_dict = load_gguf_checkpoint(kwargs.get("vocab_file"))["tokenizer"]
fast_tokenizer = convert_gguf_tokenizer(tokenizer_dict)
elif self.slow_tokenizer_class is not None:
# We need to create and convert a slow tokenizer to build the backend
slow_tokenizer = self.slow_tokenizer_class(*args, **kwargs)
fast_tokenizer = convert_slow_tokenizer(slow_tokenizer)
elif slow_tokenizer is not None:
# We need to convert a slow tokenizer to build the backend
tokenizer_dict = load_gguf_checkpoint(kwargs.get("vocab_file"))["tokenizer"]
fast_tokenizer = convert_gguf_tokenizer(tokenizer_dict)
else:
raise ValueError(
"Couldn't instantiate the backend tokenizer from one of: \n"