diff --git a/examples/flax/language-modeling/t5_tokenizer_model.py b/examples/flax/language-modeling/t5_tokenizer_model.py
index 256519a99c..b55c2c95d9 100755
--- a/examples/flax/language-modeling/t5_tokenizer_model.py
+++ b/examples/flax/language-modeling/t5_tokenizer_model.py
@@ -46,12 +46,16 @@ class SentencePieceUnigramTokenizer(BaseTokenizer):
         )
         tokenizer.pre_tokenizer = pre_tokenizers.Sequence(
             [
-                pre_tokenizers.Metaspace(replacement=replacement, add_prefix_space="always" if add_prefix_space else "never"),
+                pre_tokenizers.Metaspace(
+                    replacement=replacement, add_prefix_space="always" if add_prefix_space else "never"
+                ),
                 pre_tokenizers.Digits(individual_digits=True),
                 pre_tokenizers.Punctuation(),
             ]
         )
-        tokenizer.decoder = decoders.Metaspace(replacement=replacement, add_prefix_space="always" if add_prefix_space else "never")
+        tokenizer.decoder = decoders.Metaspace(
+            replacement=replacement, add_prefix_space="always" if add_prefix_space else "never"
+        )
 
         tokenizer.post_processor = TemplateProcessing(
             single=f"$A {self.special_tokens['eos']['token']}",
diff --git a/src/transformers/convert_slow_tokenizer.py b/src/transformers/convert_slow_tokenizer.py
index c4313cfe11..88f9e5f19a 100644
--- a/src/transformers/convert_slow_tokenizer.py
+++ b/src/transformers/convert_slow_tokenizer.py
@@ -42,6 +42,7 @@ def import_protobuf(error_message=""):
     else:
         raise ImportError(PROTOBUF_IMPORT_ERROR.format(error_message))
 
+
 def _get_prepend_scheme(add_prefix_space: bool, original_tokenizer) -> str:
     if add_prefix_space:
         prepend_scheme = "always"
@@ -607,9 +608,7 @@ class SpmConverter(Converter):
 
     def pre_tokenizer(self, replacement, add_prefix_space):
         prepend_scheme = _get_prepend_scheme(add_prefix_space, self.original_tokenizer)
-        return pre_tokenizers.Metaspace(
-            replacement=replacement, prepend_scheme=prepend_scheme
-        )
+        return pre_tokenizers.Metaspace(replacement=replacement, prepend_scheme=prepend_scheme)
 
     def post_processor(self):
         return None