Apply Ruff formatting (line wrapping and blank-line fixes only; no functional changes).
This commit is contained in:
parent
17daeefa7c
commit
9a72749b0f
|
@ -46,12 +46,16 @@ class SentencePieceUnigramTokenizer(BaseTokenizer):
|
|||
)
|
||||
tokenizer.pre_tokenizer = pre_tokenizers.Sequence(
|
||||
[
|
||||
pre_tokenizers.Metaspace(replacement=replacement, add_prefix_space="always" if add_prefix_space else "never"),
|
||||
pre_tokenizers.Metaspace(
|
||||
replacement=replacement, add_prefix_space="always" if add_prefix_space else "never"
|
||||
),
|
||||
pre_tokenizers.Digits(individual_digits=True),
|
||||
pre_tokenizers.Punctuation(),
|
||||
]
|
||||
)
|
||||
tokenizer.decoder = decoders.Metaspace(replacement=replacement, add_prefix_space="always" if add_prefix_space else "never")
|
||||
tokenizer.decoder = decoders.Metaspace(
|
||||
replacement=replacement, add_prefix_space="always" if add_prefix_space else "never"
|
||||
)
|
||||
|
||||
tokenizer.post_processor = TemplateProcessing(
|
||||
single=f"$A {self.special_tokens['eos']['token']}",
|
||||
|
|
|
@ -42,6 +42,7 @@ def import_protobuf(error_message=""):
|
|||
else:
|
||||
raise ImportError(PROTOBUF_IMPORT_ERROR.format(error_message))
|
||||
|
||||
|
||||
def _get_prepend_scheme(add_prefix_space: bool, original_tokenizer) -> str:
|
||||
if add_prefix_space:
|
||||
prepend_scheme = "always"
|
||||
|
@ -607,9 +608,7 @@ class SpmConverter(Converter):
|
|||
|
||||
def pre_tokenizer(self, replacement, add_prefix_space):
|
||||
prepend_scheme = _get_prepend_scheme(add_prefix_space, self.original_tokenizer)
|
||||
return pre_tokenizers.Metaspace(
|
||||
replacement=replacement, prepend_scheme=prepend_scheme
|
||||
)
|
||||
return pre_tokenizers.Metaspace(replacement=replacement, prepend_scheme=prepend_scheme)
|
||||
|
||||
def post_processor(self):
|
||||
return None
|
||||
|
|
Loading…
Reference in New Issue