[fix codellama conversion] (#30472)

* fix codellama conversion

* nit
This commit is contained in:
Arthur 2024-04-25 10:56:48 +02:00 committed by GitHub
parent e9b1635478
commit c60749d6a6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 2 additions and 2 deletions

View File

@ -1395,14 +1395,14 @@ class LlamaConverter(SpmConverter):
def normalizer(self, proto):
if getattr(self.original_tokenizer, "legacy", True):
sequence = []
if getattr(self.original_tokenizer, "add_prefix_space"):
if getattr(self.original_tokenizer, "add_prefix_space", True):
sequence += [normalizers.Prepend(prepend="")]
sequence += [normalizers.Replace(pattern=" ", content="")]
return normalizers.Sequence(sequence)
return None # non-legacy, no normalizer
def pre_tokenizer(self, replacement, add_prefix_space):
if not self.original_tokenizer.legacy: # non-legacy, we need a replace
if not getattr(self.original_tokenizer, "legacy", True): # non-legacy, we need a replace
prepend_scheme = _get_prepend_scheme(add_prefix_space, self.original_tokenizer)
return pre_tokenizers.Metaspace(replacement=replacement, prepend_scheme=prepend_scheme, split=False)
return None