parent
e9b1635478
commit
c60749d6a6
|
@ -1395,14 +1395,14 @@ class LlamaConverter(SpmConverter):
|
|||
def normalizer(self, proto):
    """Build the normalizer for the wrapped tokenizer.

    In legacy mode (the tokenizer's ``legacy`` attribute is truthy or
    absent) this returns a ``normalizers.Sequence`` that optionally
    prepends "▁" and then replaces every space with "▁". In non-legacy
    mode no normalizer is produced.

    Args:
        proto: Not referenced by this method; presumably kept so the
            signature matches the converter interface (verify against
            the base class).

    Returns:
        A ``normalizers.Sequence`` in legacy mode, otherwise ``None``.
    """
    tokenizer = self.original_tokenizer
    if getattr(tokenizer, "legacy", True):
        sequence = []
        # Default to True: a tokenizer that never defined
        # `add_prefix_space` must not raise AttributeError here, and is
        # treated as wanting the "▁" prefix.
        if getattr(tokenizer, "add_prefix_space", True):
            sequence += [normalizers.Prepend(prepend="▁")]
        sequence += [normalizers.Replace(pattern=" ", content="▁")]
        return normalizers.Sequence(sequence)
    return None  # non-legacy, no normalizer
|
||||
|
||||
def pre_tokenizer(self, replacement, add_prefix_space):
    """Build the pre-tokenizer for the wrapped tokenizer.

    Only non-legacy tokenizers get a pre-tokenizer: a
    ``pre_tokenizers.Metaspace`` that uses ``replacement`` and a prepend
    scheme obtained from ``_get_prepend_scheme``, with ``split=False``.
    In legacy mode (``legacy`` attribute truthy or absent) nothing is
    returned.

    Args:
        replacement: Passed through as Metaspace's ``replacement``.
        add_prefix_space: Forwarded to ``_get_prepend_scheme`` together
            with the original tokenizer.

    Returns:
        A ``pre_tokenizers.Metaspace`` in non-legacy mode, otherwise
        ``None``.
    """
    # Read `legacy` with a default of True, matching `normalizer`, so a
    # tokenizer that lacks the attribute is treated as legacy instead of
    # raising AttributeError.
    if not getattr(self.original_tokenizer, "legacy", True):  # non-legacy, we need a replace
        prepend_scheme = _get_prepend_scheme(add_prefix_space, self.original_tokenizer)
        return pre_tokenizers.Metaspace(replacement=replacement, prepend_scheme=prepend_scheme, split=False)
    return None
|
||||
|
|
Loading…
Reference in New Issue