Add user-defined tokens (#30824)
This commit is contained in:
parent
c876d12127
commit
996ff224a3
|
@ -1385,6 +1385,10 @@ class LlamaConverter(SpmConverter):
|
|||
AddedToken(self.original_tokenizer.convert_ids_to_tokens(2), normalized=False, special=True),
|
||||
]
|
||||
)
|
||||
user_defined_symbols = [
|
||||
AddedToken(token, normalized=True, special=False) for token in proto.trainer_spec.user_defined_symbols
|
||||
]
|
||||
tokenizer.add_tokens(user_defined_symbols)
|
||||
else:
|
||||
raise Exception(
|
||||
"You're trying to run a `Unigram` model but you're file was trained with a different algorithm"
|
||||
|
|
Loading…
Reference in New Issue