[`LlamaTokenizer`] make unk_token_length a property (#25689)

make unk_token_length a property
This commit is contained in:
Arthur 2023-08-24 08:03:34 +02:00 committed by GitHub
parent b85b88069a
commit 6e6da5e4b8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 4 additions and 1 deletion

View File

@ -154,7 +154,10 @@ class LlamaTokenizer(PreTrainedTokenizer):
self.use_default_system_prompt = use_default_system_prompt
self.sp_model = self.get_spm_processor()
self.unk_token_length = len(self.sp_model.encode(str(self.unk_token)))
@property
def unk_token_length(self):
    """Length of the encoded unknown token.

    Computed on every access: ``self.unk_token`` is converted with ``str``,
    passed to ``self.sp_model.encode``, and the length of whatever that call
    returns is reported.

    Returns:
        The ``len`` of the sequence produced by ``self.sp_model.encode``.
    """
    # Resolve the encoder first, then the token text, so attribute access
    # happens in the same order as a single nested expression would.
    encode = self.sp_model.encode
    unk_text = str(self.unk_token)
    encoded_unk = encode(unk_text)
    return len(encoded_unk)
# Copied from transformers.models.t5.tokenization_t5.T5Tokenizer.get_spm_processor
def get_spm_processor(self):