Merge pull request #1296 from danai-antoniou/add-duplicate-tokens-error

Added ValueError for duplicates in list of added tokens
This commit is contained in:
Thomas Wolf 2019-10-03 17:06:17 -04:00 committed by GitHub
commit 1569610f2d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 2 additions and 1 deletion

View File

@@ -512,7 +512,8 @@ class PreTrainedTokenizer(object):
for token in new_tokens:
assert isinstance(token, str) or (six.PY2 and isinstance(token, unicode))
if token != self.unk_token and \
self.convert_tokens_to_ids(token) == self.convert_tokens_to_ids(self.unk_token):
self.convert_tokens_to_ids(token) == self.convert_tokens_to_ids(self.unk_token) and \
token not in to_add_tokens:
to_add_tokens.append(token)
logger.info("Adding %s to the vocabulary", token)