Expose all constructor parameter for BertTokenizerFast (#2921)

Signed-off-by: Morgan Funtowicz <morgan@huggingface.co>
This commit is contained in:
Funtowicz Morgan 2020-02-20 17:53:32 +01:00 committed by GitHub
parent b662f0e625
commit 9b3093311f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 6 additions and 0 deletions

View File

@ -549,8 +549,11 @@ class BertTokenizerFast(PreTrainedTokenizerFast):
pad_token="[PAD]",
cls_token="[CLS]",
mask_token="[MASK]",
clean_text=True,
tokenize_chinese_chars=True,
add_special_tokens=True,
strip_accents=True,
wordpieces_prefix="##",
**kwargs
):
super().__init__(
@ -560,8 +563,11 @@ class BertTokenizerFast(PreTrainedTokenizerFast):
unk_token=unk_token,
sep_token=sep_token,
cls_token=cls_token,
clean_text=clean_text,
handle_chinese_chars=tokenize_chinese_chars,
strip_accents=strip_accents,
lowercase=do_lower_case,
wordpieces_prefix=wordpieces_prefix,
),
unk_token=unk_token,
sep_token=sep_token,