Fix `BertPreTokenizer` regex pattern

This commit is contained in:
Joshua Lochner 2023-05-01 22:34:19 +02:00
parent 63ccc3dbac
commit 459ac7f4f9
1 changed file with 1 addition and 1 deletion

View File

@@ -849,7 +849,7 @@ class BertPreTokenizer extends PreTokenizer {
constructor(config) {
super();
// TODO use config
- this.pattern = /\b\p{L}+\b|[^\s\p{L}]+/gu
+ this.pattern = /\b\p{L}+\b|[^\s\p{L}]/gu;
}
/**
* Tokenizes a single text using the BERT pre-tokenization scheme.