Fix `BertPreTokenizer` regex pattern
This commit is contained in:
parent
63ccc3dbac
commit
459ac7f4f9
|
@@ -849,7 +849,7 @@ class BertPreTokenizer extends PreTokenizer {
|
|||
constructor(config) {
|
||||
super();
|
||||
// TODO use config
|
||||
- this.pattern = /\b\p{L}+\b|[^\s\p{L}]+/gu
|
||||
+ this.pattern = /\b\p{L}+\b|[^\s\p{L}]/gu;
|
||||
}
|
||||
/**
|
||||
* Tokenizes a single text using the BERT pre-tokenization scheme.
|
||||
|
|
Loading…
Reference in New Issue