Update falcon tokenizer (#344)

* Update generate_tests.py

* Do not add token types for `FalconTokenizer`
This commit is contained in:
Joshua Lochner 2023-10-03 12:35:07 +02:00 committed by GitHub
parent 5b31129218
commit b6bd608322
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 2 additions and 7 deletions

View File

@@ -2705,12 +2705,7 @@ export class CodeLlamaTokenizer extends PreTrainedTokenizer { }
export class XLMRobertaTokenizer extends PreTrainedTokenizer { }
export class MPNetTokenizer extends PreTrainedTokenizer { }
-export class FalconTokenizer extends PreTrainedTokenizer {
-/** @type {add_token_types} */
-prepare_model_inputs(inputs) {
-return add_token_types(inputs);
-}
-}
+export class FalconTokenizer extends PreTrainedTokenizer { }
export class GPTNeoXTokenizer extends PreTrainedTokenizer { }

View File

@@ -10,7 +10,7 @@ from scripts.supported_models import SUPPORTED_MODELS
# List of tokenizers where the model isn't yet supported, but the tokenizer is
ADDITIONAL_TOKENIZERS_TO_TEST = {
-'RefinedWebModel': [
+'falcon': [
'tiiuae/falcon-7b',
],
"llama": [