Black preview (#17217)

* Black preview

* Fixup too!

* Fix check copies

* Use the same version as the CI

* Bump black
This commit is contained in:
Sylvain Gugger 2022-05-12 16:25:55 -04:00 committed by GitHub
parent 9bd67ac7bb
commit afe5d42d8d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
578 changed files with 8274 additions and 3296 deletions

View File

@ -854,7 +854,7 @@ jobs:
key: v0.4-code_quality-{{ checksum "setup.py" }}
paths:
- '~/.cache/pip'
- run: black --check examples tests src utils
- run: black --check --preview examples tests src utils
- run: isort --check-only examples tests src utils
- run: python utils/custom_init_isort.py --check_only
- run: flake8 examples tests src utils

View File

@ -9,7 +9,7 @@ modified_only_fixup:
$(eval modified_py_files := $(shell python utils/get_modified_files.py $(check_dirs)))
@if test -n "$(modified_py_files)"; then \
echo "Checking/fixing $(modified_py_files)"; \
black $(modified_py_files); \
black --preview $(modified_py_files); \
isort $(modified_py_files); \
flake8 $(modified_py_files); \
else \
@ -45,7 +45,7 @@ repo-consistency:
# this target runs checks on all files
quality:
black --check $(check_dirs)
black --check --preview $(check_dirs)
isort --check-only $(check_dirs)
python utils/custom_init_isort.py --check_only
flake8 $(check_dirs)
@ -60,7 +60,7 @@ extra_style_checks:
# this target runs checks on all files and potentially modifies some of them
style:
black $(check_dirs)
black --preview $(check_dirs)
isort $(check_dirs)
${MAKE} autogenerate_code
${MAKE} extra_style_checks

View File

@ -42,14 +42,18 @@ class ModelArguments:
)
encoder_model_name_or_path: str = field(
metadata={
"help": "The encoder model checkpoint for weights initialization."
"Don't set if you want to train an encoder model from scratch."
"help": (
"The encoder model checkpoint for weights initialization."
"Don't set if you want to train an encoder model from scratch."
)
},
)
decoder_model_name_or_path: str = field(
metadata={
"help": "The decoder model checkpoint for weights initialization."
"Don't set if you want to train a decoder model from scratch."
"help": (
"The decoder model checkpoint for weights initialization."
"Don't set if you want to train a decoder model from scratch."
)
},
)
encoder_config_name: Optional[str] = field(

View File

@ -175,14 +175,19 @@ class ModelArguments:
dtype: Optional[str] = field(
default="float32",
metadata={
"help": "Floating-point format in which the model weights should be initialized and trained. Choose one of `[float32, float16, bfloat16]`."
"help": (
"Floating-point format in which the model weights should be initialized and trained. Choose one of"
" `[float32, float16, bfloat16]`."
)
},
)
use_auth_token: bool = field(
default=False,
metadata={
"help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
"help": (
"Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
)
},
)
@ -222,38 +227,48 @@ class DataTrainingArguments:
max_target_length: Optional[int] = field(
default=128,
metadata={
"help": "The maximum total sequence length for target text after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
"help": (
"The maximum total sequence length for target text after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
)
},
)
val_max_target_length: Optional[int] = field(
default=None,
metadata={
"help": "The maximum total sequence length for validation target text after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`."
"This argument is also used to override the `max_length` param of `model.generate`, which is used "
"during evaluation."
"help": (
"The maximum total sequence length for validation target text after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`."
"This argument is also used to override the `max_length` param of `model.generate`, which is used "
"during evaluation."
)
},
)
max_train_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
)
},
)
max_eval_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
)
},
)
max_predict_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of prediction examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of prediction examples to this "
"value if set."
)
},
)
preprocessing_num_workers: Optional[int] = field(
@ -266,8 +281,10 @@ class DataTrainingArguments:
num_beams: Optional[int] = field(
default=None,
metadata={
"help": "Number of beams to use for evaluation. This argument will be passed to `model.generate`, "
"which is used during evaluation."
"help": (
"Number of beams to use for evaluation. This argument will be passed to `model.generate`, "
"which is used during evaluation."
)
},
)
overwrite_cache: bool = field(
@ -623,7 +640,7 @@ def main():
eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count()
if training_args.block_size % train_batch_size > 0 or training_args.block_size % eval_batch_size > 0:
raise ValueError(
f"`training_args.block_size` needs to be a multiple of the global train/eval batch size."
"`training_args.block_size` needs to be a multiple of the global train/eval batch size."
f"Got {training_args.block_size}, {train_batch_size} and {eval_batch_size} respectively instead."
)
@ -1136,7 +1153,7 @@ def main():
)
# train
for (batch_idx, _) in enumerate(tqdm(range(steps_per_epoch), desc="Training...", position=1, leave=False)):
for batch_idx, _ in enumerate(tqdm(range(steps_per_epoch), desc="Training...", position=1, leave=False)):
cur_step += 1
batch = next(train_batches)
@ -1150,7 +1167,10 @@ def main():
if training_args.logging_steps > 0 and cur_step % training_args.logging_steps == 0:
_train_metric = unreplicate(train_metric)
desc = f"Epoch... ({epoch + 1}/{num_epochs} | Step: {cur_step} | Loss: {_train_metric['loss']} | Learning Rate: {_train_metric['learning_rate']} | Time per step: {time_per_step})"
desc = (
f"Epoch... ({epoch + 1}/{num_epochs} | Step: {cur_step} | Loss: {_train_metric['loss']} |"
f" Learning Rate: {_train_metric['learning_rate']} | Time per step: {time_per_step})"
)
epochs.desc = desc
epochs.write(desc)

View File

@ -138,8 +138,9 @@ class ModelArguments:
model_name_or_path: Optional[str] = field(
default=None,
metadata={
"help": "The model checkpoint for weights initialization."
"Don't set if you want to train a model from scratch."
"help": (
"The model checkpoint for weights initialization.Don't set if you want to train a model from scratch."
)
},
)
model_type: Optional[str] = field(
@ -162,14 +163,19 @@ class ModelArguments:
dtype: Optional[str] = field(
default="float32",
metadata={
"help": "Floating-point format in which the model weights should be initialized and trained. Choose one of `[float32, float16, bfloat16]`."
"help": (
"Floating-point format in which the model weights should be initialized and trained. Choose one of"
" `[float32, float16, bfloat16]`."
)
},
)
use_auth_token: bool = field(
default=False,
metadata={
"help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
"help": (
"Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
)
},
)
@ -194,15 +200,19 @@ class DataTrainingArguments:
max_train_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
)
},
)
max_eval_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
)
},
)
overwrite_cache: bool = field(
@ -217,9 +227,11 @@ class DataTrainingArguments:
block_size: Optional[int] = field(
default=None,
metadata={
"help": "Optional input sequence length after tokenization. "
"The training dataset will be truncated in block of this size for training. "
"Default to the model max input length for single sentence inputs (take into account special tokens)."
"help": (
"Optional input sequence length after tokenization. "
"The training dataset will be truncated in block of this size for training. "
"Default to the model max input length for single sentence inputs (take into account special tokens)."
)
},
)
overwrite_cache: bool = field(
@ -505,7 +517,8 @@ def main():
# clm input could be much much longer than block_size
if "Token indices sequence length is longer than the" in cl.out:
tok_logger.warning(
"^^^^^^^^^^^^^^^^ Please ignore the warning above - this long input will be chunked into smaller bits before being passed to the model."
"^^^^^^^^^^^^^^^^ Please ignore the warning above - this long input will be chunked into smaller bits"
" before being passed to the model."
)
return output
@ -735,7 +748,8 @@ def main():
write_train_metric(summary_writer, train_metrics, train_time, cur_step)
epochs.write(
f"Step... ({cur_step} | Loss: {train_metric['loss'].mean()}, Learning Rate: {train_metric['learning_rate'].mean()})"
f"Step... ({cur_step} | Loss: {train_metric['loss'].mean()}, Learning Rate:"
f" {train_metric['learning_rate'].mean()})"
)
train_metrics = []
@ -762,7 +776,10 @@ def main():
eval_metrics["perplexity"] = float("inf")
# Print metrics and update progress bar
desc = f"Step... ({cur_step} | Eval Loss: {eval_metrics['loss']} | Eval Perplexity: {eval_metrics['perplexity']})"
desc = (
f"Step... ({cur_step} | Eval Loss: {eval_metrics['loss']} | Eval Perplexity:"
f" {eval_metrics['perplexity']})"
)
epochs.write(desc)
epochs.desc = desc

View File

@ -136,8 +136,9 @@ class ModelArguments:
model_name_or_path: Optional[str] = field(
default=None,
metadata={
"help": "The model checkpoint for weights initialization."
"Don't set if you want to train a model from scratch."
"help": (
"The model checkpoint for weights initialization.Don't set if you want to train a model from scratch."
)
},
)
model_type: Optional[str] = field(
@ -160,14 +161,19 @@ class ModelArguments:
dtype: Optional[str] = field(
default="float32",
metadata={
"help": "Floating-point format in which the model weights should be initialized and trained. Choose one of `[float32, float16, bfloat16]`."
"help": (
"Floating-point format in which the model weights should be initialized and trained. Choose one of"
" `[float32, float16, bfloat16]`."
)
},
)
use_auth_token: bool = field(
default=False,
metadata={
"help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
"help": (
"Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
)
},
)
@ -209,8 +215,10 @@ class DataTrainingArguments:
max_seq_length: Optional[int] = field(
default=None,
metadata={
"help": "The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated. Default to the max input length of the model."
"help": (
"The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated. Default to the max input length of the model."
)
},
)
preprocessing_num_workers: Optional[int] = field(
@ -223,8 +231,10 @@ class DataTrainingArguments:
pad_to_max_length: bool = field(
default=False,
metadata={
"help": "Whether to pad all samples to `max_seq_length`. "
"If False, will pad the samples dynamically when batching to the maximum length in the batch."
"help": (
"Whether to pad all samples to `max_seq_length`. "
"If False, will pad the samples dynamically when batching to the maximum length in the batch."
)
},
)
line_by_line: bool = field(
@ -764,7 +774,8 @@ def main():
write_train_metric(summary_writer, train_metrics, train_time, cur_step)
epochs.write(
f"Step... ({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']})"
f"Step... ({cur_step} | Loss: {train_metric['loss']}, Learning Rate:"
f" {train_metric['learning_rate']})"
)
train_metrics = []

View File

@ -135,8 +135,9 @@ class ModelArguments:
model_name_or_path: Optional[str] = field(
default=None,
metadata={
"help": "The model checkpoint for weights initialization."
"Don't set if you want to train a model from scratch."
"help": (
"The model checkpoint for weights initialization.Don't set if you want to train a model from scratch."
)
},
)
model_type: Optional[str] = field(
@ -159,14 +160,19 @@ class ModelArguments:
dtype: Optional[str] = field(
default="float32",
metadata={
"help": "Floating-point format in which the model weights should be initialized and trained. Choose one of `[float32, float16, bfloat16]`."
"help": (
"Floating-point format in which the model weights should be initialized and trained. Choose one of"
" `[float32, float16, bfloat16]`."
)
},
)
use_auth_token: bool = field(
default=False,
metadata={
"help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
"help": (
"Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
)
},
)
@ -208,7 +214,10 @@ class DataTrainingArguments:
max_seq_length: Optional[int] = field(
default=None,
metadata={
"help": "The maximum total input sequence length after tokenization and masking. Sequences longer than this will be truncated. Default to the max input length of the model."
"help": (
"The maximum total input sequence length after tokenization and masking. Sequences longer than this"
" will be truncated. Default to the max input length of the model."
)
},
)
preprocessing_num_workers: Optional[int] = field(
@ -337,12 +346,14 @@ class FlaxDataCollatorForT5MLM:
if batch["input_ids"].shape[-1] != self.input_length:
raise ValueError(
f"`input_ids` are incorrectly preprocessed. `input_ids` length is {batch['input_ids'].shape[-1]}, but should be {self.target_length}."
f"`input_ids` are incorrectly preprocessed. `input_ids` length is {batch['input_ids'].shape[-1]}, but"
f" should be {self.target_length}."
)
if batch["labels"].shape[-1] != self.target_length:
raise ValueError(
f"`labels` are incorrectly preprocessed. `labels` length is {batch['labels'].shape[-1]}, but should be {self.target_length}."
f"`labels` are incorrectly preprocessed. `labels` length is {batch['labels'].shape[-1]}, but should be"
f" {self.target_length}."
)
# to check that tokens are correctly preprocessed, one can run `self.tokenizer.batch_decode(input_ids)` and `self.tokenizer.batch_decode(labels)` here...
@ -884,7 +895,8 @@ def main():
write_train_metric(summary_writer, train_metrics, train_time, cur_step)
epochs.write(
f"Step... ({cur_step} | Loss: {train_metric['loss'].mean()}, Learning Rate: {train_metric['learning_rate'].mean()})"
f"Step... ({cur_step} | Loss: {train_metric['loss'].mean()}, Learning Rate:"
f" {train_metric['learning_rate'].mean()})"
)
train_metrics = []

View File

@ -157,14 +157,19 @@ class ModelArguments:
use_auth_token: bool = field(
default=False,
metadata={
"help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
"help": (
"Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
)
},
)
dtype: Optional[str] = field(
default="float32",
metadata={
"help": "Floating-point format in which the model weights should be initialized and trained. Choose one of `[float32, float16, bfloat16]`."
"help": (
"Floating-point format in which the model weights should be initialized and trained. Choose one of"
" `[float32, float16, bfloat16]`."
)
},
)
@ -200,37 +205,46 @@ class DataTrainingArguments:
max_seq_length: int = field(
default=384,
metadata={
"help": "The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
"help": (
"The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
)
},
)
pad_to_max_length: bool = field(
default=False,
metadata={
"help": "Whether to pad all samples to `max_seq_length`. "
"If False, will pad the samples dynamically when batching to the maximum length in the batch (which can "
"be faster on GPU but will be slower on TPU)."
"help": (
"Whether to pad all samples to `max_seq_length`. If False, will pad the samples dynamically when"
" batching to the maximum length in the batch (which can be faster on GPU but will be slower on TPU)."
)
},
)
max_train_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
)
},
)
max_eval_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
)
},
)
max_predict_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of prediction examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of prediction examples to this "
"value if set."
)
},
)
version_2_with_negative: bool = field(
@ -239,9 +253,11 @@ class DataTrainingArguments:
null_score_diff_threshold: float = field(
default=0.0,
metadata={
"help": "The threshold used to select the null answer: if the best answer has a score that is less than "
"the score of the null answer minus this threshold, the null answer is selected for this example. "
"Only useful when `version_2_with_negative=True`."
"help": (
"The threshold used to select the null answer: if the best answer has a score that is less than "
"the score of the null answer minus this threshold, the null answer is selected for this example. "
"Only useful when `version_2_with_negative=True`."
)
},
)
doc_stride: int = field(
@ -255,8 +271,10 @@ class DataTrainingArguments:
max_answer_length: int = field(
default=30,
metadata={
"help": "The maximum length of an answer that can be generated. This is needed because the start "
"and end predictions are not conditioned on one another."
"help": (
"The maximum length of an answer that can be generated. This is needed because the start "
"and end predictions are not conditioned on one another."
)
},
)
@ -498,9 +516,9 @@ def main():
# region Tokenizer check: this script requires a fast tokenizer.
if not isinstance(tokenizer, PreTrainedTokenizerFast):
raise ValueError(
"This example script only works for models that have a fast tokenizer. Checkout the big table of models "
"at https://huggingface.co/transformers/index.html#supported-frameworks to find the model types that meet this "
"requirement"
"This example script only works for models that have a fast tokenizer. Checkout the big table of models at"
" https://huggingface.co/transformers/index.html#supported-frameworks to find the model types that meet"
" this requirement"
)
# endregion
@ -928,7 +946,8 @@ def main():
write_train_metric(summary_writer, train_metrics, train_time, cur_step)
epochs.write(
f"Step... ({cur_step}/{total_steps} | Training Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']})"
f"Step... ({cur_step}/{total_steps} | Training Loss: {train_metric['loss']}, Learning Rate:"
f" {train_metric['learning_rate']})"
)
train_metrics = []

View File

@ -149,8 +149,9 @@ class ModelArguments:
model_name_or_path: Optional[str] = field(
default=None,
metadata={
"help": "The model checkpoint for weights initialization."
"Don't set if you want to train a model from scratch."
"help": (
"The model checkpoint for weights initialization.Don't set if you want to train a model from scratch."
)
},
)
model_type: Optional[str] = field(
@ -173,14 +174,19 @@ class ModelArguments:
dtype: Optional[str] = field(
default="float32",
metadata={
"help": "Floating-point format in which the model weights should be initialized and trained. Choose one of `[float32, float16, bfloat16]`."
"help": (
"Floating-point format in which the model weights should be initialized and trained. Choose one of"
" `[float32, float16, bfloat16]`."
)
},
)
use_auth_token: bool = field(
default=False,
metadata={
"help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
"help": (
"Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
)
},
)
@ -217,45 +223,57 @@ class DataTrainingArguments:
max_source_length: Optional[int] = field(
default=1024,
metadata={
"help": "The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
"help": (
"The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
)
},
)
max_target_length: Optional[int] = field(
default=128,
metadata={
"help": "The maximum total sequence length for target text after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
"help": (
"The maximum total sequence length for target text after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
)
},
)
val_max_target_length: Optional[int] = field(
default=None,
metadata={
"help": "The maximum total sequence length for validation target text after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`."
"This argument is also used to override the `max_length` param of `model.generate`, which is used "
"during evaluation."
"help": (
"The maximum total sequence length for validation target text after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`."
"This argument is also used to override the `max_length` param of `model.generate`, which is used "
"during evaluation."
)
},
)
max_train_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
)
},
)
max_eval_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
)
},
)
max_predict_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of prediction examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of prediction examples to this "
"value if set."
)
},
)
preprocessing_num_workers: Optional[int] = field(
@ -271,8 +289,10 @@ class DataTrainingArguments:
num_beams: Optional[int] = field(
default=None,
metadata={
"help": "Number of beams to use for evaluation. This argument will be passed to `model.generate`, "
"which is used during evaluation."
"help": (
"Number of beams to use for evaluation. This argument will be passed to `model.generate`, "
"which is used during evaluation."
)
},
)
overwrite_cache: bool = field(
@ -831,7 +851,8 @@ def main():
train_metric = unreplicate(train_metric)
epochs.write(
f"Epoch... ({epoch + 1}/{num_epochs} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']})"
f"Epoch... ({epoch + 1}/{num_epochs} | Loss: {train_metric['loss']}, Learning Rate:"
f" {train_metric['learning_rate']})"
)
# ======================== Evaluating ==============================

View File

@ -103,8 +103,10 @@ class ModelArguments:
use_auth_token: bool = field(
default=False,
metadata={
"help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
"help": (
"Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
)
},
)
@ -148,29 +150,37 @@ class DataTrainingArguments:
max_seq_length: int = field(
default=None,
metadata={
"help": "The maximum total input sequence length after tokenization. If set, sequences longer "
"than this will be truncated, sequences shorter will be padded."
"help": (
"The maximum total input sequence length after tokenization. If set, sequences longer "
"than this will be truncated, sequences shorter will be padded."
)
},
)
max_train_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
)
},
)
max_eval_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
)
},
)
max_predict_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of prediction examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of prediction examples to this "
"value if set."
)
},
)
@ -585,7 +595,8 @@ def main():
write_train_metric(summary_writer, train_metrics, train_time, cur_step)
epochs.write(
f"Step... ({cur_step}/{total_steps} | Training Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']})"
f"Step... ({cur_step}/{total_steps} | Training Loss: {train_metric['loss']}, Learning Rate:"
f" {train_metric['learning_rate']})"
)
train_metrics = []

View File

@ -150,8 +150,10 @@ class ModelArguments:
use_auth_token: bool = field(
default=False,
metadata={
"help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
"help": (
"Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
)
},
)
@ -196,36 +198,46 @@ class DataTrainingArguments:
max_seq_length: int = field(
default=None,
metadata={
"help": "The maximum total input sequence length after tokenization. If set, sequences longer "
"than this will be truncated, sequences shorter will be padded."
"help": (
"The maximum total input sequence length after tokenization. If set, sequences longer "
"than this will be truncated, sequences shorter will be padded."
)
},
)
max_train_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
)
},
)
max_eval_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
)
},
)
max_predict_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of prediction examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of prediction examples to this "
"value if set."
)
},
)
label_all_tokens: bool = field(
default=False,
metadata={
"help": "Whether to put the label for one word on all tokens of generated by that word or just on the "
"one (in which case the other tokens will have a padding index)."
"help": (
"Whether to put the label for one word on all tokens of generated by that word or just on the "
"one (in which case the other tokens will have a padding index)."
)
},
)
return_entity_level_metrics: bool = field(
@ -693,7 +705,8 @@ def main():
write_train_metric(summary_writer, train_metrics, train_time, cur_step)
epochs.write(
f"Step... ({cur_step}/{total_steps} | Training Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']})"
f"Step... ({cur_step}/{total_steps} | Training Loss: {train_metric['loss']}, Learning Rate:"
f" {train_metric['learning_rate']})"
)
train_metrics = []
@ -744,7 +757,8 @@ def main():
logger.info(f"Step... ({cur_step}/{total_steps} | Validation metrics: {eval_metrics}")
else:
logger.info(
f"Step... ({cur_step}/{total_steps} | Validation f1: {eval_metrics['f1']}, Validation Acc: {eval_metrics['accuracy']})"
f"Step... ({cur_step}/{total_steps} | Validation f1: {eval_metrics['f1']}, Validation Acc:"
f" {eval_metrics['accuracy']})"
)
if has_tensorboard and jax.process_index() == 0:

View File

@ -134,8 +134,9 @@ class ModelArguments:
model_name_or_path: Optional[str] = field(
default=None,
metadata={
"help": "The model checkpoint for weights initialization."
"Don't set if you want to train a model from scratch."
"help": (
"The model checkpoint for weights initialization.Don't set if you want to train a model from scratch."
)
},
)
model_type: Optional[str] = field(
@ -151,14 +152,19 @@ class ModelArguments:
dtype: Optional[str] = field(
default="float32",
metadata={
"help": "Floating-point format in which the model weights should be initialized and trained. Choose one of `[float32, float16, bfloat16]`."
"help": (
"Floating-point format in which the model weights should be initialized and trained. Choose one of"
" `[float32, float16, bfloat16]`."
)
},
)
use_auth_token: bool = field(
default=False,
metadata={
"help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
"help": (
"Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
)
},
)
@ -179,15 +185,19 @@ class DataTrainingArguments:
max_train_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
)
},
)
max_eval_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
)
},
)
overwrite_cache: bool = field(
@ -509,7 +519,8 @@ def main():
train_step_progress_bar.close()
epochs.write(
f"Epoch... ({epoch + 1}/{num_epochs} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']})"
f"Epoch... ({epoch + 1}/{num_epochs} | Loss: {train_metric['loss']}, Learning Rate:"
f" {train_metric['learning_rate']})"
)
# ======================== Evaluating ==============================

View File

@ -78,8 +78,10 @@ class DataTrainingArguments:
max_seq_length: int = field(
default=128,
metadata={
"help": "The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
"help": (
"The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
)
},
)
overwrite_cache: bool = field(
@ -102,7 +104,8 @@ def main():
and not training_args.overwrite_output_dir
):
raise ValueError(
f"Output directory ({training_args.output_dir}) already exists and is not empty. Use --overwrite_output_dir to overcome."
f"Output directory ({training_args.output_dir}) already exists and is not empty. Use"
" --overwrite_output_dir to overcome."
)
# Setup logging

View File

@ -182,7 +182,7 @@ if is_tf_available():
)
def gen():
for (ex_index, ex) in tqdm.tqdm(enumerate(self.features), desc="convert examples to features"):
for ex_index, ex in tqdm.tqdm(enumerate(self.features), desc="convert examples to features"):
if ex_index % 10000 == 0:
logger.info("Writing example %d of %d" % (ex_index, len(examples)))
@ -297,7 +297,7 @@ class RaceProcessor(DataProcessor):
def _create_examples(self, lines, set_type):
"""Creates examples for the training and dev sets."""
examples = []
for (_, data_raw) in enumerate(lines):
for _, data_raw in enumerate(lines):
race_id = "%s-%s" % (set_type, data_raw["race_id"])
article = data_raw["article"]
for i in range(len(data_raw["answers"])):
@ -518,7 +518,7 @@ def convert_examples_to_features(
label_map = {label: i for i, label in enumerate(label_list)}
features = []
for (ex_index, example) in tqdm.tqdm(enumerate(examples), desc="convert examples to features"):
for ex_index, example in tqdm.tqdm(enumerate(examples), desc="convert examples to features"):
if ex_index % 10000 == 0:
logger.info("Writing example %d of %d" % (ex_index, len(examples)))
choices_inputs = []

View File

@ -312,8 +312,10 @@ def add_generic_args(parser, root_dir) -> None:
"--fp16_opt_level",
type=str,
default="O2",
help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
"See details at https://nvidia.github.io/apex/amp.html",
help=(
"For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
"See details at https://nvidia.github.io/apex/amp.html"
),
)
parser.add_argument("--n_tpu_cores", dest="tpu_cores", type=int)
parser.add_argument("--max_grad_norm", dest="gradient_clip_val", default=1.0, type=float, help="Max gradient norm")

View File

@ -148,8 +148,10 @@ class GLUETransformer(BaseTransformer):
"--max_seq_length",
default=128,
type=int,
help="The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded.",
help=(
"The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
),
)
parser.add_argument(

View File

@ -173,8 +173,10 @@ class NERTransformer(BaseTransformer):
"--max_seq_length",
default=128,
type=int,
help="The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded.",
help=(
"The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
),
)
parser.add_argument(

View File

@ -551,8 +551,10 @@ def main():
"--max_seq_length",
default=384,
type=int,
help="The maximum total input sequence length after WordPiece tokenization. Sequences "
"longer than this will be truncated, and sequences shorter than this will be padded.",
help=(
"The maximum total input sequence length after WordPiece tokenization. Sequences "
"longer than this will be truncated, and sequences shorter than this will be padded."
),
)
parser.add_argument(
"--doc_stride",
@ -564,8 +566,10 @@ def main():
"--max_query_length",
default=64,
type=int,
help="The maximum number of tokens for the question. Questions longer than this will "
"be truncated to this length.",
help=(
"The maximum number of tokens for the question. Questions longer than this will "
"be truncated to this length."
),
)
parser.add_argument("--do_train", action="store_true", help="Whether to run training.")
parser.add_argument("--do_eval", action="store_true", help="Whether to run eval on the dev set.")
@ -610,20 +614,27 @@ def main():
"--max_answer_length",
default=30,
type=int,
help="The maximum length of an answer that can be generated. This is needed because the start "
"and end predictions are not conditioned on one another.",
help=(
"The maximum length of an answer that can be generated. This is needed because the start "
"and end predictions are not conditioned on one another."
),
)
parser.add_argument(
"--verbose_logging",
action="store_true",
help="If true, all of the warnings related to data processing will be printed. "
"A number of warnings are expected for a normal SQuAD evaluation.",
help=(
"If true, all of the warnings related to data processing will be printed. "
"A number of warnings are expected for a normal SQuAD evaluation."
),
)
parser.add_argument(
"--lang_id",
default=0,
type=int,
help="language id of input for language-specific xlm models (see tokenization_xlm.PRETRAINED_INIT_CONFIGURATION)",
help=(
"language id of input for language-specific xlm models (see"
" tokenization_xlm.PRETRAINED_INIT_CONFIGURATION)"
),
)
parser.add_argument("--logging_steps", type=int, default=500, help="Log every X updates steps.")
@ -652,8 +663,10 @@ def main():
"--fp16_opt_level",
type=str,
default="O1",
help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
"See details at https://nvidia.github.io/apex/amp.html",
help=(
"For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
"See details at https://nvidia.github.io/apex/amp.html"
),
)
parser.add_argument("--server_ip", type=str, default="", help="Can be used for distant debugging.")
parser.add_argument("--server_port", type=str, default="", help="Can be used for distant debugging.")

View File

@ -84,7 +84,8 @@ def main():
and not training_args.overwrite_output_dir
):
raise ValueError(
f"Output directory ({training_args.output_dir}) already exists and is not empty. Use --overwrite_output_dir to overcome."
f"Output directory ({training_args.output_dir}) already exists and is not empty. Use"
" --overwrite_output_dir to overcome."
)
# Setup logging

View File

@ -68,7 +68,10 @@ class ModelArguments:
model_name_or_path: Optional[str] = field(
default=None,
metadata={
"help": "The model checkpoint for weights initialization. Leave None if you want to train a model from scratch."
"help": (
"The model checkpoint for weights initialization. Leave None if you want to train a model from"
" scratch."
)
},
)
model_type: Optional[str] = field(
@ -99,8 +102,10 @@ class DataTrainingArguments:
train_data_files: Optional[str] = field(
default=None,
metadata={
"help": "The input training data files (multiple files in glob format). "
"Very often splitting large files to smaller files can prevent tokenizer going out of memory"
"help": (
"The input training data files (multiple files in glob format). "
"Very often splitting large files to smaller files can prevent tokenizer going out of memory"
)
},
)
eval_data_file: Optional[str] = field(
@ -130,7 +135,10 @@ class DataTrainingArguments:
plm_probability: float = field(
default=1 / 6,
metadata={
"help": "Ratio of length of a span of masked tokens to surrounding context length for permutation language modeling."
"help": (
"Ratio of length of a span of masked tokens to surrounding context length for permutation language"
" modeling."
)
},
)
max_span_length: int = field(
@ -140,9 +148,11 @@ class DataTrainingArguments:
block_size: int = field(
default=-1,
metadata={
"help": "Optional input sequence length after tokenization."
"The training dataset will be truncated in block of this size for training."
"Default to the model max input length for single sentence inputs (take into account special tokens)."
"help": (
"Optional input sequence length after tokenization."
"The training dataset will be truncated in block of this size for training."
"Default to the model max input length for single sentence inputs (take into account special tokens)."
)
},
)
overwrite_cache: bool = field(
@ -206,7 +216,8 @@ def main():
and not training_args.overwrite_output_dir
):
raise ValueError(
f"Output directory ({training_args.output_dir}) already exists and is not empty. Use --overwrite_output_dir to overcome."
f"Output directory ({training_args.output_dir}) already exists and is not empty. Use"
" --overwrite_output_dir to overcome."
)
# Setup logging
@ -253,8 +264,8 @@ def main():
tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path, cache_dir=model_args.cache_dir)
else:
raise ValueError(
"You are instantiating a new tokenizer from scratch. This is not supported, but you can do it from another script, save it,"
"and load it from here, using --tokenizer_name"
"You are instantiating a new tokenizer from scratch. This is not supported, but you can do it from another"
" script, save it,and load it from here, using --tokenizer_name"
)
if model_args.model_name_or_path:

View File

@ -126,15 +126,15 @@ def main():
"--max_steps",
default=-1,
type=int,
help="If > 0: set total number of training \
steps to perform. Override num_train_epochs.",
help=(
"If > 0: set total number of training steps to perform. Override num_train_epochs."
),
)
parser.add_argument(
"--gradient_accumulation_steps",
type=int,
default=1,
help="Number of updates steps to accumulate before\
performing a backward/update pass.",
help="Number of updates steps to accumulate before performing a backward/update pass.",
)
parser.add_argument("--learning_rate", type=float, default=6.25e-5)
parser.add_argument("--warmup_steps", default=0, type=int, help="Linear warmup over warmup_steps.")

View File

@ -516,8 +516,10 @@ def main():
"--max_seq_length",
default=384,
type=int,
help="The maximum total input sequence length after tokenization. Sequences "
"longer than this will be truncated, and sequences shorter than this will be padded.",
help=(
"The maximum total input sequence length after tokenization. Sequences "
"longer than this will be truncated, and sequences shorter than this will be padded."
),
)
parser.add_argument("--do_train", action="store_true", help="Whether to run training.")
parser.add_argument("--do_eval", action="store_true", help="Whether to run eval on the dev set.")
@ -576,8 +578,10 @@ def main():
"--fp16_opt_level",
type=str,
default="O1",
help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
"See details at https://nvidia.github.io/apex/amp.html",
help=(
"For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
"See details at https://nvidia.github.io/apex/amp.html"
),
)
parser.add_argument("--server_ip", type=str, default="", help="Can be used for distant debugging.")
parser.add_argument("--server_port", type=str, default="", help="Can be used for distant debugging.")

View File

@ -90,31 +90,39 @@ class DataTrainingArguments:
max_source_length: Optional[int] = field(
default=1024,
metadata={
"help": "The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
"help": (
"The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
)
},
)
max_target_length: Optional[int] = field(
default=128,
metadata={
"help": "The maximum total sequence length for target text after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
"help": (
"The maximum total sequence length for target text after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
)
},
)
val_max_target_length: Optional[int] = field(
default=142,
metadata={
"help": "The maximum total sequence length for validation target text after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded. "
"This argument is also used to override the ``max_length`` param of ``model.generate``, which is used "
"during ``evaluate`` and ``predict``."
"help": (
"The maximum total sequence length for validation target text after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded. "
"This argument is also used to override the ``max_length`` param of ``model.generate``, which is used "
"during ``evaluate`` and ``predict``."
)
},
)
test_max_target_length: Optional[int] = field(
default=142,
metadata={
"help": "The maximum total sequence length for test target text after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
"help": (
"The maximum total sequence length for test target text after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
)
},
)
n_train: Optional[int] = field(default=-1, metadata={"help": "# training examples. -1 means use all."})

View File

@ -22,15 +22,30 @@ from utils import calculate_rouge
PRED = [
'Prosecutor: "No videos were used in the crash investigation" German papers say they saw a cell phone video of the final seconds on board Flight 9525. The Germanwings co-pilot says he had a "previous episode of severe depression" German airline confirms it knew of Andreas Lubitz\'s depression years before he took control.',
"The Palestinian Authority officially becomes the 123rd member of the International Criminal Court. The formal accession was marked with a ceremony at The Hague, in the Netherlands. The Palestinians signed the ICC's founding Rome Statute in January. Israel and the United States opposed the Palestinians' efforts to join the body.",
"Amnesty International releases its annual report on the death penalty. The report catalogs the use of state-sanctioned killing as a punitive measure across the globe. At least 607 people were executed around the world in 2014, compared to 778 in 2013. The U.S. remains one of the worst offenders for imposing capital punishment.",
'Prosecutor: "No videos were used in the crash investigation" German papers say they saw a cell phone video of the'
' final seconds on board Flight 9525. The Germanwings co-pilot says he had a "previous episode of severe'
" depression\" German airline confirms it knew of Andreas Lubitz's depression years before he took control.",
"The Palestinian Authority officially becomes the 123rd member of the International Criminal Court. The formal"
" accession was marked with a ceremony at The Hague, in the Netherlands. The Palestinians signed the ICC's"
" founding Rome Statute in January. Israel and the United States opposed the Palestinians' efforts to join the"
" body.",
"Amnesty International releases its annual report on the death penalty. The report catalogs the use of"
" state-sanctioned killing as a punitive measure across the globe. At least 607 people were executed around the"
" world in 2014, compared to 778 in 2013. The U.S. remains one of the worst offenders for imposing capital"
" punishment.",
]
TGT = [
'Marseille prosecutor says "so far no videos were used in the crash investigation" despite media reports . Journalists at Bild and Paris Match are "very confident" the video clip is real, an editor says . Andreas Lubitz had informed his Lufthansa training school of an episode of severe depression, airline says .',
"Membership gives the ICC jurisdiction over alleged crimes committed in Palestinian territories since last June . Israel and the United States opposed the move, which could open the door to war crimes investigations against Israelis .",
"Amnesty's annual death penalty report catalogs encouraging signs, but setbacks in numbers of those sentenced to death . Organization claims that governments around the world are using the threat of terrorism to advance executions . The number of executions worldwide has gone down by almost 22% compared with 2013, but death sentences up by 28% .",
'Marseille prosecutor says "so far no videos were used in the crash investigation" despite media reports .'
' Journalists at Bild and Paris Match are "very confident" the video clip is real, an editor says . Andreas Lubitz'
" had informed his Lufthansa training school of an episode of severe depression, airline says .",
"Membership gives the ICC jurisdiction over alleged crimes committed in Palestinian territories since last June ."
" Israel and the United States opposed the move, which could open the door to war crimes investigations against"
" Israelis .",
"Amnesty's annual death penalty report catalogs encouraging signs, but setbacks in numbers of those sentenced to"
" death . Organization claims that governments around the world are using the threat of terrorism to advance"
" executions . The number of executions worldwide has gone down by almost 22% compared with 2013, but death"
" sentences up by 28% .",
]
@ -65,7 +80,8 @@ def test_single_sent_scores_dont_depend_on_newline_sep():
]
tgt = [
"Margot Frank, died in 1945, a month earlier than previously thought.",
'Prosecutor: "No videos were used in the crash investigation" German papers say they saw a cell phone video of the final seconds on board Flight 9525.',
'Prosecutor: "No videos were used in the crash investigation" German papers say they saw a cell phone video of'
" the final seconds on board Flight 9525.",
]
assert calculate_rouge(pred, tgt, newline_sep=True) == calculate_rouge(pred, tgt, newline_sep=False)

View File

@ -121,7 +121,10 @@ def run_generate(verbose=True):
nargs="?",
type=str,
const=datetime_now(),
help="use in conjunction w/ --dump-args to print with the results whatever other info you'd like, e.g. lang=en-ru. If no value is passed, the current datetime string will be used.",
help=(
"use in conjunction w/ --dump-args to print with the results whatever other info you'd like, e.g."
" lang=en-ru. If no value is passed, the current datetime string will be used."
),
)
# Unspecified args like --num_beams=2 --decoder_start_token_id=4 are passed to model.generate
args, rest = parser.parse_known_args()

View File

@ -35,7 +35,7 @@ def parse_search_arg(search):
groups = search.split()
entries = {k: vs for k, vs in (g.split("=") for g in groups)}
entry_names = list(entries.keys())
sets = [list((f"--{k} {v}") for v in vs.split(":")) for k, vs in entries.items()]
sets = [list(f"--{k} {v}" for v in vs.split(":")) for k, vs in entries.items()]
matrix = [list(x) for x in itertools.product(*sets)]
return matrix, entry_names
@ -66,7 +66,10 @@ def run_search():
prog = sys.argv[0]
parser = argparse.ArgumentParser(
usage="\n\nImportant: this script accepts all arguments `run_eval.py` accepts and then a few extra, therefore refer to `run_eval.py -h` for the complete list."
usage=(
"\n\nImportant: this script accepts all arguments `run_eval.py` accepts and then a few extra, therefore"
" refer to `run_eval.py -h` for the complete list."
)
)
parser.add_argument(
"--search",
@ -83,7 +86,10 @@ def run_search():
nargs="?",
type=str,
const=datetime_now(),
help="add custom notes to be printed before the results table. If no value is passed, the current datetime string will be used.",
help=(
"add custom notes to be printed before the results table. If no value is passed, the current datetime"
" string will be used."
),
)
args, args_main = parser.parse_known_args()
# we share some of the args

View File

@ -57,9 +57,10 @@ class Seq2SeqTrainer(Trainer):
super().__init__(*args, **kwargs)
if config is None:
assert isinstance(
self.model, PreTrainedModel
), f"If no `config` is passed the model to be trained has to be of type `PreTrainedModel`, but is {self.model.__class__}"
assert isinstance(self.model, PreTrainedModel), (
"If no `config` is passed the model to be trained has to be of type `PreTrainedModel`, but is"
f" {self.model.__class__}"
)
self.config = self.model.config
else:
self.config = config
@ -68,13 +69,15 @@ class Seq2SeqTrainer(Trainer):
self.vocab_size = self.config.tgt_vocab_size if isinstance(self.config, FSMTConfig) else self.config.vocab_size
if self.args.label_smoothing != 0 or (self.data_args is not None and self.data_args.ignore_pad_token_for_loss):
assert (
self.config.pad_token_id is not None
), "Make sure that `config.pad_token_id` is correcly defined when ignoring `pad_token` for loss calculation or doing label smoothing."
assert self.config.pad_token_id is not None, (
"Make sure that `config.pad_token_id` is correcly defined when ignoring `pad_token` for loss"
" calculation or doing label smoothing."
)
if self.config.pad_token_id is None and self.config.eos_token_id is not None:
logger.warning(
f"The `config.pad_token_id` is `None`. Using `config.eos_token_id` = {self.config.eos_token_id} for padding.."
f"The `config.pad_token_id` is `None`. Using `config.eos_token_id` = {self.config.eos_token_id} for"
" padding.."
)
if self.args.label_smoothing == 0:
@ -248,7 +251,8 @@ class Seq2SeqTrainer(Trainer):
if pad_token_id is None:
raise ValueError(
f"Make sure that either `config.pad_token_id` or `config.eos_token_id` is defined if tensor has to be padded to `max_length`={max_length}"
"Make sure that either `config.pad_token_id` or `config.eos_token_id` is defined if tensor has to be"
f" padded to `max_length`={max_length}"
)
padded_tensor = pad_token_id * torch.ones(

View File

@ -39,9 +39,7 @@ def parse_args():
"""
parser = ArgumentParser(
description=(
"PyTorch TPU distributed training launch "
"helper utility that will spawn up "
"multiple distributed processes"
"PyTorch TPU distributed training launch helper utility that will spawn up multiple distributed processes"
)
)

View File

@ -168,8 +168,10 @@ class DataTrainingArguments:
max_seq_length: int = field(
default=128,
metadata={
"help": "The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
"help": (
"The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
)
},
)
overwrite_cache: bool = field(
@ -215,7 +217,8 @@ def main():
and not training_args.overwrite_output_dir
):
raise ValueError(
f"Output directory ({training_args.output_dir}) already exists and is not empty. Use --overwrite_output_dir to overcome."
f"Output directory ({training_args.output_dir}) already exists and is not empty. Use"
" --overwrite_output_dir to overcome."
)
# Setup logging

View File

@ -87,8 +87,10 @@ class DataTrainingArguments:
max_seq_length: int = field(
default=128,
metadata={
"help": "The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
"help": (
"The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
)
},
)
overwrite_cache: bool = field(
@ -116,7 +118,8 @@ def main():
and not training_args.overwrite_output_dir
):
raise ValueError(
f"Output directory ({training_args.output_dir}) already exists and is not empty. Use --overwrite_output_dir to overcome."
f"Output directory ({training_args.output_dir}) already exists and is not empty. Use"
" --overwrite_output_dir to overcome."
)
module = import_module("tasks")

View File

@ -88,8 +88,10 @@ class DataTrainingArguments:
max_seq_length: int = field(
default=128,
metadata={
"help": "The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
"help": (
"The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
)
},
)
overwrite_cache: bool = field(
@ -111,7 +113,8 @@ def main():
and not training_args.overwrite_output_dir
):
raise ValueError(
f"Output directory ({training_args.output_dir}) already exists and is not empty. Use --overwrite_output_dir to overcome."
f"Output directory ({training_args.output_dir}) already exists and is not empty. Use"
" --overwrite_output_dir to overcome."
)
module = import_module("tasks")

View File

@ -103,7 +103,7 @@ class TokenClassificationTask:
label_map = {label: i for i, label in enumerate(label_list)}
features = []
for (ex_index, example) in enumerate(examples):
for ex_index, example in enumerate(examples):
if ex_index % 10_000 == 0:
logger.info("Writing example %d of %d", ex_index, len(examples))

View File

@ -86,8 +86,9 @@ class DataTrainingArguments:
eval_split_name: str = field(
default="validation",
metadata={
"help": "The name of the training data set split to use (via the datasets library). Defaults to "
"'validation'"
"help": (
"The name of the training data set split to use (via the datasets library). Defaults to 'validation'"
)
},
)
audio_column_name: str = field(
@ -100,15 +101,19 @@ class DataTrainingArguments:
max_train_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
)
},
)
max_eval_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
)
},
)
max_length_seconds: float = field(
@ -149,8 +154,10 @@ class ModelArguments:
use_auth_token: bool = field(
default=False,
metadata={
"help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
"help": (
"Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
)
},
)
freeze_feature_extractor: Optional[bool] = field(

View File

@ -89,8 +89,10 @@ class ModelArguments:
use_auth_token: bool = field(
default=False,
metadata={
"help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
"help": (
"Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
)
},
)
freeze_vision_model: bool = field(
@ -132,22 +134,28 @@ class DataTrainingArguments:
max_seq_length: Optional[int] = field(
default=128,
metadata={
"help": "The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
"help": (
"The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
)
},
)
max_train_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
)
},
)
max_eval_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
)
},
)
overwrite_cache: bool = field(

View File

@ -93,15 +93,19 @@ class DataTrainingArguments:
max_train_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
)
},
)
max_eval_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
)
},
)
@ -140,8 +144,10 @@ class ModelArguments:
use_auth_token: bool = field(
default=False,
metadata={
"help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
"help": (
"Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
)
},
)

View File

@ -62,7 +62,10 @@ def parse_args():
"--dataset_name",
type=str,
default="cifar10",
help="The name of the Dataset (from the HuggingFace hub) to train on (could be your own, possibly private, dataset).",
help=(
"The name of the Dataset (from the HuggingFace hub) to train on (could be your own, possibly private,"
" dataset)."
),
)
parser.add_argument("--train_dir", type=str, default=None, help="A folder containing the training data.")
parser.add_argument("--validation_dir", type=str, default=None, help="A folder containing the validation data.")
@ -70,15 +73,19 @@ def parse_args():
"--max_train_samples",
type=int,
default=None,
help="For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set.",
help=(
"For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
),
)
parser.add_argument(
"--max_eval_samples",
type=int,
default=None,
help="For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set.",
help=(
"For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
),
)
parser.add_argument(
"--train_val_split",

View File

@ -74,15 +74,19 @@ class DataTrainingArguments:
max_train_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
)
},
)
max_eval_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
)
},
)
@ -104,8 +108,9 @@ class ModelArguments:
model_name_or_path: str = field(
default=None,
metadata={
"help": "The model checkpoint for weights initialization."
"Don't set if you want to train a model from scratch."
"help": (
"The model checkpoint for weights initialization.Don't set if you want to train a model from scratch."
)
},
)
config_name: Optional[str] = field(
@ -114,8 +119,10 @@ class ModelArguments:
config_overrides: Optional[str] = field(
default=None,
metadata={
"help": "Override some existing default config settings when a model is trained from scratch. Example: "
"n_embd=10,resid_pdrop=0.2,scale_attn_weights=false,summary_type=cls_index"
"help": (
"Override some existing default config settings when a model is trained from scratch. Example: "
"n_embd=10,resid_pdrop=0.2,scale_attn_weights=false,summary_type=cls_index"
)
},
)
cache_dir: Optional[str] = field(
@ -129,8 +136,10 @@ class ModelArguments:
use_auth_token: bool = field(
default=False,
metadata={
"help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
"help": (
"Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
)
},
)
mask_ratio: float = field(

View File

@ -87,15 +87,19 @@ class DataTrainingArguments:
max_train_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
)
},
)
max_eval_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
)
},
)
@ -117,9 +121,11 @@ class ModelArguments:
model_name_or_path: str = field(
default=None,
metadata={
"help": "The model checkpoint for weights initialization. Can be a local path to a pytorch_model.bin or a "
"checkpoint identifier on the hub. "
"Don't set if you want to train a model from scratch."
"help": (
"The model checkpoint for weights initialization. Can be a local path to a pytorch_model.bin or a "
"checkpoint identifier on the hub. "
"Don't set if you want to train a model from scratch."
)
},
)
model_type: Optional[str] = field(
@ -132,8 +138,10 @@ class ModelArguments:
config_overrides: Optional[str] = field(
default=None,
metadata={
"help": "Override some existing default config settings when a model is trained from scratch. Example: "
"n_embd=10,resid_pdrop=0.2,scale_attn_weights=false,summary_type=cls_index"
"help": (
"Override some existing default config settings when a model is trained from scratch. Example: "
"n_embd=10,resid_pdrop=0.2,scale_attn_weights=false,summary_type=cls_index"
)
},
)
cache_dir: Optional[str] = field(
@ -148,20 +156,26 @@ class ModelArguments:
use_auth_token: bool = field(
default=False,
metadata={
"help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
"help": (
"Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
)
},
)
image_size: Optional[int] = field(
default=None,
metadata={
"help": "The size (resolution) of each image. If not specified, will use `image_size` of the configuration."
"help": (
"The size (resolution) of each image. If not specified, will use `image_size` of the configuration."
)
},
)
patch_size: Optional[int] = field(
default=None,
metadata={
"help": "The size (resolution) of each patch. If not specified, will use `patch_size` of the configuration."
"help": (
"The size (resolution) of each patch. If not specified, will use `patch_size` of the configuration."
)
},
)
encoder_stride: Optional[int] = field(

View File

@ -73,8 +73,9 @@ class ModelArguments:
model_name_or_path: Optional[str] = field(
default=None,
metadata={
"help": "The model checkpoint for weights initialization."
"Don't set if you want to train a model from scratch."
"help": (
"The model checkpoint for weights initialization.Don't set if you want to train a model from scratch."
)
},
)
model_type: Optional[str] = field(
@ -84,8 +85,10 @@ class ModelArguments:
config_overrides: Optional[str] = field(
default=None,
metadata={
"help": "Override some existing default config settings when a model is trained from scratch. Example: "
"n_embd=10,resid_pdrop=0.2,scale_attn_weights=false,summary_type=cls_index"
"help": (
"Override some existing default config settings when a model is trained from scratch. Example: "
"n_embd=10,resid_pdrop=0.2,scale_attn_weights=false,summary_type=cls_index"
)
},
)
config_name: Optional[str] = field(
@ -109,8 +112,10 @@ class ModelArguments:
use_auth_token: bool = field(
default=False,
metadata={
"help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
"help": (
"Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
)
},
)
@ -141,24 +146,30 @@ class DataTrainingArguments:
max_train_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
)
},
)
max_eval_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
)
},
)
block_size: Optional[int] = field(
default=None,
metadata={
"help": "Optional input sequence length after tokenization. "
"The training dataset will be truncated in block of this size for training. "
"Default to the model max input length for single sentence inputs (take into account special tokens)."
"help": (
"Optional input sequence length after tokenization. "
"The training dataset will be truncated in block of this size for training. "
"Default to the model max input length for single sentence inputs (take into account special tokens)."
)
},
)
overwrite_cache: bool = field(
@ -390,7 +401,8 @@ def main():
# clm input could be much much longer than block_size
if "Token indices sequence length is longer than the" in cl.out:
tok_logger.warning(
"^^^^^^^^^^^^^^^^ Please ignore the warning above - this long input will be chunked into smaller bits before being passed to the model."
"^^^^^^^^^^^^^^^^ Please ignore the warning above - this long input will be chunked into smaller bits"
" before being passed to the model."
)
return output

View File

@ -168,7 +168,11 @@ def parse_args():
"--block_size",
type=int,
default=None,
help="Optional input sequence length after tokenization. The training dataset will be truncated in block of this size for training. Default to the model max input length for single sentence inputs (take into account special tokens).",
help=(
"Optional input sequence length after tokenization. The training dataset will be truncated in block of"
" this size for training. Default to the model max input length for single sentence inputs (take into"
" account special tokens)."
),
)
parser.add_argument(
"--preprocessing_num_workers",

View File

@ -70,8 +70,9 @@ class ModelArguments:
model_name_or_path: Optional[str] = field(
default=None,
metadata={
"help": "The model checkpoint for weights initialization."
"Don't set if you want to train a model from scratch."
"help": (
"The model checkpoint for weights initialization.Don't set if you want to train a model from scratch."
)
},
)
model_type: Optional[str] = field(
@ -81,8 +82,10 @@ class ModelArguments:
config_overrides: Optional[str] = field(
default=None,
metadata={
"help": "Override some existing default config settings when a model is trained from scratch. Example: "
"n_embd=10,resid_pdrop=0.2,scale_attn_weights=false,summary_type=cls_index"
"help": (
"Override some existing default config settings when a model is trained from scratch. Example: "
"n_embd=10,resid_pdrop=0.2,scale_attn_weights=false,summary_type=cls_index"
)
},
)
config_name: Optional[str] = field(
@ -106,8 +109,10 @@ class ModelArguments:
use_auth_token: bool = field(
default=False,
metadata={
"help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
"help": (
"Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
)
},
)
@ -147,8 +152,10 @@ class DataTrainingArguments:
max_seq_length: Optional[int] = field(
default=None,
metadata={
"help": "The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated."
"help": (
"The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated."
)
},
)
preprocessing_num_workers: Optional[int] = field(
@ -165,22 +172,28 @@ class DataTrainingArguments:
pad_to_max_length: bool = field(
default=False,
metadata={
"help": "Whether to pad all samples to `max_seq_length`. "
"If False, will pad the samples dynamically when batching to the maximum length in the batch."
"help": (
"Whether to pad all samples to `max_seq_length`. "
"If False, will pad the samples dynamically when batching to the maximum length in the batch."
)
},
)
max_train_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
)
},
)
max_eval_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
)
},
)

View File

@ -171,7 +171,9 @@ def parse_args():
"--max_seq_length",
type=int,
default=None,
help="The maximum total input sequence length after tokenization. Sequences longer than this will be truncated.",
help=(
"The maximum total input sequence length after tokenization. Sequences longer than this will be truncated."
),
)
parser.add_argument(
"--line_by_line",

View File

@ -63,8 +63,9 @@ class ModelArguments:
model_name_or_path: Optional[str] = field(
default=None,
metadata={
"help": "The model checkpoint for weights initialization."
"Don't set if you want to train a model from scratch."
"help": (
"The model checkpoint for weights initialization.Don't set if you want to train a model from scratch."
)
},
)
config_name: Optional[str] = field(
@ -73,8 +74,10 @@ class ModelArguments:
config_overrides: Optional[str] = field(
default=None,
metadata={
"help": "Override some existing default config settings when a model is trained from scratch. Example: "
"n_embd=10,resid_pdrop=0.2,scale_attn_weights=false,summary_type=cls_index"
"help": (
"Override some existing default config settings when a model is trained from scratch. Example: "
"n_embd=10,resid_pdrop=0.2,scale_attn_weights=false,summary_type=cls_index"
)
},
)
tokenizer_name: Optional[str] = field(
@ -95,8 +98,10 @@ class ModelArguments:
use_auth_token: bool = field(
default=False,
metadata={
"help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
"help": (
"Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
)
},
)
@ -136,8 +141,10 @@ class DataTrainingArguments:
max_seq_length: int = field(
default=512,
metadata={
"help": "The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated."
"help": (
"The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated."
)
},
)
preprocessing_num_workers: Optional[int] = field(
@ -147,8 +154,10 @@ class DataTrainingArguments:
plm_probability: float = field(
default=1 / 6,
metadata={
"help": "Ratio of length of a span of masked tokens to surrounding context length for "
"permutation language modeling."
"help": (
"Ratio of length of a span of masked tokens to surrounding context length for "
"permutation language modeling."
)
},
)
max_span_length: int = field(
@ -161,22 +170,28 @@ class DataTrainingArguments:
pad_to_max_length: bool = field(
default=False,
metadata={
"help": "Whether to pad all samples to `max_seq_length`. "
"If False, will pad the samples dynamically when batching to the maximum length in the batch."
"help": (
"Whether to pad all samples to `max_seq_length`. "
"If False, will pad the samples dynamically when batching to the maximum length in the batch."
)
},
)
max_train_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
)
},
)
max_eval_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
)
},
)

View File

@ -82,8 +82,10 @@ class ModelArguments:
use_auth_token: bool = field(
default=False,
metadata={
"help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
"help": (
"Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
)
},
)
@ -109,30 +111,38 @@ class DataTrainingArguments:
max_seq_length: Optional[int] = field(
default=None,
metadata={
"help": "The maximum total input sequence length after tokenization. If passed, sequences longer "
"than this will be truncated, sequences shorter will be padded."
"help": (
"The maximum total input sequence length after tokenization. If passed, sequences longer "
"than this will be truncated, sequences shorter will be padded."
)
},
)
pad_to_max_length: bool = field(
default=False,
metadata={
"help": "Whether to pad all samples to the maximum sentence length. "
"If False, will pad the samples dynamically when batching to the maximum length in the batch. More "
"efficient on GPU but very bad for TPU."
"help": (
"Whether to pad all samples to the maximum sentence length. "
"If False, will pad the samples dynamically when batching to the maximum length in the batch. More "
"efficient on GPU but very bad for TPU."
)
},
)
max_train_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
)
},
)
max_eval_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
)
},
)

View File

@ -81,8 +81,10 @@ class ModelArguments:
use_auth_token: bool = field(
default=False,
metadata={
"help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
"help": (
"Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
)
},
)
@ -118,37 +120,46 @@ class DataTrainingArguments:
max_seq_length: int = field(
default=384,
metadata={
"help": "The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
"help": (
"The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
)
},
)
pad_to_max_length: bool = field(
default=True,
metadata={
"help": "Whether to pad all samples to `max_seq_length`. "
"If False, will pad the samples dynamically when batching to the maximum length in the batch (which can "
"be faster on GPU but will be slower on TPU)."
"help": (
"Whether to pad all samples to `max_seq_length`. If False, will pad the samples dynamically when"
" batching to the maximum length in the batch (which can be faster on GPU but will be slower on TPU)."
)
},
)
max_train_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
)
},
)
max_eval_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
)
},
)
max_predict_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of prediction examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of prediction examples to this "
"value if set."
)
},
)
version_2_with_negative: bool = field(
@ -157,9 +168,11 @@ class DataTrainingArguments:
null_score_diff_threshold: float = field(
default=0.0,
metadata={
"help": "The threshold used to select the null answer: if the best answer has a score that is less than "
"the score of the null answer minus this threshold, the null answer is selected for this example. "
"Only useful when `version_2_with_negative=True`."
"help": (
"The threshold used to select the null answer: if the best answer has a score that is less than "
"the score of the null answer minus this threshold, the null answer is selected for this example. "
"Only useful when `version_2_with_negative=True`."
)
},
)
doc_stride: int = field(
@ -173,8 +186,10 @@ class DataTrainingArguments:
max_answer_length: int = field(
default=30,
metadata={
"help": "The maximum length of an answer that can be generated. This is needed because the start "
"and end predictions are not conditioned on one another."
"help": (
"The maximum length of an answer that can be generated. This is needed because the start "
"and end predictions are not conditioned on one another."
)
},
)
@ -319,9 +334,9 @@ def main():
# Tokenizer check: this script requires a fast tokenizer.
if not isinstance(tokenizer, PreTrainedTokenizerFast):
raise ValueError(
"This example script only works for models that have a fast tokenizer. Checkout the big table of models "
"at https://huggingface.co/transformers/index.html#supported-frameworks to find the model types that meet this "
"requirement"
"This example script only works for models that have a fast tokenizer. Checkout the big table of models at"
" https://huggingface.co/transformers/index.html#supported-frameworks to find the model types that meet"
" this requirement"
)
# Preprocessing the datasets.

View File

@ -80,8 +80,10 @@ class ModelArguments:
use_auth_token: bool = field(
default=False,
metadata={
"help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
"help": (
"Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
)
},
)
@ -117,37 +119,46 @@ class DataTrainingArguments:
max_seq_length: int = field(
default=384,
metadata={
"help": "The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
"help": (
"The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
)
},
)
pad_to_max_length: bool = field(
default=True,
metadata={
"help": "Whether to pad all samples to `max_seq_length`. "
"If False, will pad the samples dynamically when batching to the maximum length in the batch (which can "
"be faster on GPU but will be slower on TPU)."
"help": (
"Whether to pad all samples to `max_seq_length`. If False, will pad the samples dynamically when"
" batching to the maximum length in the batch (which can be faster on GPU but will be slower on TPU)."
)
},
)
max_train_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
)
},
)
max_eval_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
)
},
)
max_predict_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of prediction examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of prediction examples to this "
"value if set."
)
},
)
version_2_with_negative: bool = field(
@ -156,9 +167,11 @@ class DataTrainingArguments:
null_score_diff_threshold: float = field(
default=0.0,
metadata={
"help": "The threshold used to select the null answer: if the best answer has a score that is less than "
"the score of the null answer minus this threshold, the null answer is selected for this example. "
"Only useful when `version_2_with_negative=True`."
"help": (
"The threshold used to select the null answer: if the best answer has a score that is less than "
"the score of the null answer minus this threshold, the null answer is selected for this example. "
"Only useful when `version_2_with_negative=True`."
)
},
)
doc_stride: int = field(
@ -172,8 +185,10 @@ class DataTrainingArguments:
max_answer_length: int = field(
default=30,
metadata={
"help": "The maximum length of an answer that can be generated. This is needed because the start "
"and end predictions are not conditioned on one another."
"help": (
"The maximum length of an answer that can be generated. This is needed because the start "
"and end predictions are not conditioned on one another."
)
},
)

View File

@ -116,8 +116,10 @@ def parse_args():
"--max_seq_length",
type=int,
default=384,
help="The maximum total input sequence length after tokenization. Sequences longer than this will be truncated,"
" sequences shorter will be padded if `--pad_to_max_lengh` is passed.",
help=(
"The maximum total input sequence length after tokenization. Sequences longer than this will be truncated,"
" sequences shorter will be padded if `--pad_to_max_lengh` is passed."
),
)
parser.add_argument(
"--pad_to_max_length",
@ -190,9 +192,11 @@ def parse_args():
"--null_score_diff_threshold",
type=float,
default=0.0,
help="The threshold used to select the null answer: if the best answer has a score that is less than "
"the score of the null answer minus this threshold, the null answer is selected for this example. "
"Only useful when `version_2_with_negative=True`.",
help=(
"The threshold used to select the null answer: if the best answer has a score that is less than "
"the score of the null answer minus this threshold, the null answer is selected for this example. "
"Only useful when `version_2_with_negative=True`."
),
)
parser.add_argument(
"--version_2_with_negative",
@ -203,22 +207,28 @@ def parse_args():
"--max_answer_length",
type=int,
default=30,
help="The maximum length of an answer that can be generated. This is needed because the start "
"and end predictions are not conditioned on one another.",
help=(
"The maximum length of an answer that can be generated. This is needed because the start "
"and end predictions are not conditioned on one another."
),
)
parser.add_argument(
"--max_train_samples",
type=int,
default=None,
help="For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set.",
help=(
"For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
),
)
parser.add_argument(
"--max_eval_samples",
type=int,
default=None,
help="For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set.",
help=(
"For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
),
)
parser.add_argument(
"--overwrite_cache", type=bool, default=False, help="Overwrite the cached training and evaluation sets"

View File

@ -121,8 +121,10 @@ def parse_args():
"--max_seq_length",
type=int,
default=384,
help="The maximum total input sequence length after tokenization. Sequences longer than this will be truncated,"
" sequences shorter will be padded if `--pad_to_max_lengh` is passed.",
help=(
"The maximum total input sequence length after tokenization. Sequences longer than this will be truncated,"
" sequences shorter will be padded if `--pad_to_max_lengh` is passed."
),
)
parser.add_argument(
"--pad_to_max_length",
@ -212,9 +214,11 @@ def parse_args():
"--null_score_diff_threshold",
type=float,
default=0.0,
help="The threshold used to select the null answer: if the best answer has a score that is less than "
"the score of the null answer minus this threshold, the null answer is selected for this example. "
"Only useful when `version_2_with_negative=True`.",
help=(
"The threshold used to select the null answer: if the best answer has a score that is less than "
"the score of the null answer minus this threshold, the null answer is selected for this example. "
"Only useful when `version_2_with_negative=True`."
),
)
parser.add_argument(
"--version_2_with_negative",
@ -225,22 +229,28 @@ def parse_args():
"--max_answer_length",
type=int,
default=30,
help="The maximum length of an answer that can be generated. This is needed because the start "
"and end predictions are not conditioned on one another.",
help=(
"The maximum length of an answer that can be generated. This is needed because the start "
"and end predictions are not conditioned on one another."
),
)
parser.add_argument(
"--max_train_samples",
type=int,
default=None,
help="For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set.",
help=(
"For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
),
)
parser.add_argument(
"--max_eval_samples",
type=int,
default=None,
help="For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set.",
help=(
"For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
),
)
parser.add_argument(
"--overwrite_cache", type=bool, default=False, help="Overwrite the cached training and evaluation sets"

View File

@ -81,8 +81,10 @@ class ModelArguments:
use_auth_token: bool = field(
default=False,
metadata={
"help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
"help": (
"Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
)
},
)
@ -130,53 +132,66 @@ class DataTrainingArguments:
max_seq_length: int = field(
default=384,
metadata={
"help": "The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
"help": (
"The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
)
},
)
max_answer_length: int = field(
default=30,
metadata={
"help": "The maximum length of an answer that can be generated. This is needed because the start "
"and end predictions are not conditioned on one another."
"help": (
"The maximum length of an answer that can be generated. This is needed because the start "
"and end predictions are not conditioned on one another."
)
},
)
val_max_answer_length: Optional[int] = field(
default=None,
metadata={
"help": "The maximum total sequence length for validation target text after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded. Will default to `max_answer_length`."
"This argument is also used to override the ``max_length`` param of ``model.generate``, which is used "
"during ``evaluate`` and ``predict``."
"help": (
"The maximum total sequence length for validation target text after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded. Will default to `max_answer_length`."
"This argument is also used to override the ``max_length`` param of ``model.generate``, which is used "
"during ``evaluate`` and ``predict``."
)
},
)
pad_to_max_length: bool = field(
default=True,
metadata={
"help": "Whether to pad all samples to `max_seq_length`. "
"If False, will pad the samples dynamically when batching to the maximum length in the batch (which can "
"be faster on GPU but will be slower on TPU)."
"help": (
"Whether to pad all samples to `max_seq_length`. If False, will pad the samples dynamically when"
" batching to the maximum length in the batch (which can be faster on GPU but will be slower on TPU)."
)
},
)
max_train_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
)
},
)
max_eval_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
)
},
)
max_predict_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of prediction examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of prediction examples to this "
"value if set."
)
},
)
version_2_with_negative: bool = field(
@ -185,9 +200,11 @@ class DataTrainingArguments:
null_score_diff_threshold: float = field(
default=0.0,
metadata={
"help": "The threshold used to select the null answer: if the best answer has a score that is less than "
"the score of the null answer minus this threshold, the null answer is selected for this example. "
"Only useful when `version_2_with_negative=True`."
"help": (
"The threshold used to select the null answer: if the best answer has a score that is less than "
"the score of the null answer minus this threshold, the null answer is selected for this example. "
"Only useful when `version_2_with_negative=True`."
)
},
)
doc_stride: int = field(
@ -201,8 +218,10 @@ class DataTrainingArguments:
num_beams: Optional[int] = field(
default=None,
metadata={
"help": "Number of beams to use for evaluation. This argument will be passed to ``model.generate``, "
"which is used during ``evaluate`` and ``predict``."
"help": (
"Number of beams to use for evaluation. This argument will be passed to ``model.generate``, "
"which is used during ``evaluate`` and ``predict``."
)
},
)
ignore_pad_token_for_loss: bool = field(

View File

@ -194,15 +194,19 @@ class DataTrainingArguments:
max_train_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
)
},
)
max_eval_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
)
},
)
reduce_labels: Optional[bool] = field(
@ -241,8 +245,10 @@ class ModelArguments:
use_auth_token: bool = field(
default=False,
metadata={
"help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
"help": (
"Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
)
},
)

View File

@ -219,7 +219,10 @@ def parse_args():
"--pad_to_multiple_of",
type=int,
default=None,
help="If set will pad the sequence to a multiple of the provided value. This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= 7.5 (Volta).",
help=(
"If set will pad the sequence to a multiple of the provided value. This is especially useful to enable the"
" use of Tensor Cores on NVIDIA hardware with compute capability >= 7.5 (Volta)."
),
)
parser.add_argument(
"--adam_beta1",
@ -440,7 +443,7 @@ def main():
# only normalized-inputs-training is supported
if not feature_extractor.do_normalize:
raise ValueError(
"Training is only supported for normalized inputs. " "Make sure ``feature_extractor.do_normalize == True``"
"Training is only supported for normalized inputs. Make sure ``feature_extractor.do_normalize == True``"
)
# set max & min audio length in number of samples
@ -496,7 +499,8 @@ def main():
# apply_spec_augment has to be True, mask_feature_prob has to be 0.0
if not config.do_stable_layer_norm or config.feat_extract_norm != "layer":
raise ValueError(
"PreTraining is only supported for ``config.do_stable_layer_norm=True`` and ``config.feat_extract_norm='layer'"
"PreTraining is only supported for ``config.do_stable_layer_norm=True`` and"
" ``config.feat_extract_norm='layer'"
)
# initialize random model
@ -615,7 +619,7 @@ def main():
lr_scheduler.step()
elif accelerator.is_local_main_process:
progress_bar.write(
"Gradients have overflown - skipping update step... " f"Updating gradient scale to {scale}..."
f"Gradients have overflown - skipping update step... Updating gradient scale to {scale}..."
)
# update gumbel temperature

View File

@ -101,9 +101,11 @@ class ModelArguments:
mask_time_prob: float = field(
default=0.05,
metadata={
"help": "Probability of each feature vector along the time axis to be chosen as the start of the vector"
"span to be masked. Approximately ``mask_time_prob * sequence_length // mask_time_length`` feature"
"vectors will be masked along the time axis."
"help": (
"Probability of each feature vector along the time axis to be chosen as the start of the vector"
"span to be masked. Approximately ``mask_time_prob * sequence_length // mask_time_length`` feature"
"vectors will be masked along the time axis."
)
},
)
mask_time_length: int = field(
@ -113,8 +115,11 @@ class ModelArguments:
mask_feature_prob: float = field(
default=0.0,
metadata={
"help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector"
"span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis."
"help": (
"Probability of each feature vector along the feature axis to be chosen as the start of the vectorspan"
" to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature"
" bins will be masked along the time axis."
)
},
)
mask_feature_length: int = field(
@ -146,8 +151,10 @@ class DataTrainingArguments:
train_split_name: str = field(
default="train+validation",
metadata={
"help": "The name of the training data set split to use (via the datasets library). Defaults to "
"'train+validation'"
"help": (
"The name of the training data set split to use (via the datasets library). Defaults to "
"'train+validation'"
)
},
)
eval_split_name: str = field(
@ -174,15 +181,19 @@ class DataTrainingArguments:
max_train_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
)
},
)
max_eval_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of validation examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of validation examples to this "
"value if set."
)
},
)
chars_to_ignore: Optional[List[str]] = list_field(
@ -196,7 +207,10 @@ class DataTrainingArguments:
max_duration_in_seconds: float = field(
default=20.0,
metadata={
"help": "Filter audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`"
"help": (
"Filter audio files that are longer than `max_duration_in_seconds` seconds to"
" 'max_duration_in_seconds`"
)
},
)
min_duration_in_seconds: float = field(
@ -205,17 +219,21 @@ class DataTrainingArguments:
preprocessing_only: bool = field(
default=False,
metadata={
"help": "Whether to only do data preprocessing and skip training. "
"This is especially useful when data preprocessing errors out in distributed training due to timeout. "
"In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` "
"so that the cached datasets can consequently be loaded in distributed training"
"help": (
"Whether to only do data preprocessing and skip training. This is especially useful when data"
" preprocessing errors out in distributed training due to timeout. In this case, one should run the"
" preprocessing in a non-distributed setup with `preprocessing_only=True` so that the cached datasets"
" can consequently be loaded in distributed training"
)
},
)
use_auth_token: bool = field(
default=False,
metadata={
"help": "If :obj:`True`, will use the token generated when running"
":obj:`transformers-cli login` as HTTP bearer authorization for remote files."
"help": (
"If :obj:`True`, will use the token generated when running"
":obj:`transformers-cli login` as HTTP bearer authorization for remote files."
)
},
)
unk_token: str = field(
@ -233,10 +251,12 @@ class DataTrainingArguments:
phoneme_language: Optional[str] = field(
default=None,
metadata={
"help": "The target language that should be used be"
" passed to the tokenizer for tokenization. Note that"
" this is only relevant if the model classifies the"
" input audio to a sequence of phoneme sequences."
"help": (
"The target language that should be used be"
" passed to the tokenizer for tokenization. Note that"
" this is only relevant if the model classifies the"
" input audio to a sequence of phoneme sequences."
)
},
)
@ -405,9 +425,9 @@ def main():
if data_args.audio_column_name not in raw_datasets["train"].column_names:
raise ValueError(
f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. "
"Make sure to set `--audio_column_name` to the correct audio column - one of "
f"{', '.join(raw_datasets['train'].column_names)}."
f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'."
" Make sure to set `--audio_column_name` to the correct audio column - one of"
f" {', '.join(raw_datasets['train'].column_names)}."
)
if data_args.text_column_name not in raw_datasets["train"].column_names:
@ -720,7 +740,10 @@ def main():
"finetuned_from": model_args.model_name_or_path,
"tasks": "speech-recognition",
"tags": ["automatic-speech-recognition", data_args.dataset_name],
"dataset_args": f"Config: {config_name}, Training split: {data_args.train_split_name}, Eval split: {data_args.eval_split_name}",
"dataset_args": (
f"Config: {config_name}, Training split: {data_args.train_split_name}, Eval split:"
f" {data_args.eval_split_name}"
),
"dataset": f"{data_args.dataset_name.upper()} - {config_name.upper()}",
}
if "common_voice" in data_args.dataset_name:

View File

@ -87,8 +87,10 @@ class ModelArguments:
use_auth_token: bool = field(
default=False,
metadata={
"help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
"help": (
"Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
)
},
)
freeze_feature_encoder: bool = field(
@ -122,15 +124,19 @@ class DataTrainingArguments:
max_train_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
)
},
)
max_eval_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
)
},
)
audio_column_name: str = field(
@ -144,7 +150,10 @@ class DataTrainingArguments:
max_duration_in_seconds: float = field(
default=20.0,
metadata={
"help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`"
"help": (
"Truncate audio files that are longer than `max_duration_in_seconds` seconds to"
" 'max_duration_in_seconds`"
)
},
)
min_duration_in_seconds: float = field(
@ -153,10 +162,12 @@ class DataTrainingArguments:
preprocessing_only: bool = field(
default=False,
metadata={
"help": "Whether to only do data preprocessing and skip training. "
"This is especially useful when data preprocessing errors out in distributed training due to timeout. "
"In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` "
"so that the cached datasets can consequently be loaded in distributed training"
"help": (
"Whether to only do data preprocessing and skip training. This is especially useful when data"
" preprocessing errors out in distributed training due to timeout. In this case, one should run the"
" preprocessing in a non-distributed setup with `preprocessing_only=True` so that the cached datasets"
" can consequently be loaded in distributed training"
)
},
)
train_split_name: str = field(

View File

@ -101,15 +101,19 @@ class ModelArguments:
use_auth_token: bool = field(
default=False,
metadata={
"help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
"help": (
"Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
)
},
)
resize_position_embeddings: Optional[bool] = field(
default=None,
metadata={
"help": "Whether to automatically resize the position embeddings if `max_source_length` exceeds "
"the model's position embeddings."
"help": (
"Whether to automatically resize the position embeddings if `max_source_length` exceeds "
"the model's position embeddings."
)
},
)
@ -142,14 +146,15 @@ class DataTrainingArguments:
validation_file: Optional[str] = field(
default=None,
metadata={
"help": "An optional input evaluation data file to evaluate the metrics (rouge) on "
"(a jsonlines or csv file)."
"help": (
"An optional input evaluation data file to evaluate the metrics (rouge) on (a jsonlines or csv file)."
)
},
)
test_file: Optional[str] = field(
default=None,
metadata={
"help": "An optional input test data file to evaluate the metrics (rouge) on " "(a jsonlines or csv file)."
"help": "An optional input test data file to evaluate the metrics (rouge) on (a jsonlines or csv file)."
},
)
overwrite_cache: bool = field(
@ -162,60 +167,76 @@ class DataTrainingArguments:
max_source_length: Optional[int] = field(
default=1024,
metadata={
"help": "The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
"help": (
"The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
)
},
)
max_target_length: Optional[int] = field(
default=128,
metadata={
"help": "The maximum total sequence length for target text after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
"help": (
"The maximum total sequence length for target text after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
)
},
)
val_max_target_length: Optional[int] = field(
default=None,
metadata={
"help": "The maximum total sequence length for validation target text after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`."
"This argument is also used to override the ``max_length`` param of ``model.generate``, which is used "
"during ``evaluate`` and ``predict``."
"help": (
"The maximum total sequence length for validation target text after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`."
"This argument is also used to override the ``max_length`` param of ``model.generate``, which is used "
"during ``evaluate`` and ``predict``."
)
},
)
pad_to_max_length: bool = field(
default=False,
metadata={
"help": "Whether to pad all samples to model maximum sentence length. "
"If False, will pad the samples dynamically when batching to the maximum length in the batch. More "
"efficient on GPU but very bad for TPU."
"help": (
"Whether to pad all samples to model maximum sentence length. "
"If False, will pad the samples dynamically when batching to the maximum length in the batch. More "
"efficient on GPU but very bad for TPU."
)
},
)
max_train_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
)
},
)
max_eval_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
)
},
)
max_predict_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of prediction examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of prediction examples to this "
"value if set."
)
},
)
num_beams: Optional[int] = field(
default=None,
metadata={
"help": "Number of beams to use for evaluation. This argument will be passed to ``model.generate``, "
"which is used during ``evaluate`` and ``predict``."
"help": (
"Number of beams to use for evaluation. This argument will be passed to ``model.generate``, "
"which is used during ``evaluate`` and ``predict``."
)
},
)
ignore_pad_token_for_loss: bool = field(
@ -231,9 +252,11 @@ class DataTrainingArguments:
forced_bos_token: Optional[str] = field(
default=None,
metadata={
"help": "The token to force as the first generated token after the decoder_start_token_id."
"Useful for multilingual models like mBART where the first generated token"
"needs to be the target language token (Usually it is the target language token)"
"help": (
"The token to force as the first generated token after the decoder_start_token_id."
"Useful for multilingual models like mBART where the first generated token"
"needs to be the target language token (Usually it is the target language token)"
)
},
)
@ -410,17 +433,18 @@ def main():
):
if model_args.resize_position_embeddings is None:
logger.warning(
f"Increasing the model's number of position embedding vectors from {model.config.max_position_embeddings} "
f"to {data_args.max_source_length}."
"Increasing the model's number of position embedding vectors from"
f" {model.config.max_position_embeddings} to {data_args.max_source_length}."
)
model.resize_position_embeddings(data_args.max_source_length)
elif model_args.resize_position_embeddings:
model.resize_position_embeddings(data_args.max_source_length)
else:
raise ValueError(
f"`--max_source_length` is set to {data_args.max_source_length}, but the model only has {model.config.max_position_embeddings}"
f" position encodings. Consider either reducing `--max_source_length` to {model.config.max_position_embeddings} or to automatically "
"resize the model's position encodings by passing `--resize_position_embeddings`."
f"`--max_source_length` is set to {data_args.max_source_length}, but the model only has"
f" {model.config.max_position_embeddings} position encodings. Consider either reducing"
f" `--max_source_length` to {model.config.max_position_embeddings} or to automatically resize the"
" model's position encodings by passing `--resize_position_embeddings`."
)
prefix = data_args.source_prefix if data_args.source_prefix is not None else ""

View File

@ -111,20 +111,22 @@ def parse_args():
"--ignore_pad_token_for_loss",
type=bool,
default=True,
help="Whether to ignore the tokens corresponding to " "padded labels in the loss computation or not.",
help="Whether to ignore the tokens corresponding to padded labels in the loss computation or not.",
)
parser.add_argument(
"--max_source_length",
type=int,
default=1024,
help="The maximum total input sequence length after "
"tokenization.Sequences longer than this will be truncated, sequences shorter will be padded.",
help=(
"The maximum total input sequence length after "
"tokenization.Sequences longer than this will be truncated, sequences shorter will be padded."
),
)
parser.add_argument(
"--source_prefix",
type=str,
default=None,
help="A prefix to add before every source text " "(useful for T5 models).",
help="A prefix to add before every source text (useful for T5 models).",
)
parser.add_argument(
"--preprocessing_num_workers",
@ -139,18 +141,22 @@ def parse_args():
"--max_target_length",
type=int,
default=128,
help="The maximum total sequence length for target text after "
"tokenization. Sequences longer than this will be truncated, sequences shorter will be padded."
"during ``evaluate`` and ``predict``.",
help=(
"The maximum total sequence length for target text after "
"tokenization. Sequences longer than this will be truncated, sequences shorter will be padded."
"during ``evaluate`` and ``predict``."
),
)
parser.add_argument(
"--val_max_target_length",
type=int,
default=None,
help="The maximum total sequence length for validation "
"target text after tokenization.Sequences longer than this will be truncated, sequences shorter will be "
"padded. Will default to `max_target_length`.This argument is also used to override the ``max_length`` "
"param of ``model.generate``, which is used during ``evaluate`` and ``predict``.",
help=(
"The maximum total sequence length for validation "
"target text after tokenization.Sequences longer than this will be truncated, sequences shorter will be "
"padded. Will default to `max_target_length`.This argument is also used to override the ``max_length`` "
"param of ``model.generate``, which is used during ``evaluate`` and ``predict``."
),
)
parser.add_argument(
"--max_length",
@ -165,8 +171,10 @@ def parse_args():
"--num_beams",
type=int,
default=None,
help="Number of beams to use for evaluation. This argument will be "
"passed to ``model.generate``, which is used during ``evaluate`` and ``predict``.",
help=(
"Number of beams to use for evaluation. This argument will be "
"passed to ``model.generate``, which is used during ``evaluate`` and ``predict``."
),
)
parser.add_argument(
"--pad_to_max_length",

View File

@ -89,8 +89,10 @@ class DataTrainingArguments:
max_seq_length: int = field(
default=128,
metadata={
"help": "The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
"help": (
"The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
)
},
)
overwrite_cache: bool = field(
@ -99,29 +101,37 @@ class DataTrainingArguments:
pad_to_max_length: bool = field(
default=True,
metadata={
"help": "Whether to pad all samples to `max_seq_length`. "
"If False, will pad the samples dynamically when batching to the maximum length in the batch."
"help": (
"Whether to pad all samples to `max_seq_length`. "
"If False, will pad the samples dynamically when batching to the maximum length in the batch."
)
},
)
max_train_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
)
},
)
max_eval_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
)
},
)
max_predict_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of prediction examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of prediction examples to this "
"value if set."
)
},
)
train_file: Optional[str] = field(
@ -180,8 +190,10 @@ class ModelArguments:
use_auth_token: bool = field(
default=False,
metadata={
"help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
"help": (
"Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
)
},
)

View File

@ -67,8 +67,10 @@ class DataTrainingArguments:
max_seq_length: Optional[int] = field(
default=128,
metadata={
"help": "The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
"help": (
"The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
)
},
)
overwrite_cache: bool = field(
@ -77,29 +79,37 @@ class DataTrainingArguments:
pad_to_max_length: bool = field(
default=True,
metadata={
"help": "Whether to pad all samples to `max_seq_length`. "
"If False, will pad the samples dynamically when batching to the maximum length in the batch."
"help": (
"Whether to pad all samples to `max_seq_length`. "
"If False, will pad the samples dynamically when batching to the maximum length in the batch."
)
},
)
max_train_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
)
},
)
max_eval_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
)
},
)
max_predict_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of prediction examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of prediction examples to this "
"value if set."
)
},
)
server_ip: Optional[str] = field(default=None, metadata={"help": "For distant debugging."})
@ -146,8 +156,10 @@ class ModelArguments:
use_auth_token: bool = field(
default=False,
metadata={
"help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
"help": (
"Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
)
},
)

View File

@ -81,8 +81,10 @@ class ModelArguments:
use_auth_token: bool = field(
default=False,
metadata={
"help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
"help": (
"Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
)
},
)
@ -127,44 +129,56 @@ class DataTrainingArguments:
max_seq_length: int = field(
default=None,
metadata={
"help": "The maximum total input sequence length after tokenization. If set, sequences longer "
"than this will be truncated, sequences shorter will be padded."
"help": (
"The maximum total input sequence length after tokenization. If set, sequences longer "
"than this will be truncated, sequences shorter will be padded."
)
},
)
pad_to_max_length: bool = field(
default=False,
metadata={
"help": "Whether to pad all samples to model maximum sentence length. "
"If False, will pad the samples dynamically when batching to the maximum length in the batch. More "
"efficient on GPU but very bad for TPU."
"help": (
"Whether to pad all samples to model maximum sentence length. "
"If False, will pad the samples dynamically when batching to the maximum length in the batch. More "
"efficient on GPU but very bad for TPU."
)
},
)
max_train_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
)
},
)
max_eval_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
)
},
)
max_predict_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of prediction examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of prediction examples to this "
"value if set."
)
},
)
label_all_tokens: bool = field(
default=False,
metadata={
"help": "Whether to put the label for one word on all tokens of generated by that word or just on the "
"one (in which case the other tokens will have a padding index)."
"help": (
"Whether to put the label for one word on all tokens of generated by that word or just on the "
"one (in which case the other tokens will have a padding index)."
)
},
)
return_entity_level_metrics: bool = field(
@ -355,9 +369,9 @@ def main():
# Tokenizer check: this script requires a fast tokenizer.
if not isinstance(tokenizer, PreTrainedTokenizerFast):
raise ValueError(
"This example script only works for models that have a fast tokenizer. Checkout the big table of models "
"at https://huggingface.co/transformers/index.html#supported-frameworks to find the model types that meet this "
"requirement"
"This example script only works for models that have a fast tokenizer. Checkout the big table of models at"
" https://huggingface.co/transformers/index.html#supported-frameworks to find the model types that meet"
" this requirement"
)
# Model has labels -> use them.
@ -373,8 +387,8 @@ def main():
else:
logger.warning(
"Your model seems to have been trained with labels, but they don't match the dataset: ",
f"model labels: {list(sorted(model.config.label2id.keys()))}, dataset labels: {list(sorted(label_list))}."
"\nIgnoring the model labels as a result.",
f"model labels: {list(sorted(model.config.label2id.keys()))}, dataset labels:"
f" {list(sorted(label_list))}.\nIgnoring the model labels as a result.",
)
# Set the correspondences label/ID inside the model config

View File

@ -403,8 +403,8 @@ def main():
else:
logger.warning(
"Your model seems to have been trained with labels, but they don't match the dataset: ",
f"model labels: {list(sorted(model.config.label2id.keys()))}, dataset labels: {list(sorted(label_list))}."
"\nIgnoring the model labels as a result.",
f"model labels: {list(sorted(model.config.label2id.keys()))}, dataset labels:"
f" {list(sorted(label_list))}.\nIgnoring the model labels as a result.",
)
# Set the correspondences label/ID inside the model config

View File

@ -91,8 +91,10 @@ class ModelArguments:
use_auth_token: bool = field(
default=False,
metadata={
"help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
"help": (
"Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
)
},
)
@ -116,15 +118,12 @@ class DataTrainingArguments:
validation_file: Optional[str] = field(
default=None,
metadata={
"help": "An optional input evaluation data file to evaluate the metrics (sacreblue) on "
"a jsonlines file."
"help": "An optional input evaluation data file to evaluate the metrics (sacreblue) on a jsonlines file."
},
)
test_file: Optional[str] = field(
default=None,
metadata={
"help": "An optional input test data file to evaluate the metrics (sacreblue) on " "a jsonlines file."
},
metadata={"help": "An optional input test data file to evaluate the metrics (sacreblue) on a jsonlines file."},
)
overwrite_cache: bool = field(
default=False, metadata={"help": "Overwrite the cached training and evaluation sets"}
@ -136,60 +135,76 @@ class DataTrainingArguments:
max_source_length: Optional[int] = field(
default=1024,
metadata={
"help": "The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
"help": (
"The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
)
},
)
max_target_length: Optional[int] = field(
default=128,
metadata={
"help": "The maximum total sequence length for target text after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
"help": (
"The maximum total sequence length for target text after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
)
},
)
val_max_target_length: Optional[int] = field(
default=None,
metadata={
"help": "The maximum total sequence length for validation target text after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`."
"This argument is also used to override the ``max_length`` param of ``model.generate``, which is used "
"during ``evaluate`` and ``predict``."
"help": (
"The maximum total sequence length for validation target text after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`."
"This argument is also used to override the ``max_length`` param of ``model.generate``, which is used "
"during ``evaluate`` and ``predict``."
)
},
)
pad_to_max_length: bool = field(
default=False,
metadata={
"help": "Whether to pad all samples to model maximum sentence length. "
"If False, will pad the samples dynamically when batching to the maximum length in the batch. More "
"efficient on GPU but very bad for TPU."
"help": (
"Whether to pad all samples to model maximum sentence length. "
"If False, will pad the samples dynamically when batching to the maximum length in the batch. More "
"efficient on GPU but very bad for TPU."
)
},
)
max_train_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
)
},
)
max_eval_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
)
},
)
max_predict_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of prediction examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of prediction examples to this "
"value if set."
)
},
)
num_beams: Optional[int] = field(
default=None,
metadata={
"help": "Number of beams to use for evaluation. This argument will be passed to ``model.generate``, "
"which is used during ``evaluate`` and ``predict``."
"help": (
"Number of beams to use for evaluation. This argument will be passed to ``model.generate``, "
"which is used during ``evaluate`` and ``predict``."
)
},
)
ignore_pad_token_for_loss: bool = field(
@ -204,9 +219,11 @@ class DataTrainingArguments:
forced_bos_token: Optional[str] = field(
default=None,
metadata={
"help": "The token to force as the first generated token after the :obj:`decoder_start_token_id`."
"Useful for multilingual models like :doc:`mBART <../model_doc/mbart>` where the first generated token "
"needs to be the target language token.(Usually it is the target language token)"
"help": (
"The token to force as the first generated token after the :obj:`decoder_start_token_id`.Useful for"
" multilingual models like :doc:`mBART <../model_doc/mbart>` where the first generated token needs to"
" be the target language token.(Usually it is the target language token)"
)
},
)

View File

@ -95,41 +95,51 @@ def parse_args():
"--num_beams",
type=int,
default=None,
help="Number of beams to use for evaluation. This argument will be "
"passed to ``model.generate``, which is used during ``evaluate`` and ``predict``.",
help=(
"Number of beams to use for evaluation. This argument will be "
"passed to ``model.generate``, which is used during ``evaluate`` and ``predict``."
),
)
parser.add_argument(
"--max_source_length",
type=int,
default=1024,
help="The maximum total input sequence length after "
"tokenization.Sequences longer than this will be truncated, sequences shorter will be padded.",
help=(
"The maximum total input sequence length after "
"tokenization.Sequences longer than this will be truncated, sequences shorter will be padded."
),
)
parser.add_argument(
"--max_target_length",
type=int,
default=128,
help="The maximum total sequence length for target text after "
"tokenization. Sequences longer than this will be truncated, sequences shorter will be padded."
"during ``evaluate`` and ``predict``.",
help=(
"The maximum total sequence length for target text after "
"tokenization. Sequences longer than this will be truncated, sequences shorter will be padded."
"during ``evaluate`` and ``predict``."
),
)
parser.add_argument(
"--val_max_target_length",
type=int,
default=None,
help="The maximum total sequence length for validation "
"target text after tokenization.Sequences longer than this will be truncated, sequences shorter will be "
"padded. Will default to `max_target_length`.This argument is also used to override the ``max_length`` "
"param of ``model.generate``, which is used during ``evaluate`` and ``predict``.",
help=(
"The maximum total sequence length for validation "
"target text after tokenization.Sequences longer than this will be truncated, sequences shorter will be "
"padded. Will default to `max_target_length`.This argument is also used to override the ``max_length`` "
"param of ``model.generate``, which is used during ``evaluate`` and ``predict``."
),
)
parser.add_argument(
"--pad_to_max_length",
type=bool,
default=False,
help="Whether to pad all samples to model maximum sentence "
"length. If False, will pad the samples dynamically when batching to the maximum length in the batch. More"
"efficient on GPU but very bad for TPU.",
help=(
"Whether to pad all samples to model maximum sentence "
"length. If False, will pad the samples dynamically when batching to the maximum length in the batch. More"
"efficient on GPU but very bad for TPU."
),
)
parser.add_argument(
"--validation_file", type=str, default=None, help="A csv or a json file containing the validation data."
@ -138,7 +148,7 @@ def parse_args():
"--ignore_pad_token_for_loss",
type=bool,
default=True,
help="Whether to ignore the tokens corresponding to " "padded labels in the loss computation or not.",
help="Whether to ignore the tokens corresponding to padded labels in the loss computation or not.",
)
parser.add_argument("--source_lang", type=str, default=None, help="Source language id for translation.")
parser.add_argument("--target_lang", type=str, default=None, help="Target language id for translation.")
@ -146,7 +156,7 @@ def parse_args():
"--source_prefix",
type=str,
default=None,
help="A prefix to add before every source text " "(useful for T5 models).",
help="A prefix to add before every source text (useful for T5 models).",
)
parser.add_argument(
"--preprocessing_num_workers",

View File

@ -39,9 +39,7 @@ def parse_args():
"""
parser = ArgumentParser(
description=(
"PyTorch TPU distributed training launch "
"helper utility that will spawn up "
"multiple distributed processes"
"PyTorch TPU distributed training launch helper utility that will spawn up multiple distributed processes"
)
)

View File

@ -77,8 +77,10 @@ class DataTrainingArguments:
max_seq_length: int = field(
default=128,
metadata={
"help": "The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
"help": (
"The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
)
},
)
overwrite_cache: bool = field(
@ -110,7 +112,8 @@ def main():
and not training_args.overwrite_output_dir
):
raise ValueError(
f"Output directory ({training_args.output_dir}) already exists and is not empty. Use --overwrite_output_dir to overcome."
f"Output directory ({training_args.output_dir}) already exists and is not empty. Use"
" --overwrite_output_dir to overcome."
)
# Setup logging

View File

@ -197,7 +197,7 @@ if is_tf_available():
self.features = hans_convert_examples_to_features(examples, label_list, max_seq_length, tokenizer)
def gen():
for (ex_index, ex) in tqdm.tqdm(enumerate(self.features), desc="convert examples to features"):
for ex_index, ex in tqdm.tqdm(enumerate(self.features), desc="convert examples to features"):
if ex_index % 10000 == 0:
logger.info("Writing example %d of %d" % (ex_index, len(examples)))
@ -268,7 +268,7 @@ class HansProcessor(DataProcessor):
def _create_examples(self, lines, set_type):
"""Creates examples for the training and dev sets."""
examples = []
for (i, line) in enumerate(lines):
for i, line in enumerate(lines):
if i == 0:
continue
guid = "%s-%s" % (set_type, line[0])
@ -303,7 +303,7 @@ def hans_convert_examples_to_features(
label_map = {label: i for i, label in enumerate(label_list)}
features = []
for (ex_index, example) in tqdm.tqdm(enumerate(examples), desc="convert examples to features"):
for ex_index, example in tqdm.tqdm(enumerate(examples), desc="convert examples to features"):
if ex_index % 10000 == 0:
logger.info("Writing example %d" % (ex_index))

View File

@ -84,7 +84,10 @@ class AlbertModelWithPabee(AlbertModel):
def log_stats(self):
avg_inf_layers = self.inference_layers_num / self.inference_instances_num
message = f"*** Patience = {self.patience} Avg. Inference Layers = {avg_inf_layers:.2f} Speed Up = {1 - avg_inf_layers / self.config.num_hidden_layers:.2f} ***"
message = (
f"*** Patience = {self.patience} Avg. Inference Layers = {avg_inf_layers:.2f} Speed Up ="
f" {1 - avg_inf_layers / self.config.num_hidden_layers:.2f} ***"
)
print(message)
@add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING)

View File

@ -89,7 +89,10 @@ class BertModelWithPabee(BertModel):
def log_stats(self):
avg_inf_layers = self.inference_layers_num / self.inference_instances_num
message = f"*** Patience = {self.patience} Avg. Inference Layers = {avg_inf_layers:.2f} Speed Up = {1 - avg_inf_layers / self.config.num_hidden_layers:.2f} ***"
message = (
f"*** Patience = {self.patience} Avg. Inference Layers = {avg_inf_layers:.2f} Speed Up ="
f" {1 - avg_inf_layers / self.config.num_hidden_layers:.2f} ***"
)
print(message)
@add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING)

View File

@ -483,8 +483,10 @@ def main():
"--max_seq_length",
default=128,
type=int,
help="The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded.",
help=(
"The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
),
)
parser.add_argument("--do_train", action="store_true", help="Whether to run training.")
parser.add_argument("--do_eval", action="store_true", help="Whether to run eval on the dev set.")
@ -574,8 +576,10 @@ def main():
"--fp16_opt_level",
type=str,
default="O1",
help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
"See details at https://nvidia.github.io/apex/amp.html",
help=(
"For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
"See details at https://nvidia.github.io/apex/amp.html"
),
)
parser.add_argument(
"--local_rank",

View File

@ -325,7 +325,8 @@ def main():
if not documents_dir_is_valid(args.documents_dir):
raise FileNotFoundError(
"We could not find the directory you specified for the documents to summarize, or it was empty. Please specify a valid path."
"We could not find the directory you specified for the documents to summarize, or it was empty. Please"
" specify a valid path."
)
os.makedirs(args.summaries_output_dir, exist_ok=True)

View File

@ -338,8 +338,10 @@ def main():
"--max_seq_length",
default=128,
type=int,
help="The maximum total input sequence length after WordPiece tokenization. \n"
"Sequences longer than this will be truncated, sequences shorter padded.",
help=(
"The maximum total input sequence length after WordPiece tokenization. \n"
"Sequences longer than this will be truncated, sequences shorter padded."
),
)
parser.add_argument("--batch_size", default=1, type=int, help="Batch size.")

View File

@ -314,8 +314,10 @@ def main():
"--max_seq_length",
default=128,
type=int,
help="The maximum total input sequence length after WordPiece tokenization. \n"
"Sequences longer than this will be truncated, sequences shorter padded.",
help=(
"The maximum total input sequence length after WordPiece tokenization. \n"
"Sequences longer than this will be truncated, sequences shorter padded."
),
)
parser.add_argument("--batch_size", default=1, type=int, help="Batch size.")

View File

@ -112,7 +112,10 @@ class HumanEvalArguments:
device_int: Optional[int] = field(
default=-1,
metadata={
"help": "Determine which device to run the `text-generation` Pipeline on. -1 is CPU and any zero or positive number corresponds to which GPU device id to run on."
"help": (
"Determine which device to run the `text-generation` Pipeline on. -1 is CPU and any zero or positive"
" number corresponds to which GPU device id to run on."
)
},
)

View File

@ -186,7 +186,8 @@ def main():
_ = code_eval_metric.compute(references=[""], predictions=[[""]])
except ValueError as exception:
print(
'Code evaluation not enabled. Read the warning below carefully and then use `--HF_ALLOW_CODE_EVAL="1"` flag to enable code evaluation.'
'Code evaluation not enabled. Read the warning below carefully and then use `--HF_ALLOW_CODE_EVAL="1"`'
" flag to enable code evaluation."
)
raise exception

View File

@ -459,8 +459,10 @@ def main():
"--max_seq_length",
default=128,
type=int,
help="The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded.",
help=(
"The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
),
)
parser.add_argument("--do_train", action="store_true", help="Whether to run training.")
parser.add_argument("--do_eval", action="store_true", help="Whether to run eval on the dev set.")
@ -529,8 +531,10 @@ def main():
"--fp16_opt_level",
type=str,
default="O1",
help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
"See details at https://nvidia.github.io/apex/amp.html",
help=(
"For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
"See details at https://nvidia.github.io/apex/amp.html"
),
)
parser.add_argument("--local_rank", type=int, default=-1, help="For distributed training: local_rank")
parser.add_argument("--server_ip", type=str, default="", help="For distant debugging.")

View File

@ -60,7 +60,7 @@ class GroupedBatchSampler(BatchSampler):
def __init__(self, sampler, group_ids, batch_size):
if not isinstance(sampler, Sampler):
raise ValueError(
"sampler should be an instance of " "torch.utils.data.Sampler, but got sampler={}".format(sampler)
"sampler should be an instance of torch.utils.data.Sampler, but got sampler={}".format(sampler)
)
self.sampler = sampler
self.group_ids = group_ids

View File

@ -518,7 +518,10 @@ def main():
"--teacher_type",
default=None,
type=str,
help="Teacher type. Teacher tokenizer and student (model) tokenizer must output the same tokenization. Only for distillation.",
help=(
"Teacher type. Teacher tokenizer and student (model) tokenizer must output the same tokenization. Only for"
" distillation."
),
)
parser.add_argument(
"--teacher_name_or_path",
@ -590,8 +593,10 @@ def main():
"--max_seq_length",
default=384,
type=int,
help="The maximum total input sequence length after WordPiece tokenization. Sequences "
"longer than this will be truncated, and sequences shorter than this will be padded.",
help=(
"The maximum total input sequence length after WordPiece tokenization. Sequences "
"longer than this will be truncated, and sequences shorter than this will be padded."
),
)
parser.add_argument(
"--doc_stride",
@ -603,8 +608,10 @@ def main():
"--max_query_length",
default=64,
type=int,
help="The maximum number of tokens for the question. Questions longer than this will "
"be truncated to this length.",
help=(
"The maximum number of tokens for the question. Questions longer than this will "
"be truncated to this length."
),
)
parser.add_argument("--do_train", action="store_true", help="Whether to run training.")
parser.add_argument("--do_eval", action="store_true", help="Whether to run eval on the dev set.")
@ -649,14 +656,18 @@ def main():
"--max_answer_length",
default=30,
type=int,
help="The maximum length of an answer that can be generated. This is needed because the start "
"and end predictions are not conditioned on one another.",
help=(
"The maximum length of an answer that can be generated. This is needed because the start "
"and end predictions are not conditioned on one another."
),
)
parser.add_argument(
"--verbose_logging",
action="store_true",
help="If true, all of the warnings related to data processing will be printed. "
"A number of warnings are expected for a normal SQuAD evaluation.",
help=(
"If true, all of the warnings related to data processing will be printed. "
"A number of warnings are expected for a normal SQuAD evaluation."
),
)
parser.add_argument("--logging_steps", type=int, default=50, help="Log every X updates steps.")
@ -685,8 +696,10 @@ def main():
"--fp16_opt_level",
type=str,
default="O1",
help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
"See details at https://nvidia.github.io/apex/amp.html",
help=(
"For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
"See details at https://nvidia.github.io/apex/amp.html"
),
)
parser.add_argument("--server_ip", type=str, default="", help="Can be used for distant debugging.")
parser.add_argument("--server_port", type=str, default="", help="Can be used for distant debugging.")

View File

@ -25,7 +25,10 @@ from transformers import GPT2LMHeadModel, RobertaForMaskedLM
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Extraction some layers of the full RobertaForMaskedLM or GPT2LMHeadModel for Transfer Learned Distillation"
description=(
"Extraction some layers of the full RobertaForMaskedLM or GPT2LMHeadModel for Transfer Learned"
" Distillation"
)
)
parser.add_argument("--model_type", default="roberta", choices=["roberta", "gpt2"])
parser.add_argument("--model_name", default="roberta-large", type=str)

View File

@ -25,7 +25,10 @@ from transformers import BertForMaskedLM
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Extraction some layers of the full BertForMaskedLM or RObertaForMaskedLM for Transfer Learned Distillation"
description=(
"Extraction some layers of the full BertForMaskedLM or RObertaForMaskedLM for Transfer Learned"
" Distillation"
)
)
parser.add_argument("--model_type", default="bert", choices=["bert"])
parser.add_argument("--model_name", default="bert-base-uncased", type=str)

View File

@ -207,8 +207,10 @@ def main():
"--fp16_opt_level",
type=str,
default="O1",
help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
"See details at https://nvidia.github.io/apex/amp.html",
help=(
"For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
"See details at https://nvidia.github.io/apex/amp.html"
),
)
parser.add_argument("--n_gpu", type=int, default=1, help="Number of GPUs in the node.")
parser.add_argument("--local_rank", type=int, default=-1, help="Distributed training - Local rank")
@ -226,8 +228,8 @@ def main():
if os.path.exists(args.dump_path):
if not args.force:
raise ValueError(
f"Serialization dir {args.dump_path} already exists, but you have not precised wheter to overwrite it"
"Use `--force` if you want to overwrite it"
f"Serialization dir {args.dump_path} already exists, but you have not precised wheter to overwrite"
" itUse `--force` if you want to overwrite it"
)
else:
shutil.rmtree(args.dump_path)

View File

@ -48,7 +48,8 @@ class FSNERTokenizerUtils(object):
else:
raise Exception(
"Type of parameter x was not recognized! Only `list of strings` for query or `list of lists of strings` for supports are supported."
"Type of parameter x was not recognized! Only `list of strings` for query or `list of lists of"
" strings` for supports are supported."
)
return d

View File

@ -75,8 +75,9 @@ class ModelArguments:
model_name_or_path: Optional[str] = field(
default=None,
metadata={
"help": "The model checkpoint for weights initialization."
"Don't set if you want to train a model from scratch."
"help": (
"The model checkpoint for weights initialization.Don't set if you want to train a model from scratch."
)
},
)
model_type: Optional[str] = field(
@ -99,7 +100,10 @@ class ModelArguments:
dtype: Optional[str] = field(
default="float32",
metadata={
"help": "Floating-point format in which the model weights should be initialized and trained. Choose one of `[float32, float16, bfloat16]`."
"help": (
"Floating-point format in which the model weights should be initialized and trained. Choose one of"
" `[float32, float16, bfloat16]`."
)
},
)
@ -141,8 +145,10 @@ class DataTrainingArguments:
max_seq_length: Optional[int] = field(
default=None,
metadata={
"help": "The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated. Default to the max input length of the model."
"help": (
"The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated. Default to the max input length of the model."
)
},
)
preprocessing_num_workers: Optional[int] = field(
@ -155,8 +161,10 @@ class DataTrainingArguments:
pad_to_max_length: bool = field(
default=False,
metadata={
"help": "Whether to pad all samples to `max_seq_length`. "
"If False, will pad the samples dynamically when batching to the maximum length in the batch."
"help": (
"Whether to pad all samples to `max_seq_length`. "
"If False, will pad the samples dynamically when batching to the maximum length in the batch."
)
},
)
line_by_line: bool = field(
@ -575,7 +583,8 @@ if __name__ == "__main__":
if step % training_args.logging_steps == 0 and step > 0:
steps.write(
f"Step... ({step} | Loss: {train_metric['loss'].mean()}, Learning Rate: {train_metric['learning_rate'].mean()})"
f"Step... ({step} | Loss: {train_metric['loss'].mean()}, Learning Rate:"
f" {train_metric['learning_rate'].mean()})"
)
train_time += time.time() - train_start
if has_tensorboard and jax.process_index() == 0:
@ -604,7 +613,10 @@ if __name__ == "__main__":
eval_metrics = jax.tree_map(lambda x: x / eval_normalizer, eval_metrics)
# Update progress bar
steps.desc = f"Step... ({step + 1}/{num_train_steps} | Loss: {eval_metrics['loss']}, Acc: {eval_metrics['accuracy']})"
steps.desc = (
f"Step... ({step + 1}/{num_train_steps} | Loss: {eval_metrics['loss']}, Acc:"
f" {eval_metrics['accuracy']})"
)
if has_tensorboard and jax.process_index() == 0:
write_eval_metric(summary_writer, eval_metrics, step)

View File

@ -77,14 +77,18 @@ class ModelArguments:
text_model_name_or_path: str = field(
metadata={
"help": "The text model checkpoint for weights initialization."
"Don't set if you want to train a model from scratch."
"help": (
"The text model checkpoint for weights initialization."
"Don't set if you want to train a model from scratch."
)
},
)
vision_model_name_or_path: str = field(
metadata={
"help": "The vision model checkpoint for weights initialization."
"Don't set if you want to train a model from scratch."
"help": (
"The vision model checkpoint for weights initialization."
"Don't set if you want to train a model from scratch."
)
},
)
from_pt: bool = field(
@ -107,7 +111,10 @@ class ModelArguments:
dtype: Optional[str] = field(
default="float32",
metadata={
"help": "Floating-point format in which the model weights should be initialized and trained. Choose one of `[float32, float16, bfloat16]`."
"help": (
"Floating-point format in which the model weights should be initialized and trained. Choose one of"
" `[float32, float16, bfloat16]`."
)
},
)
@ -129,22 +136,28 @@ class DataTrainingArguments:
max_seq_length: Optional[int] = field(
default=72,
metadata={
"help": "The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
"help": (
"The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
)
},
)
max_train_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
)
},
)
max_eval_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
)
},
)
overwrite_cache: bool = field(
@ -519,7 +532,8 @@ def main():
train_step_progress_bar.close()
epochs.write(
f"Epoch... ({epoch + 1}/{num_epochs} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']})"
f"Epoch... ({epoch + 1}/{num_epochs} | Loss: {train_metric['loss']}, Learning Rate:"
f" {train_metric['learning_rate']})"
)
# ======================== Evaluating ==============================

View File

@ -69,8 +69,9 @@ class ModelArguments:
model_name_or_path: Optional[str] = field(
default=None,
metadata={
"help": "The model checkpoint for weights initialization."
"Don't set if you want to train a model from scratch."
"help": (
"The model checkpoint for weights initialization.Don't set if you want to train a model from scratch."
)
},
)
model_type: Optional[str] = field(
@ -93,7 +94,10 @@ class ModelArguments:
dtype: Optional[str] = field(
default="float32",
metadata={
"help": "Floating-point format in which the model weights should be initialized and trained. Choose one of `[float32, float16, bfloat16]`."
"help": (
"Floating-point format in which the model weights should be initialized and trained. Choose one of"
" `[float32, float16, bfloat16]`."
)
},
)
@ -118,15 +122,19 @@ class DataTrainingArguments:
max_train_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
)
},
)
max_eval_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
)
},
)
overwrite_cache: bool = field(
@ -141,9 +149,11 @@ class DataTrainingArguments:
block_size: Optional[int] = field(
default=None,
metadata={
"help": "Optional input sequence length after tokenization. "
"The training dataset will be truncated in block of this size for training. "
"Default to the model max input length for single sentence inputs (take into account special tokens)."
"help": (
"Optional input sequence length after tokenization. "
"The training dataset will be truncated in block of this size for training. "
"Default to the model max input length for single sentence inputs (take into account special tokens)."
)
},
)
overwrite_cache: bool = field(
@ -334,7 +344,8 @@ def main():
# clm input could be much much longer than block_size
if "Token indices sequence length is longer than the" in cl.out:
tok_logger.warning(
"^^^^^^^^^^^^^^^^ Please ignore the warning above - this long input will be chunked into smaller bits before being passed to the model."
"^^^^^^^^^^^^^^^^ Please ignore the warning above - this long input will be chunked into smaller bits"
" before being passed to the model."
)
return output
@ -606,7 +617,8 @@ def main():
write_train_metric(summary_writer, train_metrics, train_time, cur_step)
epochs.write(
f"Step... ({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']})"
f"Step... ({cur_step} | Loss: {train_metric['loss']}, Learning Rate:"
f" {train_metric['learning_rate']})"
)
train_metrics = []
@ -632,7 +644,8 @@ def main():
eval_metrics["perplexity"] = float("inf")
logger.info(
f"Step... ({cur_step} | Eval loss: {eval_metrics['loss']} | Eval Perplexity: {eval_metrics['perplexity']}"
f"Step... ({cur_step} | Eval loss: {eval_metrics['loss']} | Eval Perplexity:"
f" {eval_metrics['perplexity']}"
)
if cur_step % training_args.save_steps == 0 and cur_step > 0:

View File

@ -64,7 +64,10 @@ class ModelArguments:
dtype: Optional[str] = field(
default="float32",
metadata={
"help": "Floating-point format in which the model weights should be initialized and trained. Choose one of `[float32, float16, bfloat16]`."
"help": (
"Floating-point format in which the model weights should be initialized and trained. Choose one of"
" `[float32, float16, bfloat16]`."
)
},
)
@ -94,7 +97,9 @@ class DataTrainingArguments:
validation_split_name: Optional[str] = field(
default="validation",
metadata={
"help": "The name of the validation data set split to use (via the datasets library). Defaults to 'validation'"
"help": (
"The name of the validation data set split to use (via the datasets library). Defaults to 'validation'"
)
},
)
speech_file_column: Optional[str] = field(
@ -120,7 +125,10 @@ class DataTrainingArguments:
pad_to_multiple_of: Optional[int] = field(
default=1024,
metadata={
"help": "If set will pad the sequence to a multiple of the provided value. This is important to avoid triggering recompilations on TPU"
"help": (
"If set will pad the sequence to a multiple of the provided value. This is important to avoid"
" triggering recompilations on TPU"
)
},
)
@ -357,7 +365,8 @@ def main():
if not config.do_stable_layer_norm or config.feat_extract_norm != "layer":
raise ValueError(
"PreTraining is only supported for ``config.do_stable_layer_norm=True`` and ``config.feat_extract_norm='layer'"
"PreTraining is only supported for ``config.do_stable_layer_norm=True`` and"
" ``config.feat_extract_norm='layer'"
)
model = FlaxWav2Vec2ForPreTraining(config, seed=training_args.seed, dtype=getattr(jnp, model_args.dtype))
@ -557,7 +566,8 @@ def main():
write_train_metric(summary_writer, train_metrics, train_time, cur_step)
epochs.write(
f"Step... ({cur_step} | Loss: {train_metric['loss'].mean()}, Learning Rate: {train_metric['learning_rate'].mean()})"
f"Step... ({cur_step} | Loss: {train_metric['loss'].mean()}, Learning Rate:"
f" {train_metric['learning_rate'].mean()})"
)
train_metrics = []
@ -583,7 +593,8 @@ def main():
# Update progress bar
epochs.write(
f"Epoch... ({epoch + 1}/{num_epochs} | Loss: {eval_metrics['loss']}, Perplexity: {eval_metrics['codevector_perplexity']})"
f"Epoch... ({epoch + 1}/{num_epochs} | Loss: {eval_metrics['loss']}, Perplexity:"
f" {eval_metrics['codevector_perplexity']})"
)
# Save metrics

View File

@ -649,7 +649,7 @@ def batch_query_qa_dense_index(questions, qa_embedder, tokenizer, wiki_passages,
"<P> " + " <P> ".join([p["passage_text"] for p in res_passages]) for res_passages in res_passages_lst
]
all_res_lists = []
for (res_passages, dl) in zip(res_passages_lst, D):
for res_passages, dl in zip(res_passages_lst, D):
res_list = [dict([(k, p[k]) for k in wiki_passages.column_names]) for p in res_passages]
for r, sc in zip(res_list, dl):
r["score"] = float(sc)
@ -679,7 +679,7 @@ def batch_query_qa_dense_index_nn(passages, qa_embedder, tokenizer, wiki_passage
"<P> " + " <P> ".join([p["passage_text"] for p in res_passages]) for res_passages in res_passages_lst
]
all_res_lists = []
for (res_passages, dl, il) in zip(res_passages_lst, D, I):
for res_passages, dl, il in zip(res_passages_lst, D, I):
res_list = [dict([(k, p[k]) for k in wiki_passages.column_names]) for p in res_passages]
for r, sc, i in zip(res_list, dl, il):
r["passage_id"] = int(i)

View File

@ -101,8 +101,8 @@ def parse_args():
type=int,
default=32,
help=(
"The maximum total input entity length after tokenization (Used only for (M)Luke models). Sequences longer than this will be truncated,"
" sequences shorter will be padded if `--pad_to_max_length` is passed."
"The maximum total input entity length after tokenization (Used only for (M)Luke models). Sequences longer"
" than this will be truncated, sequences shorter will be padded if `--pad_to_max_length` is passed."
),
)
parser.add_argument(
@ -110,8 +110,8 @@ def parse_args():
type=int,
default=30,
help=(
"The maximum total input mention length after tokenization (Used only for (M)Luke models). Sequences longer than this will be truncated,"
" sequences shorter will be padded if `--pad_to_max_length` is passed."
"The maximum total input mention length after tokenization (Used only for (M)Luke models). Sequences"
" longer than this will be truncated, sequences shorter will be padded if `--pad_to_max_length` is passed."
),
)
parser.add_argument(

View File

@ -592,7 +592,7 @@ class Matcher(object):
match_labels = matches.new_full(matches.size(), 1, dtype=torch.int8)
for (l, low, high) in zip(self.labels, self.thresholds[:-1], self.thresholds[1:]):
for l, low, high in zip(self.labels, self.thresholds[:-1], self.thresholds[1:]):
low_high = (matched_vals >= low) & (matched_vals < high)
match_labels[low_high] = l
@ -1037,9 +1037,9 @@ class ResNet(Backbone):
curr_kwargs = {}
for k, v in kwargs.items():
if k.endswith("_per_block"):
assert len(v) == num_blocks, (
f"Argument '{k}' of make_stage should have the " f"same length as num_blocks={num_blocks}."
)
assert (
len(v) == num_blocks
), f"Argument '{k}' of make_stage should have the same length as num_blocks={num_blocks}."
newk = k[: -len("_per_block")]
assert newk not in kwargs, f"Cannot call make_stage with both {k} and {newk}!"
curr_kwargs[newk] = v[i]
@ -1401,7 +1401,7 @@ class AnchorGenerator(nn.Module):
def grid_anchors(self, grid_sizes):
anchors = []
for (size, stride, base_anchors) in zip(grid_sizes, self.strides, self.cell_anchors):
for size, stride, base_anchors in zip(grid_sizes, self.strides, self.cell_anchors):
shift_x, shift_y = _create_grid_offsets(size, stride, self.offset, base_anchors.device)
shifts = torch.stack((shift_x, shift_y, shift_x, shift_y), dim=1)
@ -1708,10 +1708,9 @@ class GeneralizedRCNN(nn.Module):
elif os.path.isfile(pretrained_model_name_or_path) or is_remote_url(pretrained_model_name_or_path):
archive_file = pretrained_model_name_or_path
elif os.path.isfile(pretrained_model_name_or_path + ".index"):
assert (
from_tf
), "We found a TensorFlow checkpoint at {}, please set from_tf to True to load from this checkpoint".format(
pretrained_model_name_or_path + ".index"
assert from_tf, (
"We found a TensorFlow checkpoint at {}, please set from_tf to True to load from this checkpoint"
.format(pretrained_model_name_or_path + ".index")
)
archive_file = pretrained_model_name_or_path + ".index"
else:
@ -1797,26 +1796,28 @@ class GeneralizedRCNN(nn.Module):
if len(unexpected_keys) > 0:
print(
f"Some weights of the model checkpoint at {pretrained_model_name_or_path} were not used when "
f"initializing {model.__class__.__name__}: {unexpected_keys}\n"
f"- This IS expected if you are initializing {model.__class__.__name__} from the checkpoint of a model trained on another task "
f"or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n"
f"- This IS NOT expected if you are initializing {model.__class__.__name__} from the checkpoint of a model that you expect "
f"to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model)."
f"Some weights of the model checkpoint at {pretrained_model_name_or_path} were not used when"
f" initializing {model.__class__.__name__}: {unexpected_keys}\n- This IS expected if you are"
f" initializing {model.__class__.__name__} from the checkpoint of a model trained on another task or"
" with another architecture (e.g. initializing a BertForSequenceClassification model from a"
" BertForPreTraining model).\n- This IS NOT expected if you are initializing"
f" {model.__class__.__name__} from the checkpoint of a model that you expect to be exactly identical"
" (initializing a BertForSequenceClassification model from a BertForSequenceClassification model)."
)
else:
print(f"All model checkpoint weights were used when initializing {model.__class__.__name__}.\n")
if len(missing_keys) > 0:
print(
f"Some weights of {model.__class__.__name__} were not initialized from the model checkpoint at {pretrained_model_name_or_path} "
f"and are newly initialized: {missing_keys}\n"
f"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference."
f"Some weights of {model.__class__.__name__} were not initialized from the model checkpoint at"
f" {pretrained_model_name_or_path} and are newly initialized: {missing_keys}\nYou should probably"
" TRAIN this model on a down-stream task to be able to use it for predictions and inference."
)
else:
print(
f"All the weights of {model.__class__.__name__} were initialized from the model checkpoint at {pretrained_model_name_or_path}.\n"
f"If your task is similar to the task the model of the checkpoint was trained on, "
f"you can already use {model.__class__.__name__} for predictions without further training."
f"All the weights of {model.__class__.__name__} were initialized from the model checkpoint at"
f" {pretrained_model_name_or_path}.\nIf your task is similar to the task the model of the checkpoint"
f" was trained on, you can already use {model.__class__.__name__} for predictions without further"
" training."
)
if len(error_msgs) > 0:
raise RuntimeError(

View File

@ -231,9 +231,10 @@ def compare(in_tensor):
n2 = out_tensor.numpy()[0]
print(n1.shape, n1[0, 0, :5])
print(n2.shape, n2[0, 0, :5])
assert np.allclose(
n1, n2, rtol=0.01, atol=0.1
), f"{sum([1 for x in np.isclose(n1, n2, rtol=0.01, atol=0.1).flatten() if x == False])/len(n1.flatten())*100:.4f} % element-wise mismatch"
assert np.allclose(n1, n2, rtol=0.01, atol=0.1), (
f"{sum([1 for x in np.isclose(n1, n2, rtol=0.01, atol=0.1).flatten() if x == False])/len(n1.flatten())*100:.4f} %"
" element-wise mismatch"
)
raise Exception("tensors are all good")
# Hugging face functions below

View File

@ -61,8 +61,9 @@ class ModelArguments:
model_name_or_path: Optional[str] = field(
default=None,
metadata={
"help": "The model checkpoint for weights initialization."
"Don't set if you want to train a model from scratch."
"help": (
"The model checkpoint for weights initialization.Don't set if you want to train a model from scratch."
)
},
)
model_type: Optional[str] = field(
@ -72,8 +73,10 @@ class ModelArguments:
config_overrides: Optional[str] = field(
default=None,
metadata={
"help": "Override some existing default config settings when a model is trained from scratch. Example: "
"n_embd=10,resid_pdrop=0.2,scale_attn_weights=false,summary_type=cls_index"
"help": (
"Override some existing default config settings when a model is trained from scratch. Example: "
"n_embd=10,resid_pdrop=0.2,scale_attn_weights=false,summary_type=cls_index"
)
},
)
config_name: Optional[str] = field(
@ -97,8 +100,10 @@ class ModelArguments:
use_auth_token: bool = field(
default=False,
metadata={
"help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
"help": (
"Will use the token generated when running `transformers-cli login` (necessary to use this script "
"with private models)."
)
},
)
@ -146,8 +151,10 @@ class DataTrainingArguments:
max_seq_length: Optional[int] = field(
default=None,
metadata={
"help": "The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated. Default to the max input length of the model."
"help": (
"The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated. Default to the max input length of the model."
)
},
)
preprocessing_num_workers: Optional[int] = field(
@ -160,8 +167,10 @@ class DataTrainingArguments:
pad_to_max_length: bool = field(
default=False,
metadata={
"help": "Whether to pad all samples to `max_seq_length`. "
"If False, will pad the samples dynamically when batching to the maximum length in the batch."
"help": (
"Whether to pad all samples to `max_seq_length`. "
"If False, will pad the samples dynamically when batching to the maximum length in the batch."
)
},
)

View File

@ -356,8 +356,10 @@ def main():
"--max_seq_length",
default=128,
type=int,
help="The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded.",
help=(
"The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
),
)
parser.add_argument(
"--num_image_embeds", default=1, type=int, help="Number of Image Embeddings from the Image Encoder"
@ -423,8 +425,10 @@ def main():
"--fp16_opt_level",
type=str,
default="O1",
help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
"See details at https://nvidia.github.io/apex/amp.html",
help=(
"For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
"See details at https://nvidia.github.io/apex/amp.html"
),
)
parser.add_argument("--local_rank", type=int, default=-1, help="For distributed training: local_rank")
parser.add_argument("--server_ip", type=str, default="", help="For distant debugging.")

View File

@ -103,15 +103,20 @@ if __name__ == "__main__":
choices=["l0", "magnitude", "topK", "sigmoied_threshold"],
type=str,
required=True,
help="Pruning Method (l0 = L0 regularization, magnitude = Magnitude pruning, topK = Movement pruning, sigmoied_threshold = Soft movement pruning)",
help=(
"Pruning Method (l0 = L0 regularization, magnitude = Magnitude pruning, topK = Movement pruning,"
" sigmoied_threshold = Soft movement pruning)"
),
)
parser.add_argument(
"--threshold",
type=float,
required=False,
help="For `magnitude` and `topK`, it is the level of remaining weights (in %) in the fine-pruned model."
"For `sigmoied_threshold`, it is the threshold \tau against which the (sigmoied) scores are compared."
"Not needed for `l0`",
help=(
"For `magnitude` and `topK`, it is the level of remaining weights (in %) in the fine-pruned model."
"For `sigmoied_threshold`, it is the threshold \tau against which the (sigmoied) scores are compared."
"Not needed for `l0`"
),
)
parser.add_argument(
"--model_name_or_path",

View File

@ -70,15 +70,20 @@ if __name__ == "__main__":
choices=["l0", "topK", "sigmoied_threshold"],
type=str,
required=True,
help="Pruning Method (l0 = L0 regularization, topK = Movement pruning, sigmoied_threshold = Soft movement pruning)",
help=(
"Pruning Method (l0 = L0 regularization, topK = Movement pruning, sigmoied_threshold = Soft movement"
" pruning)"
),
)
parser.add_argument(
"--threshold",
type=float,
required=False,
help="For `topK`, it is the level of remaining weights (in %) in the fine-pruned model."
"For `sigmoied_threshold`, it is the threshold \tau against which the (sigmoied) scores are compared."
"Not needed for `l0`",
help=(
"For `topK`, it is the level of remaining weights (in %) in the fine-pruned model."
"For `sigmoied_threshold`, it is the threshold \tau against which the (sigmoied) scores are compared."
"Not needed for `l0`"
),
)
parser.add_argument(
"--serialization_dir",

View File

@ -80,8 +80,8 @@ class BertSelfAttention(nn.Module):
super().__init__()
if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
raise ValueError(
"The hidden size (%d) is not a multiple of the number of attention "
"heads (%d)" % (config.hidden_size, config.num_attention_heads)
"The hidden size (%d) is not a multiple of the number of attention heads (%d)"
% (config.hidden_size, config.num_attention_heads)
)
self.output_attentions = config.output_attentions

View File

@ -622,8 +622,10 @@ def main():
"--max_seq_length",
default=128,
type=int,
help="The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded.",
help=(
"The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
),
)
parser.add_argument("--do_train", action="store_true", help="Whether to run training.")
parser.add_argument("--do_eval", action="store_true", help="Whether to run eval on the dev set.")
@ -669,22 +671,29 @@ def main():
"--initial_warmup",
default=1,
type=int,
help="Run `initial_warmup` * `warmup_steps` steps of threshold warmup during which threshold stays"
"at its `initial_threshold` value (sparsity schedule).",
help=(
"Run `initial_warmup` * `warmup_steps` steps of threshold warmup during which threshold stays"
"at its `initial_threshold` value (sparsity schedule)."
),
)
parser.add_argument(
"--final_warmup",
default=2,
type=int,
help="Run `final_warmup` * `warmup_steps` steps of threshold cool-down during which threshold stays"
"at its final_threshold value (sparsity schedule).",
help=(
"Run `final_warmup` * `warmup_steps` steps of threshold cool-down during which threshold stays"
"at its final_threshold value (sparsity schedule)."
),
)
parser.add_argument(
"--pruning_method",
default="topK",
type=str,
help="Pruning Method (l0 = L0 regularization, magnitude = Magnitude pruning, topK = Movement pruning, sigmoied_threshold = Soft movement pruning).",
help=(
"Pruning Method (l0 = L0 regularization, magnitude = Magnitude pruning, topK = Movement pruning,"
" sigmoied_threshold = Soft movement pruning)."
),
)
parser.add_argument(
"--mask_init",
@ -717,7 +726,10 @@ def main():
"--teacher_type",
default=None,
type=str,
help="Teacher type. Teacher tokenizer and student (model) tokenizer must output the same tokenization. Only for distillation.",
help=(
"Teacher type. Teacher tokenizer and student (model) tokenizer must output the same tokenization. Only for"
" distillation."
),
)
parser.add_argument(
"--teacher_name_or_path",
@ -787,8 +799,10 @@ def main():
"--fp16_opt_level",
type=str,
default="O1",
help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
"See details at https://nvidia.github.io/apex/amp.html",
help=(
"For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
"See details at https://nvidia.github.io/apex/amp.html"
),
)
parser.add_argument("--local_rank", type=int, default=-1, help="For distributed training: local_rank")
@ -805,7 +819,8 @@ def main():
and not args.overwrite_output_dir
):
raise ValueError(
f"Output directory ({args.output_dir}) already exists and is not empty. Use --overwrite_output_dir to overcome."
f"Output directory ({args.output_dir}) already exists and is not empty. Use --overwrite_output_dir to"
" overcome."
)
# Setup CUDA, GPU & distributed training

View File

@ -737,8 +737,10 @@ def main():
"--max_seq_length",
default=384,
type=int,
help="The maximum total input sequence length after WordPiece tokenization. Sequences "
"longer than this will be truncated, and sequences shorter than this will be padded.",
help=(
"The maximum total input sequence length after WordPiece tokenization. Sequences "
"longer than this will be truncated, and sequences shorter than this will be padded."
),
)
parser.add_argument(
"--doc_stride",
@ -750,8 +752,10 @@ def main():
"--max_query_length",
default=64,
type=int,
help="The maximum number of tokens for the question. Questions longer than this will "
"be truncated to this length.",
help=(
"The maximum number of tokens for the question. Questions longer than this will "
"be truncated to this length."
),
)
parser.add_argument("--do_train", action="store_true", help="Whether to run training.")
parser.add_argument("--do_eval", action="store_true", help="Whether to run eval on the dev set.")
@ -785,22 +789,29 @@ def main():
"--initial_warmup",
default=1,
type=int,
help="Run `initial_warmup` * `warmup_steps` steps of threshold warmup during which threshold stays"
"at its `initial_threshold` value (sparsity schedule).",
help=(
"Run `initial_warmup` * `warmup_steps` steps of threshold warmup during which threshold stays"
"at its `initial_threshold` value (sparsity schedule)."
),
)
parser.add_argument(
"--final_warmup",
default=2,
type=int,
help="Run `final_warmup` * `warmup_steps` steps of threshold cool-down during which threshold stays"
"at its final_threshold value (sparsity schedule).",
help=(
"Run `final_warmup` * `warmup_steps` steps of threshold cool-down during which threshold stays"
"at its final_threshold value (sparsity schedule)."
),
)
parser.add_argument(
"--pruning_method",
default="topK",
type=str,
help="Pruning Method (l0 = L0 regularization, magnitude = Magnitude pruning, topK = Movement pruning, sigmoied_threshold = Soft movement pruning).",
help=(
"Pruning Method (l0 = L0 regularization, magnitude = Magnitude pruning, topK = Movement pruning,"
" sigmoied_threshold = Soft movement pruning)."
),
)
parser.add_argument(
"--mask_init",
@ -833,7 +844,10 @@ def main():
"--teacher_type",
default=None,
type=str,
help="Teacher type. Teacher tokenizer and student (model) tokenizer must output the same tokenization. Only for distillation.",
help=(
"Teacher type. Teacher tokenizer and student (model) tokenizer must output the same tokenization. Only for"
" distillation."
),
)
parser.add_argument(
"--teacher_name_or_path",
@ -883,20 +897,27 @@ def main():
"--max_answer_length",
default=30,
type=int,
help="The maximum length of an answer that can be generated. This is needed because the start "
"and end predictions are not conditioned on one another.",
help=(
"The maximum length of an answer that can be generated. This is needed because the start "
"and end predictions are not conditioned on one another."
),
)
parser.add_argument(
"--verbose_logging",
action="store_true",
help="If true, all of the warnings related to data processing will be printed. "
"A number of warnings are expected for a normal SQuAD evaluation.",
help=(
"If true, all of the warnings related to data processing will be printed. "
"A number of warnings are expected for a normal SQuAD evaluation."
),
)
parser.add_argument(
"--lang_id",
default=0,
type=int,
help="language id of input for language-specific xlm models (see tokenization_xlm.PRETRAINED_INIT_CONFIGURATION)",
help=(
"language id of input for language-specific xlm models (see"
" tokenization_xlm.PRETRAINED_INIT_CONFIGURATION)"
),
)
parser.add_argument("--logging_steps", type=int, default=500, help="Log every X updates steps.")
@ -925,8 +946,10 @@ def main():
"--fp16_opt_level",
type=str,
default="O1",
help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
"See details at https://nvidia.github.io/apex/amp.html",
help=(
"For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
"See details at https://nvidia.github.io/apex/amp.html"
),
)
parser.add_argument("--server_ip", type=str, default="", help="Can be used for distant debugging.")
parser.add_argument("--server_port", type=str, default="", help="Can be used for distant debugging.")

View File

@ -392,13 +392,14 @@ class BeamSearchScorerTS(torch.nn.Module):
if not isinstance(num_beams, int) or num_beams <= 1:
raise ValueError(
f"`num_beams` has to be an integer strictly greater than 1, but is {num_beams}. For `num_beams` == 1, one should make use of `greedy_search` instead."
f"`num_beams` has to be an integer strictly greater than 1, but is {num_beams}. For `num_beams` == 1,"
" one should make use of `greedy_search` instead."
)
if not isinstance(num_beam_groups, int) or (num_beam_groups > num_beams) or (num_beams % num_beam_groups != 0):
raise ValueError(
f"`num_beam_groups` has to be an integer smaller or equal than `num_beams` and `num_beams` "
f"has to be divisible by `num_beam_groups`, but is {num_beam_groups} with `num_beams` being {num_beams}."
"`num_beam_groups` has to be an integer smaller or equal than `num_beams` and `num_beams` has to be"
f" divisible by `num_beam_groups`, but is {num_beam_groups} with `num_beams` being {num_beams}."
)
def hypo_len(self, hypo_idx: int):
@ -508,7 +509,8 @@ class BeamSearchScorerTS(torch.nn.Module):
if beam_idx < self.group_size:
raise ValueError(
f"At most {self.group_size} tokens in {next_tokens[batch_idx]} can be equal to `eos_token_id: {eos_token_id}`. Make sure {next_tokens[batch_idx]} are corrected."
f"At most {self.group_size} tokens in {next_tokens[batch_idx]} can be equal to `eos_token_id:"
f" {eos_token_id}`. Make sure {next_tokens[batch_idx]} are corrected."
)
# Check if we are done so that we can save a pad step if all(done)

View File

@ -53,14 +53,16 @@ def parse_args():
"--max_length",
type=int,
default=5,
help=("The maximum total input sequence length after tokenization."),
help="The maximum total input sequence length after tokenization.",
)
parser.add_argument(
"--num_beams",
type=int,
default=None,
help="Number of beams to use for evaluation. This argument will be "
"passed to ``model.generate``, which is used during ``evaluate`` and ``predict``.",
help=(
"Number of beams to use for evaluation. This argument will be "
"passed to ``model.generate``, which is used during ``evaluate`` and ``predict``."
),
)
parser.add_argument(
"--model_name_or_path",

View File

@ -535,7 +535,7 @@ class FastAttentionviaLowRankDecomposition(FastAttention):
assert key.ndim == value.ndim
for ax in axis:
if not (query.ndim >= 3 and 1 <= ax < query.ndim - 2):
raise ValueError("Attention axis must be between the batch " "axis and the last-two axes.")
raise ValueError("Attention axis must be between the batch axis and the last-two axes.")
n = key.ndim
# Constructing projection tensor.

View File

@ -98,8 +98,9 @@ class ModelArguments:
model_name_or_path: Optional[str] = field(
default=None,
metadata={
"help": "The model checkpoint for weights initialization."
"Don't set if you want to train a model from scratch."
"help": (
"The model checkpoint for weights initialization.Don't set if you want to train a model from scratch."
)
},
)
performer: bool = field(
@ -159,8 +160,10 @@ class DataTrainingArguments:
max_seq_length: Optional[int] = field(
default=None,
metadata={
"help": "The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated. Default to the max input length of the model."
"help": (
"The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated. Default to the max input length of the model."
)
},
)
preprocessing_num_workers: Optional[int] = field(
@ -173,8 +176,10 @@ class DataTrainingArguments:
pad_to_max_length: bool = field(
default=False,
metadata={
"help": "Whether to pad all samples to `max_seq_length`. "
"If False, will pad the samples dynamically when batching to the maximum length in the batch."
"help": (
"Whether to pad all samples to `max_seq_length`. "
"If False, will pad the samples dynamically when batching to the maximum length in the batch."
)
},
)

View File

@ -175,8 +175,7 @@ def evaluate_performance(data_loader, discriminator, device="cpu"):
test_loss /= len(data_loader.dataset)
print(
"Performance on test set: "
"Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)".format(
"Performance on test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)".format(
test_loss, correct, len(data_loader.dataset), 100.0 * correct / len(data_loader.dataset)
)
)
@ -309,7 +308,7 @@ def train_discriminator(
x.append(seq)
y.append(d["label"])
except Exception:
print("Error evaluating / tokenizing" " line {}, skipping it".format(i))
print("Error evaluating / tokenizing line {}, skipping it".format(i))
pass
full_dataset = Dataset(x, y)
@ -349,7 +348,7 @@ def train_discriminator(
x.append(seq)
y.append(int(np.sum(d["label"]) > 0))
except Exception:
print("Error evaluating / tokenizing" " line {}, skipping it".format(i))
print("Error evaluating / tokenizing line {}, skipping it".format(i))
pass
full_dataset = Dataset(x, y)
@ -370,7 +369,7 @@ def train_discriminator(
# class \t text
if dataset_fp is None:
raise ValueError("When generic dataset is selected, " "dataset_fp needs to be specified aswell.")
raise ValueError("When generic dataset is selected, dataset_fp needs to be specified aswell.")
classes = set()
with open(dataset_fp) as f:
@ -490,15 +489,17 @@ if __name__ == "__main__":
type=str,
default="SST",
choices=("SST", "clickbait", "toxic", "generic"),
help="dataset to train the discriminator on."
"In case of generic, the dataset is expected"
"to be a TSBV file with structure: class \\t text",
help=(
"dataset to train the discriminator on."
"In case of generic, the dataset is expected"
"to be a TSBV file with structure: class \\t text"
),
)
parser.add_argument(
"--dataset_fp",
type=str,
default="",
help="File path of the dataset to use. " "Needed only in case of generic datadset",
help="File path of the dataset to use. Needed only in case of generic datadset",
)
parser.add_argument(
"--pretrained_model", type=str, default="gpt2-medium", help="Pretrained model to use as encoder"

View File

@ -87,8 +87,10 @@ parser.add_argument(
"--max_seq_length",
default=384,
type=int,
help="The maximum total input sequence length after WordPiece tokenization. Sequences "
"longer than this will be truncated, and sequences shorter than this will be padded.",
help=(
"The maximum total input sequence length after WordPiece tokenization. Sequences "
"longer than this will be truncated, and sequences shorter than this will be padded."
),
)
parser.add_argument(
"--doc_stride",
@ -109,8 +111,10 @@ parser.add_argument(
"--max_answer_length",
default=30,
type=int,
help="The maximum length of an answer that can be generated. This is needed because the start "
"and end predictions are not conditioned on one another.",
help=(
"The maximum length of an answer that can be generated. This is needed because the start "
"and end predictions are not conditioned on one another."
),
)
parser.add_argument("--seed", type=int, default=42, help="random seed for initialization")

Some files were not shown because too many files have changed in this diff Show More