diff --git a/.circleci/config.yml b/.circleci/config.yml index 46b10b59a3..0e8f394e9e 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -854,7 +854,7 @@ jobs: key: v0.4-code_quality-{{ checksum "setup.py" }} paths: - '~/.cache/pip' - - run: black --check examples tests src utils + - run: black --check --preview examples tests src utils - run: isort --check-only examples tests src utils - run: python utils/custom_init_isort.py --check_only - run: flake8 examples tests src utils diff --git a/Makefile b/Makefile index 143be675b5..c9226bb0d8 100644 --- a/Makefile +++ b/Makefile @@ -9,7 +9,7 @@ modified_only_fixup: $(eval modified_py_files := $(shell python utils/get_modified_files.py $(check_dirs))) @if test -n "$(modified_py_files)"; then \ echo "Checking/fixing $(modified_py_files)"; \ - black $(modified_py_files); \ + black --preview $(modified_py_files); \ isort $(modified_py_files); \ flake8 $(modified_py_files); \ else \ @@ -45,7 +45,7 @@ repo-consistency: # this target runs checks on all files quality: - black --check $(check_dirs) + black --check --preview $(check_dirs) isort --check-only $(check_dirs) python utils/custom_init_isort.py --check_only flake8 $(check_dirs) @@ -60,7 +60,7 @@ extra_style_checks: # this target runs checks on all files and potentially modifies some of them style: - black $(check_dirs) + black --preview $(check_dirs) isort $(check_dirs) ${MAKE} autogenerate_code ${MAKE} extra_style_checks diff --git a/examples/flax/image-captioning/create_model_from_encoder_decoder_models.py b/examples/flax/image-captioning/create_model_from_encoder_decoder_models.py index 953aa136e9..ab2fb8568d 100644 --- a/examples/flax/image-captioning/create_model_from_encoder_decoder_models.py +++ b/examples/flax/image-captioning/create_model_from_encoder_decoder_models.py @@ -42,14 +42,18 @@ class ModelArguments: ) encoder_model_name_or_path: str = field( metadata={ - "help": "The encoder model checkpoint for weights initialization." - "Don't set if you want to train an encoder model from scratch." + "help": ( + "The encoder model checkpoint for weights initialization. " + "Don't set if you want to train an encoder model from scratch." + ) }, ) decoder_model_name_or_path: str = field( metadata={ - "help": "The decoder model checkpoint for weights initialization." - "Don't set if you want to train a decoder model from scratch." + "help": ( + "The decoder model checkpoint for weights initialization. " + "Don't set if you want to train a decoder model from scratch." + ) }, ) encoder_config_name: Optional[str] = field( diff --git a/examples/flax/image-captioning/run_image_captioning_flax.py b/examples/flax/image-captioning/run_image_captioning_flax.py index b1c9012777..23f8a4d5ad 100644 --- a/examples/flax/image-captioning/run_image_captioning_flax.py +++ b/examples/flax/image-captioning/run_image_captioning_flax.py @@ -175,14 +175,19 @@ class ModelArguments: dtype: Optional[str] = field( default="float32", metadata={ - "help": "Floating-point format in which the model weights should be initialized and trained. Choose one of `[float32, float16, bfloat16]`." + "help": ( + "Floating-point format in which the model weights should be initialized and trained. Choose one of" + " `[float32, float16, bfloat16]`." + ) }, ) use_auth_token: bool = field( default=False, metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." 
+ "help": ( + "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + ) }, ) @@ -222,38 +227,48 @@ class DataTrainingArguments: max_target_length: Optional[int] = field( default=128, metadata={ - "help": "The maximum total sequence length for target text after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded." + "help": ( + "The maximum total sequence length for target text after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ) }, ) val_max_target_length: Optional[int] = field( default=None, metadata={ - "help": "The maximum total sequence length for validation target text after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`." - "This argument is also used to override the `max_length` param of `model.generate`, which is used " - "during evaluation." + "help": ( + "The maximum total sequence length for validation target text after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`." + "This argument is also used to override the `max_length` param of `model.generate`, which is used " + "during evaluation." + ) }, ) max_train_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + ) }, ) max_eval_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + ) }, ) max_predict_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of prediction examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of prediction examples to this " + "value if set." + ) }, ) preprocessing_num_workers: Optional[int] = field( @@ -266,8 +281,10 @@ class DataTrainingArguments: num_beams: Optional[int] = field( default=None, metadata={ - "help": "Number of beams to use for evaluation. This argument will be passed to `model.generate`, " - "which is used during evaluation." + "help": ( + "Number of beams to use for evaluation. This argument will be passed to `model.generate`, " + "which is used during evaluation." + ) }, ) overwrite_cache: bool = field( @@ -623,7 +640,7 @@ def main(): eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() if training_args.block_size % train_batch_size > 0 or training_args.block_size % eval_batch_size > 0: raise ValueError( - f"`training_args.block_size` needs to be a multiple of the global train/eval batch size." + "`training_args.block_size` needs to be a multiple of the global train/eval batch size." f"Got {training_args.block_size}, {train_batch_size} and {eval_batch_size} respectively instead." 
) @@ -1136,7 +1153,7 @@ def main(): ) # train - for (batch_idx, _) in enumerate(tqdm(range(steps_per_epoch), desc="Training...", position=1, leave=False)): + for batch_idx, _ in enumerate(tqdm(range(steps_per_epoch), desc="Training...", position=1, leave=False)): cur_step += 1 batch = next(train_batches) @@ -1150,7 +1167,10 @@ def main(): if training_args.logging_steps > 0 and cur_step % training_args.logging_steps == 0: _train_metric = unreplicate(train_metric) - desc = f"Epoch... ({epoch + 1}/{num_epochs} | Step: {cur_step} | Loss: {_train_metric['loss']} | Learning Rate: {_train_metric['learning_rate']} | Time per step: {time_per_step})" + desc = ( + f"Epoch... ({epoch + 1}/{num_epochs} | Step: {cur_step} | Loss: {_train_metric['loss']} |" + f" Learning Rate: {_train_metric['learning_rate']} | Time per step: {time_per_step})" + ) epochs.desc = desc epochs.write(desc) diff --git a/examples/flax/language-modeling/run_clm_flax.py b/examples/flax/language-modeling/run_clm_flax.py index afb6d75b38..ef16e6e886 100755 --- a/examples/flax/language-modeling/run_clm_flax.py +++ b/examples/flax/language-modeling/run_clm_flax.py @@ -138,8 +138,9 @@ class ModelArguments: model_name_or_path: Optional[str] = field( default=None, metadata={ - "help": "The model checkpoint for weights initialization." - "Don't set if you want to train a model from scratch." + "help": ( + "The model checkpoint for weights initialization. Don't set if you want to train a model from scratch." + ) }, ) model_type: Optional[str] = field( @@ -162,14 +163,19 @@ class ModelArguments: dtype: Optional[str] = field( default="float32", metadata={ - "help": "Floating-point format in which the model weights should be initialized and trained. Choose one of `[float32, float16, bfloat16]`." + "help": ( + "Floating-point format in which the model weights should be initialized and trained. Choose one of" + " `[float32, float16, bfloat16]`." + ) }, ) use_auth_token: bool = field( default=False, metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." + "help": ( + "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + ) }, ) @@ -194,15 +200,19 @@ class DataTrainingArguments: max_train_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + ) }, ) max_eval_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + ) }, ) overwrite_cache: bool = field( @@ -217,9 +227,11 @@ class DataTrainingArguments: block_size: Optional[int] = field( default=None, metadata={ - "help": "Optional input sequence length after tokenization. " - "The training dataset will be truncated in block of this size for training. " - "Default to the model max input length for single sentence inputs (take into account special tokens)." + "help": ( + "Optional input sequence length after tokenization. " + "The training dataset will be truncated in block of this size for training. 
" + "Default to the model max input length for single sentence inputs (take into account special tokens)." + ) }, ) overwrite_cache: bool = field( @@ -505,7 +517,8 @@ def main(): # clm input could be much much longer than block_size if "Token indices sequence length is longer than the" in cl.out: tok_logger.warning( - "^^^^^^^^^^^^^^^^ Please ignore the warning above - this long input will be chunked into smaller bits before being passed to the model." + "^^^^^^^^^^^^^^^^ Please ignore the warning above - this long input will be chunked into smaller bits" + " before being passed to the model." ) return output @@ -735,7 +748,8 @@ def main(): write_train_metric(summary_writer, train_metrics, train_time, cur_step) epochs.write( - f"Step... ({cur_step} | Loss: {train_metric['loss'].mean()}, Learning Rate: {train_metric['learning_rate'].mean()})" + f"Step... ({cur_step} | Loss: {train_metric['loss'].mean()}, Learning Rate:" + f" {train_metric['learning_rate'].mean()})" ) train_metrics = [] @@ -762,7 +776,10 @@ def main(): eval_metrics["perplexity"] = float("inf") # Print metrics and update progress bar - desc = f"Step... ({cur_step} | Eval Loss: {eval_metrics['loss']} | Eval Perplexity: {eval_metrics['perplexity']})" + desc = ( + f"Step... ({cur_step} | Eval Loss: {eval_metrics['loss']} | Eval Perplexity:" + f" {eval_metrics['perplexity']})" + ) epochs.write(desc) epochs.desc = desc diff --git a/examples/flax/language-modeling/run_mlm_flax.py b/examples/flax/language-modeling/run_mlm_flax.py index 6ea0f6e156..04796e83fa 100755 --- a/examples/flax/language-modeling/run_mlm_flax.py +++ b/examples/flax/language-modeling/run_mlm_flax.py @@ -136,8 +136,9 @@ class ModelArguments: model_name_or_path: Optional[str] = field( default=None, metadata={ - "help": "The model checkpoint for weights initialization." - "Don't set if you want to train a model from scratch." + "help": ( + "The model checkpoint for weights initialization.Don't set if you want to train a model from scratch." + ) }, ) model_type: Optional[str] = field( @@ -160,14 +161,19 @@ class ModelArguments: dtype: Optional[str] = field( default="float32", metadata={ - "help": "Floating-point format in which the model weights should be initialized and trained. Choose one of `[float32, float16, bfloat16]`." + "help": ( + "Floating-point format in which the model weights should be initialized and trained. Choose one of" + " `[float32, float16, bfloat16]`." + ) }, ) use_auth_token: bool = field( default=False, metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." + "help": ( + "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + ) }, ) @@ -209,8 +215,10 @@ class DataTrainingArguments: max_seq_length: Optional[int] = field( default=None, metadata={ - "help": "The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated. Default to the max input length of the model." + "help": ( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated. Default to the max input length of the model." + ) }, ) preprocessing_num_workers: Optional[int] = field( @@ -223,8 +231,10 @@ class DataTrainingArguments: pad_to_max_length: bool = field( default=False, metadata={ - "help": "Whether to pad all samples to `max_seq_length`. 
" - "If False, will pad the samples dynamically when batching to the maximum length in the batch." + "help": ( + "Whether to pad all samples to `max_seq_length`. " + "If False, will pad the samples dynamically when batching to the maximum length in the batch." + ) }, ) line_by_line: bool = field( @@ -764,7 +774,8 @@ def main(): write_train_metric(summary_writer, train_metrics, train_time, cur_step) epochs.write( - f"Step... ({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']})" + f"Step... ({cur_step} | Loss: {train_metric['loss']}, Learning Rate:" + f" {train_metric['learning_rate']})" ) train_metrics = [] diff --git a/examples/flax/language-modeling/run_t5_mlm_flax.py b/examples/flax/language-modeling/run_t5_mlm_flax.py index 368ecf0e61..e5ed47a8a5 100755 --- a/examples/flax/language-modeling/run_t5_mlm_flax.py +++ b/examples/flax/language-modeling/run_t5_mlm_flax.py @@ -135,8 +135,9 @@ class ModelArguments: model_name_or_path: Optional[str] = field( default=None, metadata={ - "help": "The model checkpoint for weights initialization." - "Don't set if you want to train a model from scratch." + "help": ( + "The model checkpoint for weights initialization.Don't set if you want to train a model from scratch." + ) }, ) model_type: Optional[str] = field( @@ -159,14 +160,19 @@ class ModelArguments: dtype: Optional[str] = field( default="float32", metadata={ - "help": "Floating-point format in which the model weights should be initialized and trained. Choose one of `[float32, float16, bfloat16]`." + "help": ( + "Floating-point format in which the model weights should be initialized and trained. Choose one of" + " `[float32, float16, bfloat16]`." + ) }, ) use_auth_token: bool = field( default=False, metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." + "help": ( + "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + ) }, ) @@ -208,7 +214,10 @@ class DataTrainingArguments: max_seq_length: Optional[int] = field( default=None, metadata={ - "help": "The maximum total input sequence length after tokenization and masking. Sequences longer than this will be truncated. Default to the max input length of the model." + "help": ( + "The maximum total input sequence length after tokenization and masking. Sequences longer than this" + " will be truncated. Default to the max input length of the model." + ) }, ) preprocessing_num_workers: Optional[int] = field( @@ -337,12 +346,14 @@ class FlaxDataCollatorForT5MLM: if batch["input_ids"].shape[-1] != self.input_length: raise ValueError( - f"`input_ids` are incorrectly preprocessed. `input_ids` length is {batch['input_ids'].shape[-1]}, but should be {self.target_length}." + f"`input_ids` are incorrectly preprocessed. `input_ids` length is {batch['input_ids'].shape[-1]}, but" + f" should be {self.target_length}." ) if batch["labels"].shape[-1] != self.target_length: raise ValueError( - f"`labels` are incorrectly preprocessed. `labels` length is {batch['labels'].shape[-1]}, but should be {self.target_length}." + f"`labels` are incorrectly preprocessed. `labels` length is {batch['labels'].shape[-1]}, but should be" + f" {self.target_length}." ) # to check that tokens are correctly preprocessed, one can run `self.tokenizer.batch_decode(input_ids)` and `self.tokenizer.batch_decode(labels)` here... 
@@ -884,7 +895,8 @@ def main(): write_train_metric(summary_writer, train_metrics, train_time, cur_step) epochs.write( - f"Step... ({cur_step} | Loss: {train_metric['loss'].mean()}, Learning Rate: {train_metric['learning_rate'].mean()})" + f"Step... ({cur_step} | Loss: {train_metric['loss'].mean()}, Learning Rate:" + f" {train_metric['learning_rate'].mean()})" ) train_metrics = [] diff --git a/examples/flax/question-answering/run_qa.py b/examples/flax/question-answering/run_qa.py index 361da133f9..eafbe876e1 100644 --- a/examples/flax/question-answering/run_qa.py +++ b/examples/flax/question-answering/run_qa.py @@ -157,14 +157,19 @@ class ModelArguments: use_auth_token: bool = field( default=False, metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." + "help": ( + "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + ) }, ) dtype: Optional[str] = field( default="float32", metadata={ - "help": "Floating-point format in which the model weights should be initialized and trained. Choose one of `[float32, float16, bfloat16]`." + "help": ( + "Floating-point format in which the model weights should be initialized and trained. Choose one of" + " `[float32, float16, bfloat16]`." + ) }, ) @@ -200,37 +205,46 @@ class DataTrainingArguments: max_seq_length: int = field( default=384, metadata={ - "help": "The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded." + "help": ( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ) }, ) pad_to_max_length: bool = field( default=False, metadata={ - "help": "Whether to pad all samples to `max_seq_length`. " - "If False, will pad the samples dynamically when batching to the maximum length in the batch (which can " - "be faster on GPU but will be slower on TPU)." + "help": ( + "Whether to pad all samples to `max_seq_length`. If False, will pad the samples dynamically when" + " batching to the maximum length in the batch (which can be faster on GPU but will be slower on TPU)." + ) }, ) max_train_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + ) }, ) max_eval_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + ) }, ) max_predict_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of prediction examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of prediction examples to this " + "value if set." 
+ ) }, ) version_2_with_negative: bool = field( @@ -239,9 +253,11 @@ class DataTrainingArguments: null_score_diff_threshold: float = field( default=0.0, metadata={ - "help": "The threshold used to select the null answer: if the best answer has a score that is less than " - "the score of the null answer minus this threshold, the null answer is selected for this example. " - "Only useful when `version_2_with_negative=True`." + "help": ( + "The threshold used to select the null answer: if the best answer has a score that is less than " + "the score of the null answer minus this threshold, the null answer is selected for this example. " + "Only useful when `version_2_with_negative=True`." + ) }, ) doc_stride: int = field( @@ -255,8 +271,10 @@ class DataTrainingArguments: max_answer_length: int = field( default=30, metadata={ - "help": "The maximum length of an answer that can be generated. This is needed because the start " - "and end predictions are not conditioned on one another." + "help": ( + "The maximum length of an answer that can be generated. This is needed because the start " + "and end predictions are not conditioned on one another." + ) }, ) @@ -498,9 +516,9 @@ def main(): # region Tokenizer check: this script requires a fast tokenizer. if not isinstance(tokenizer, PreTrainedTokenizerFast): raise ValueError( - "This example script only works for models that have a fast tokenizer. Checkout the big table of models " - "at https://huggingface.co/transformers/index.html#supported-frameworks to find the model types that meet this " - "requirement" + "This example script only works for models that have a fast tokenizer. Check out the big table of models at" + " https://huggingface.co/transformers/index.html#supported-frameworks to find the model types that meet" + " this requirement" ) # endregion @@ -928,7 +946,8 @@ def main(): write_train_metric(summary_writer, train_metrics, train_time, cur_step) epochs.write( - f"Step... ({cur_step}/{total_steps} | Training Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']})" + f"Step... ({cur_step}/{total_steps} | Training Loss: {train_metric['loss']}, Learning Rate:" + f" {train_metric['learning_rate']})" ) train_metrics = [] diff --git a/examples/flax/summarization/run_summarization_flax.py b/examples/flax/summarization/run_summarization_flax.py index 3ebff73b98..9730daa163 100644 --- a/examples/flax/summarization/run_summarization_flax.py +++ b/examples/flax/summarization/run_summarization_flax.py @@ -149,8 +149,9 @@ class ModelArguments: model_name_or_path: Optional[str] = field( default=None, metadata={ - "help": "The model checkpoint for weights initialization." - "Don't set if you want to train a model from scratch." + "help": ( + "The model checkpoint for weights initialization. Don't set if you want to train a model from scratch." + ) }, ) model_type: Optional[str] = field( @@ -173,14 +174,19 @@ class ModelArguments: dtype: Optional[str] = field( default="float32", metadata={ - "help": "Floating-point format in which the model weights should be initialized and trained. Choose one of `[float32, float16, bfloat16]`." + "help": ( + "Floating-point format in which the model weights should be initialized and trained. Choose one of" + " `[float32, float16, bfloat16]`." + ) }, ) use_auth_token: bool = field( default=False, metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." 
+ "help": ( + "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + ) }, ) @@ -217,45 +223,57 @@ class DataTrainingArguments: max_source_length: Optional[int] = field( default=1024, metadata={ - "help": "The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded." + "help": ( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ) }, ) max_target_length: Optional[int] = field( default=128, metadata={ - "help": "The maximum total sequence length for target text after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded." + "help": ( + "The maximum total sequence length for target text after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ) }, ) val_max_target_length: Optional[int] = field( default=None, metadata={ - "help": "The maximum total sequence length for validation target text after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`." - "This argument is also used to override the `max_length` param of `model.generate`, which is used " - "during evaluation." + "help": ( + "The maximum total sequence length for validation target text after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`." + "This argument is also used to override the `max_length` param of `model.generate`, which is used " + "during evaluation." + ) }, ) max_train_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + ) }, ) max_eval_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + ) }, ) max_predict_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of prediction examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of prediction examples to this " + "value if set." + ) }, ) preprocessing_num_workers: Optional[int] = field( @@ -271,8 +289,10 @@ class DataTrainingArguments: num_beams: Optional[int] = field( default=None, metadata={ - "help": "Number of beams to use for evaluation. This argument will be passed to `model.generate`, " - "which is used during evaluation." + "help": ( + "Number of beams to use for evaluation. This argument will be passed to `model.generate`, " + "which is used during evaluation." + ) }, ) overwrite_cache: bool = field( @@ -831,7 +851,8 @@ def main(): train_metric = unreplicate(train_metric) epochs.write( - f"Epoch... ({epoch + 1}/{num_epochs} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']})" + f"Epoch... 
({epoch + 1}/{num_epochs} | Loss: {train_metric['loss']}, Learning Rate:" + f" {train_metric['learning_rate']})" ) # ======================== Evaluating ============================== diff --git a/examples/flax/text-classification/run_flax_glue.py b/examples/flax/text-classification/run_flax_glue.py index b22e902a4f..59e28397d1 100755 --- a/examples/flax/text-classification/run_flax_glue.py +++ b/examples/flax/text-classification/run_flax_glue.py @@ -103,8 +103,10 @@ class ModelArguments: use_auth_token: bool = field( default=False, metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." + "help": ( + "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + ) }, ) @@ -148,29 +150,37 @@ class DataTrainingArguments: max_seq_length: int = field( default=None, metadata={ - "help": "The maximum total input sequence length after tokenization. If set, sequences longer " - "than this will be truncated, sequences shorter will be padded." + "help": ( + "The maximum total input sequence length after tokenization. If set, sequences longer " + "than this will be truncated, sequences shorter will be padded." + ) }, ) max_train_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + ) }, ) max_eval_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + ) }, ) max_predict_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of prediction examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of prediction examples to this " + "value if set." + ) }, ) @@ -585,7 +595,8 @@ def main(): write_train_metric(summary_writer, train_metrics, train_time, cur_step) epochs.write( - f"Step... ({cur_step}/{total_steps} | Training Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']})" + f"Step... ({cur_step}/{total_steps} | Training Loss: {train_metric['loss']}, Learning Rate:" + f" {train_metric['learning_rate']})" ) train_metrics = [] diff --git a/examples/flax/token-classification/run_flax_ner.py b/examples/flax/token-classification/run_flax_ner.py index 21f832f9dc..79a1e85fb2 100644 --- a/examples/flax/token-classification/run_flax_ner.py +++ b/examples/flax/token-classification/run_flax_ner.py @@ -150,8 +150,10 @@ class ModelArguments: use_auth_token: bool = field( default=False, metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." + "help": ( + "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + ) }, ) @@ -196,36 +198,46 @@ class DataTrainingArguments: max_seq_length: int = field( default=None, metadata={ - "help": "The maximum total input sequence length after tokenization. 
If set, sequences longer " - "than this will be truncated, sequences shorter will be padded." + "help": ( + "The maximum total input sequence length after tokenization. If set, sequences longer " + "than this will be truncated, sequences shorter will be padded." + ) }, ) max_train_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + ) }, ) max_eval_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + ) }, ) max_predict_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of prediction examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of prediction examples to this " + "value if set." + ) }, ) label_all_tokens: bool = field( default=False, metadata={ - "help": "Whether to put the label for one word on all tokens of generated by that word or just on the " - "one (in which case the other tokens will have a padding index)." + "help": ( + "Whether to put the label for one word on all tokens generated by that word or just on the " + "one (in which case the other tokens will have a padding index)." + ) }, ) return_entity_level_metrics: bool = field( @@ -693,7 +705,8 @@ def main(): write_train_metric(summary_writer, train_metrics, train_time, cur_step) epochs.write( - f"Step... ({cur_step}/{total_steps} | Training Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']})" + f"Step... ({cur_step}/{total_steps} | Training Loss: {train_metric['loss']}, Learning Rate:" + f" {train_metric['learning_rate']})" ) train_metrics = [] @@ -744,7 +757,8 @@ def main(): logger.info(f"Step... ({cur_step}/{total_steps} | Validation metrics: {eval_metrics}") else: logger.info( - f"Step... ({cur_step}/{total_steps} | Validation f1: {eval_metrics['f1']}, Validation Acc: {eval_metrics['accuracy']})" + f"Step... ({cur_step}/{total_steps} | Validation f1: {eval_metrics['f1']}, Validation Acc:" + f" {eval_metrics['accuracy']})" ) if has_tensorboard and jax.process_index() == 0: diff --git a/examples/flax/vision/run_image_classification.py b/examples/flax/vision/run_image_classification.py index 0dc7b2f957..d00f99399a 100644 --- a/examples/flax/vision/run_image_classification.py +++ b/examples/flax/vision/run_image_classification.py @@ -134,8 +134,9 @@ class ModelArguments: model_name_or_path: Optional[str] = field( default=None, metadata={ - "help": "The model checkpoint for weights initialization." - "Don't set if you want to train a model from scratch." + "help": ( + "The model checkpoint for weights initialization. Don't set if you want to train a model from scratch." + ) }, ) model_type: Optional[str] = field( @@ -151,14 +152,19 @@ class ModelArguments: dtype: Optional[str] = field( default="float32", metadata={ - "help": "Floating-point format in which the model weights should be initialized and trained. Choose one of `[float32, float16, bfloat16]`." 
+ "help": ( + "Floating-point format in which the model weights should be initialized and trained. Choose one of" + " `[float32, float16, bfloat16]`." + ) }, ) use_auth_token: bool = field( default=False, metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." + "help": ( + "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + ) }, ) @@ -179,15 +185,19 @@ class DataTrainingArguments: max_train_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + ) }, ) max_eval_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + ) }, ) overwrite_cache: bool = field( @@ -509,7 +519,8 @@ def main(): train_step_progress_bar.close() epochs.write( - f"Epoch... ({epoch + 1}/{num_epochs} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']})" + f"Epoch... ({epoch + 1}/{num_epochs} | Loss: {train_metric['loss']}, Learning Rate:" + f" {train_metric['learning_rate']})" ) # ======================== Evaluating ============================== diff --git a/examples/legacy/multiple_choice/run_multiple_choice.py b/examples/legacy/multiple_choice/run_multiple_choice.py index aeb9b9dc43..d8007da6cb 100644 --- a/examples/legacy/multiple_choice/run_multiple_choice.py +++ b/examples/legacy/multiple_choice/run_multiple_choice.py @@ -78,8 +78,10 @@ class DataTrainingArguments: max_seq_length: int = field( default=128, metadata={ - "help": "The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded." + "help": ( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ) }, ) overwrite_cache: bool = field( @@ -102,7 +104,8 @@ def main(): and not training_args.overwrite_output_dir ): raise ValueError( - f"Output directory ({training_args.output_dir}) already exists and is not empty. Use --overwrite_output_dir to overcome." + f"Output directory ({training_args.output_dir}) already exists and is not empty. Use" + " --overwrite_output_dir to overcome." 
) # Setup logging diff --git a/examples/legacy/multiple_choice/utils_multiple_choice.py b/examples/legacy/multiple_choice/utils_multiple_choice.py index 2b6b5cc183..3dbc3689cc 100644 --- a/examples/legacy/multiple_choice/utils_multiple_choice.py +++ b/examples/legacy/multiple_choice/utils_multiple_choice.py @@ -182,7 +182,7 @@ if is_tf_available(): ) def gen(): - for (ex_index, ex) in tqdm.tqdm(enumerate(self.features), desc="convert examples to features"): + for ex_index, ex in tqdm.tqdm(enumerate(self.features), desc="convert examples to features"): if ex_index % 10000 == 0: logger.info("Writing example %d of %d" % (ex_index, len(examples))) @@ -297,7 +297,7 @@ class RaceProcessor(DataProcessor): def _create_examples(self, lines, set_type): """Creates examples for the training and dev sets.""" examples = [] - for (_, data_raw) in enumerate(lines): + for _, data_raw in enumerate(lines): race_id = "%s-%s" % (set_type, data_raw["race_id"]) article = data_raw["article"] for i in range(len(data_raw["answers"])): @@ -518,7 +518,7 @@ def convert_examples_to_features( label_map = {label: i for i, label in enumerate(label_list)} features = [] - for (ex_index, example) in tqdm.tqdm(enumerate(examples), desc="convert examples to features"): + for ex_index, example in tqdm.tqdm(enumerate(examples), desc="convert examples to features"): if ex_index % 10000 == 0: logger.info("Writing example %d of %d" % (ex_index, len(examples))) choices_inputs = [] diff --git a/examples/legacy/pytorch-lightning/lightning_base.py b/examples/legacy/pytorch-lightning/lightning_base.py index b7f53076e3..b3104a25a8 100644 --- a/examples/legacy/pytorch-lightning/lightning_base.py +++ b/examples/legacy/pytorch-lightning/lightning_base.py @@ -312,8 +312,10 @@ def add_generic_args(parser, root_dir) -> None: "--fp16_opt_level", type=str, default="O2", - help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']." - "See details at https://nvidia.github.io/apex/amp.html", + help=( + "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']. " + "See details at https://nvidia.github.io/apex/amp.html" + ), ) parser.add_argument("--n_tpu_cores", dest="tpu_cores", type=int) parser.add_argument("--max_grad_norm", dest="gradient_clip_val", default=1.0, type=float, help="Max gradient norm") diff --git a/examples/legacy/pytorch-lightning/run_glue.py b/examples/legacy/pytorch-lightning/run_glue.py index abb06bf526..63b58bcf41 100644 --- a/examples/legacy/pytorch-lightning/run_glue.py +++ b/examples/legacy/pytorch-lightning/run_glue.py @@ -148,8 +148,10 @@ class GLUETransformer(BaseTransformer): "--max_seq_length", default=128, type=int, - help="The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded.", + help=( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ), ) parser.add_argument( diff --git a/examples/legacy/pytorch-lightning/run_ner.py b/examples/legacy/pytorch-lightning/run_ner.py index 1066c6fed4..b1bdd125c2 100644 --- a/examples/legacy/pytorch-lightning/run_ner.py +++ b/examples/legacy/pytorch-lightning/run_ner.py @@ -173,8 +173,10 @@ class NERTransformer(BaseTransformer): "--max_seq_length", default=128, type=int, - help="The maximum total input sequence length after tokenization. 
Sequences longer " - "than this will be truncated, sequences shorter will be padded.", + help=( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ), ) parser.add_argument( diff --git a/examples/legacy/question-answering/run_squad.py b/examples/legacy/question-answering/run_squad.py index fbf2ebd635..674e7a9acc 100644 --- a/examples/legacy/question-answering/run_squad.py +++ b/examples/legacy/question-answering/run_squad.py @@ -551,8 +551,10 @@ def main(): "--max_seq_length", default=384, type=int, - help="The maximum total input sequence length after WordPiece tokenization. Sequences " - "longer than this will be truncated, and sequences shorter than this will be padded.", + help=( + "The maximum total input sequence length after WordPiece tokenization. Sequences " + "longer than this will be truncated, and sequences shorter than this will be padded." + ), ) parser.add_argument( "--doc_stride", @@ -564,8 +566,10 @@ def main(): "--max_query_length", default=64, type=int, - help="The maximum number of tokens for the question. Questions longer than this will " - "be truncated to this length.", + help=( + "The maximum number of tokens for the question. Questions longer than this will " + "be truncated to this length." + ), ) parser.add_argument("--do_train", action="store_true", help="Whether to run training.") parser.add_argument("--do_eval", action="store_true", help="Whether to run eval on the dev set.") @@ -610,20 +614,27 @@ def main(): "--max_answer_length", default=30, type=int, - help="The maximum length of an answer that can be generated. This is needed because the start " - "and end predictions are not conditioned on one another.", + help=( + "The maximum length of an answer that can be generated. This is needed because the start " + "and end predictions are not conditioned on one another." + ), ) parser.add_argument( "--verbose_logging", action="store_true", - help="If true, all of the warnings related to data processing will be printed. " - "A number of warnings are expected for a normal SQuAD evaluation.", + help=( + "If true, all of the warnings related to data processing will be printed. " + "A number of warnings are expected for a normal SQuAD evaluation." + ), ) parser.add_argument( "--lang_id", default=0, type=int, - help="language id of input for language-specific xlm models (see tokenization_xlm.PRETRAINED_INIT_CONFIGURATION)", + help=( + "language id of input for language-specific xlm models (see" + " tokenization_xlm.PRETRAINED_INIT_CONFIGURATION)" + ), ) parser.add_argument("--logging_steps", type=int, default=500, help="Log every X updates steps.") @@ -652,8 +663,10 @@ def main(): "--fp16_opt_level", type=str, default="O1", - help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']." - "See details at https://nvidia.github.io/apex/amp.html", + help=( + "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']." 
+ "See details at https://nvidia.github.io/apex/amp.html" + ), ) parser.add_argument("--server_ip", type=str, default="", help="Can be used for distant debugging.") parser.add_argument("--server_port", type=str, default="", help="Can be used for distant debugging.") diff --git a/examples/legacy/question-answering/run_squad_trainer.py b/examples/legacy/question-answering/run_squad_trainer.py index 7089326372..314b140e82 100644 --- a/examples/legacy/question-answering/run_squad_trainer.py +++ b/examples/legacy/question-answering/run_squad_trainer.py @@ -84,7 +84,8 @@ def main(): and not training_args.overwrite_output_dir ): raise ValueError( - f"Output directory ({training_args.output_dir}) already exists and is not empty. Use --overwrite_output_dir to overcome." + f"Output directory ({training_args.output_dir}) already exists and is not empty. Use" + " --overwrite_output_dir to overcome." ) # Setup logging diff --git a/examples/legacy/run_language_modeling.py b/examples/legacy/run_language_modeling.py index 12b62f5d81..59490f710e 100755 --- a/examples/legacy/run_language_modeling.py +++ b/examples/legacy/run_language_modeling.py @@ -68,7 +68,10 @@ class ModelArguments: model_name_or_path: Optional[str] = field( default=None, metadata={ - "help": "The model checkpoint for weights initialization. Leave None if you want to train a model from scratch." + "help": ( + "The model checkpoint for weights initialization. Leave None if you want to train a model from" + " scratch." + ) }, ) model_type: Optional[str] = field( @@ -99,8 +102,10 @@ class DataTrainingArguments: train_data_files: Optional[str] = field( default=None, metadata={ - "help": "The input training data files (multiple files in glob format). " - "Very often splitting large files to smaller files can prevent tokenizer going out of memory" + "help": ( + "The input training data files (multiple files in glob format). " + "Very often splitting large files to smaller files can prevent tokenizer going out of memory" + ) }, ) eval_data_file: Optional[str] = field( @@ -130,7 +135,10 @@ class DataTrainingArguments: plm_probability: float = field( default=1 / 6, metadata={ - "help": "Ratio of length of a span of masked tokens to surrounding context length for permutation language modeling." + "help": ( + "Ratio of length of a span of masked tokens to surrounding context length for permutation language" + " modeling." + ) }, ) max_span_length: int = field( @@ -140,9 +148,11 @@ class DataTrainingArguments: block_size: int = field( default=-1, metadata={ - "help": "Optional input sequence length after tokenization." - "The training dataset will be truncated in block of this size for training." - "Default to the model max input length for single sentence inputs (take into account special tokens)." + "help": ( + "Optional input sequence length after tokenization." + "The training dataset will be truncated in block of this size for training." + "Default to the model max input length for single sentence inputs (take into account special tokens)." + ) }, ) overwrite_cache: bool = field( @@ -206,7 +216,8 @@ def main(): and not training_args.overwrite_output_dir ): raise ValueError( - f"Output directory ({training_args.output_dir}) already exists and is not empty. Use --overwrite_output_dir to overcome." + f"Output directory ({training_args.output_dir}) already exists and is not empty. Use" + " --overwrite_output_dir to overcome." 
) # Setup logging @@ -253,8 +264,8 @@ def main(): tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path, cache_dir=model_args.cache_dir) else: raise ValueError( - "You are instantiating a new tokenizer from scratch. This is not supported, but you can do it from another script, save it," - "and load it from here, using --tokenizer_name" + "You are instantiating a new tokenizer from scratch. This is not supported, but you can do it from another" + " script, save it, and load it from here, using --tokenizer_name" ) if model_args.model_name_or_path: diff --git a/examples/legacy/run_openai_gpt.py b/examples/legacy/run_openai_gpt.py index 2af3e267d2..1f02570f8f 100755 --- a/examples/legacy/run_openai_gpt.py +++ b/examples/legacy/run_openai_gpt.py @@ -126,15 +126,15 @@ def main(): "--max_steps", default=-1, type=int, - help="If > 0: set total number of training \ - steps to perform. Override num_train_epochs.", + help=( + "If > 0: set total number of training steps to perform. Override num_train_epochs." + ), ) parser.add_argument( "--gradient_accumulation_steps", type=int, default=1, - help="Number of updates steps to accumulate before\ - performing a backward/update pass.", + help="Number of update steps to accumulate before performing a backward/update pass.", ) parser.add_argument("--learning_rate", type=float, default=6.25e-5) parser.add_argument("--warmup_steps", default=0, type=int, help="Linear warmup over warmup_steps.") diff --git a/examples/legacy/run_swag.py b/examples/legacy/run_swag.py index e776041089..5cac156724 100755 --- a/examples/legacy/run_swag.py +++ b/examples/legacy/run_swag.py @@ -516,8 +516,10 @@ def main(): "--max_seq_length", default=384, type=int, - help="The maximum total input sequence length after tokenization. Sequences " - "longer than this will be truncated, and sequences shorter than this will be padded.", + help=( + "The maximum total input sequence length after tokenization. Sequences " + "longer than this will be truncated, and sequences shorter than this will be padded." + ), ) parser.add_argument("--do_train", action="store_true", help="Whether to run training.") parser.add_argument("--do_eval", action="store_true", help="Whether to run eval on the dev set.") @@ -576,8 +578,10 @@ def main(): "--fp16_opt_level", type=str, default="O1", - help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']." - "See details at https://nvidia.github.io/apex/amp.html", + help=( + "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']. " + "See details at https://nvidia.github.io/apex/amp.html" + ), ) parser.add_argument("--server_ip", type=str, default="", help="Can be used for distant debugging.") parser.add_argument("--server_port", type=str, default="", help="Can be used for distant debugging.") diff --git a/examples/legacy/seq2seq/finetune_trainer.py b/examples/legacy/seq2seq/finetune_trainer.py index 3efc8f90f2..f174f7fb50 100755 --- a/examples/legacy/seq2seq/finetune_trainer.py +++ b/examples/legacy/seq2seq/finetune_trainer.py @@ -90,31 +90,39 @@ class DataTrainingArguments: max_source_length: Optional[int] = field( default=1024, metadata={ - "help": "The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded." + "help": ( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." 
+ ) }, ) max_target_length: Optional[int] = field( default=128, metadata={ - "help": "The maximum total sequence length for target text after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded." + "help": ( + "The maximum total sequence length for target text after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ) }, ) val_max_target_length: Optional[int] = field( default=142, metadata={ - "help": "The maximum total sequence length for validation target text after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded. " - "This argument is also used to override the ``max_length`` param of ``model.generate``, which is used " - "during ``evaluate`` and ``predict``." + "help": ( + "The maximum total sequence length for validation target text after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded. " + "This argument is also used to override the ``max_length`` param of ``model.generate``, which is used " + "during ``evaluate`` and ``predict``." + ) }, ) test_max_target_length: Optional[int] = field( default=142, metadata={ - "help": "The maximum total sequence length for test target text after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded." + "help": ( + "The maximum total sequence length for test target text after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ) }, ) n_train: Optional[int] = field(default=-1, metadata={"help": "# training examples. -1 means use all."}) diff --git a/examples/legacy/seq2seq/old_test_calculate_rouge.py b/examples/legacy/seq2seq/old_test_calculate_rouge.py index bd1dd57a27..17b87cb481 100644 --- a/examples/legacy/seq2seq/old_test_calculate_rouge.py +++ b/examples/legacy/seq2seq/old_test_calculate_rouge.py @@ -22,15 +22,30 @@ from utils import calculate_rouge PRED = [ - 'Prosecutor: "No videos were used in the crash investigation" German papers say they saw a cell phone video of the final seconds on board Flight 9525. The Germanwings co-pilot says he had a "previous episode of severe depression" German airline confirms it knew of Andreas Lubitz\'s depression years before he took control.', - "The Palestinian Authority officially becomes the 123rd member of the International Criminal Court. The formal accession was marked with a ceremony at The Hague, in the Netherlands. The Palestinians signed the ICC's founding Rome Statute in January. Israel and the United States opposed the Palestinians' efforts to join the body.", - "Amnesty International releases its annual report on the death penalty. The report catalogs the use of state-sanctioned killing as a punitive measure across the globe. At least 607 people were executed around the world in 2014, compared to 778 in 2013. The U.S. remains one of the worst offenders for imposing capital punishment.", + 'Prosecutor: "No videos were used in the crash investigation" German papers say they saw a cell phone video of the' + ' final seconds on board Flight 9525. The Germanwings co-pilot says he had a "previous episode of severe' + " depression\" German airline confirms it knew of Andreas Lubitz's depression years before he took control.", + "The Palestinian Authority officially becomes the 123rd member of the International Criminal Court. The formal" + " accession was marked with a ceremony at The Hague, in the Netherlands. 
The Palestinians signed the ICC's" + " founding Rome Statute in January. Israel and the United States opposed the Palestinians' efforts to join the" + " body.", + "Amnesty International releases its annual report on the death penalty. The report catalogs the use of" + " state-sanctioned killing as a punitive measure across the globe. At least 607 people were executed around the" + " world in 2014, compared to 778 in 2013. The U.S. remains one of the worst offenders for imposing capital" + " punishment.", ] TGT = [ - 'Marseille prosecutor says "so far no videos were used in the crash investigation" despite media reports . Journalists at Bild and Paris Match are "very confident" the video clip is real, an editor says . Andreas Lubitz had informed his Lufthansa training school of an episode of severe depression, airline says .', - "Membership gives the ICC jurisdiction over alleged crimes committed in Palestinian territories since last June . Israel and the United States opposed the move, which could open the door to war crimes investigations against Israelis .", - "Amnesty's annual death penalty report catalogs encouraging signs, but setbacks in numbers of those sentenced to death . Organization claims that governments around the world are using the threat of terrorism to advance executions . The number of executions worldwide has gone down by almost 22% compared with 2013, but death sentences up by 28% .", + 'Marseille prosecutor says "so far no videos were used in the crash investigation" despite media reports .' + ' Journalists at Bild and Paris Match are "very confident" the video clip is real, an editor says . Andreas Lubitz' + " had informed his Lufthansa training school of an episode of severe depression, airline says .", + "Membership gives the ICC jurisdiction over alleged crimes committed in Palestinian territories since last June ." + " Israel and the United States opposed the move, which could open the door to war crimes investigations against" + " Israelis .", + "Amnesty's annual death penalty report catalogs encouraging signs, but setbacks in numbers of those sentenced to" + " death . Organization claims that governments around the world are using the threat of terrorism to advance" + " executions . The number of executions worldwide has gone down by almost 22% compared with 2013, but death" + " sentences up by 28% .", ] @@ -65,7 +80,8 @@ def test_single_sent_scores_dont_depend_on_newline_sep(): ] tgt = [ "Margot Frank, died in 1945, a month earlier than previously thought.", - 'Prosecutor: "No videos were used in the crash investigation" German papers say they saw a cell phone video of the final seconds on board Flight 9525.', + 'Prosecutor: "No videos were used in the crash investigation" German papers say they saw a cell phone video of' + " the final seconds on board Flight 9525.", ] assert calculate_rouge(pred, tgt, newline_sep=True) == calculate_rouge(pred, tgt, newline_sep=False) diff --git a/examples/legacy/seq2seq/run_eval.py b/examples/legacy/seq2seq/run_eval.py index e21f57c1c6..a8aa8e7ef9 100755 --- a/examples/legacy/seq2seq/run_eval.py +++ b/examples/legacy/seq2seq/run_eval.py @@ -121,7 +121,10 @@ def run_generate(verbose=True): nargs="?", type=str, const=datetime_now(), - help="use in conjunction w/ --dump-args to print with the results whatever other info you'd like, e.g. lang=en-ru. If no value is passed, the current datetime string will be used.", + help=( + "use in conjunction w/ --dump-args to print with the results whatever other info you'd like, e.g." 
+ " lang=en-ru. If no value is passed, the current datetime string will be used." + ), ) # Unspecified args like --num_beams=2 --decoder_start_token_id=4 are passed to model.generate args, rest = parser.parse_known_args() diff --git a/examples/legacy/seq2seq/run_eval_search.py b/examples/legacy/seq2seq/run_eval_search.py index f7b3bda0f5..e1a0c8660c 100755 --- a/examples/legacy/seq2seq/run_eval_search.py +++ b/examples/legacy/seq2seq/run_eval_search.py @@ -35,7 +35,7 @@ def parse_search_arg(search): groups = search.split() entries = {k: vs for k, vs in (g.split("=") for g in groups)} entry_names = list(entries.keys()) - sets = [list((f"--{k} {v}") for v in vs.split(":")) for k, vs in entries.items()] + sets = [list(f"--{k} {v}" for v in vs.split(":")) for k, vs in entries.items()] matrix = [list(x) for x in itertools.product(*sets)] return matrix, entry_names @@ -66,7 +66,10 @@ def run_search(): prog = sys.argv[0] parser = argparse.ArgumentParser( - usage="\n\nImportant: this script accepts all arguments `run_eval.py` accepts and then a few extra, therefore refer to `run_eval.py -h` for the complete list." + usage=( + "\n\nImportant: this script accepts all arguments `run_eval.py` accepts and then a few extra, therefore" + " refer to `run_eval.py -h` for the complete list." + ) ) parser.add_argument( "--search", @@ -83,7 +86,10 @@ def run_search(): nargs="?", type=str, const=datetime_now(), - help="add custom notes to be printed before the results table. If no value is passed, the current datetime string will be used.", + help=( + "add custom notes to be printed before the results table. If no value is passed, the current datetime" + " string will be used." + ), ) args, args_main = parser.parse_known_args() # we share some of the args diff --git a/examples/legacy/seq2seq/seq2seq_trainer.py b/examples/legacy/seq2seq/seq2seq_trainer.py index eeff082499..dbf12725f2 100644 --- a/examples/legacy/seq2seq/seq2seq_trainer.py +++ b/examples/legacy/seq2seq/seq2seq_trainer.py @@ -57,9 +57,10 @@ class Seq2SeqTrainer(Trainer): super().__init__(*args, **kwargs) if config is None: - assert isinstance( - self.model, PreTrainedModel - ), f"If no `config` is passed the model to be trained has to be of type `PreTrainedModel`, but is {self.model.__class__}" + assert isinstance(self.model, PreTrainedModel), ( + "If no `config` is passed the model to be trained has to be of type `PreTrainedModel`, but is" + f" {self.model.__class__}" + ) self.config = self.model.config else: self.config = config @@ -68,13 +69,15 @@ class Seq2SeqTrainer(Trainer): self.vocab_size = self.config.tgt_vocab_size if isinstance(self.config, FSMTConfig) else self.config.vocab_size if self.args.label_smoothing != 0 or (self.data_args is not None and self.data_args.ignore_pad_token_for_loss): - assert ( - self.config.pad_token_id is not None - ), "Make sure that `config.pad_token_id` is correcly defined when ignoring `pad_token` for loss calculation or doing label smoothing." + assert self.config.pad_token_id is not None, ( + "Make sure that `config.pad_token_id` is correcly defined when ignoring `pad_token` for loss" + " calculation or doing label smoothing." + ) if self.config.pad_token_id is None and self.config.eos_token_id is not None: logger.warning( - f"The `config.pad_token_id` is `None`. Using `config.eos_token_id` = {self.config.eos_token_id} for padding.." + f"The `config.pad_token_id` is `None`. Using `config.eos_token_id` = {self.config.eos_token_id} for" + " padding.." 
) if self.args.label_smoothing == 0: @@ -248,7 +251,8 @@ class Seq2SeqTrainer(Trainer): if pad_token_id is None: raise ValueError( - f"Make sure that either `config.pad_token_id` or `config.eos_token_id` is defined if tensor has to be padded to `max_length`={max_length}" + "Make sure that either `config.pad_token_id` or `config.eos_token_id` is defined if tensor has to be" + f" padded to `max_length`={max_length}" ) padded_tensor = pad_token_id * torch.ones( diff --git a/examples/legacy/seq2seq/xla_spawn.py b/examples/legacy/seq2seq/xla_spawn.py index d84b419945..5df6bfa2d5 100644 --- a/examples/legacy/seq2seq/xla_spawn.py +++ b/examples/legacy/seq2seq/xla_spawn.py @@ -39,9 +39,7 @@ def parse_args(): """ parser = ArgumentParser( description=( - "PyTorch TPU distributed training launch " - "helper utility that will spawn up " - "multiple distributed processes" + "PyTorch TPU distributed training launch helper utility that will spawn up multiple distributed processes" ) ) diff --git a/examples/legacy/text-classification/run_tf_text_classification.py b/examples/legacy/text-classification/run_tf_text_classification.py index 3564775f30..1f845db04c 100755 --- a/examples/legacy/text-classification/run_tf_text_classification.py +++ b/examples/legacy/text-classification/run_tf_text_classification.py @@ -168,8 +168,10 @@ class DataTrainingArguments: max_seq_length: int = field( default=128, metadata={ - "help": "The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded." + "help": ( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ) }, ) overwrite_cache: bool = field( @@ -215,7 +217,8 @@ def main(): and not training_args.overwrite_output_dir ): raise ValueError( - f"Output directory ({training_args.output_dir}) already exists and is not empty. Use --overwrite_output_dir to overcome." + f"Output directory ({training_args.output_dir}) already exists and is not empty. Use" + " --overwrite_output_dir to overcome." ) # Setup logging diff --git a/examples/legacy/token-classification/run_ner.py b/examples/legacy/token-classification/run_ner.py index a653ecb91c..477ccb50fb 100644 --- a/examples/legacy/token-classification/run_ner.py +++ b/examples/legacy/token-classification/run_ner.py @@ -87,8 +87,10 @@ class DataTrainingArguments: max_seq_length: int = field( default=128, metadata={ - "help": "The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded." + "help": ( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ) }, ) overwrite_cache: bool = field( @@ -116,7 +118,8 @@ def main(): and not training_args.overwrite_output_dir ): raise ValueError( - f"Output directory ({training_args.output_dir}) already exists and is not empty. Use --overwrite_output_dir to overcome." + f"Output directory ({training_args.output_dir}) already exists and is not empty. Use" + " --overwrite_output_dir to overcome." 
) module = import_module("tasks") diff --git a/examples/legacy/token-classification/run_tf_ner.py b/examples/legacy/token-classification/run_tf_ner.py index 0169a10f24..857d777238 100755 --- a/examples/legacy/token-classification/run_tf_ner.py +++ b/examples/legacy/token-classification/run_tf_ner.py @@ -88,8 +88,10 @@ class DataTrainingArguments: max_seq_length: int = field( default=128, metadata={ - "help": "The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded." + "help": ( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ) }, ) overwrite_cache: bool = field( @@ -111,7 +113,8 @@ def main(): and not training_args.overwrite_output_dir ): raise ValueError( - f"Output directory ({training_args.output_dir}) already exists and is not empty. Use --overwrite_output_dir to overcome." + f"Output directory ({training_args.output_dir}) already exists and is not empty. Use" + " --overwrite_output_dir to overcome." ) module = import_module("tasks") diff --git a/examples/legacy/token-classification/utils_ner.py b/examples/legacy/token-classification/utils_ner.py index 2537aecfca..e1fb4d18c7 100644 --- a/examples/legacy/token-classification/utils_ner.py +++ b/examples/legacy/token-classification/utils_ner.py @@ -103,7 +103,7 @@ class TokenClassificationTask: label_map = {label: i for i, label in enumerate(label_list)} features = [] - for (ex_index, example) in enumerate(examples): + for ex_index, example in enumerate(examples): if ex_index % 10_000 == 0: logger.info("Writing example %d of %d", ex_index, len(examples)) diff --git a/examples/pytorch/audio-classification/run_audio_classification.py b/examples/pytorch/audio-classification/run_audio_classification.py index 6ab9b66f94..c9d682315e 100644 --- a/examples/pytorch/audio-classification/run_audio_classification.py +++ b/examples/pytorch/audio-classification/run_audio_classification.py @@ -86,8 +86,9 @@ class DataTrainingArguments: eval_split_name: str = field( default="validation", metadata={ - "help": "The name of the training data set split to use (via the datasets library). Defaults to " - "'validation'" + "help": ( + "The name of the training data set split to use (via the datasets library). Defaults to 'validation'" + ) }, ) audio_column_name: str = field( @@ -100,15 +101,19 @@ class DataTrainingArguments: max_train_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + ) }, ) max_eval_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + ) }, ) max_length_seconds: float = field( @@ -149,8 +154,10 @@ class ModelArguments: use_auth_token: bool = field( default=False, metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." + "help": ( + "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." 
+ ) }, ) freeze_feature_extractor: Optional[bool] = field( diff --git a/examples/pytorch/contrastive-image-text/run_clip.py b/examples/pytorch/contrastive-image-text/run_clip.py index b9506592a5..ff8944c705 100644 --- a/examples/pytorch/contrastive-image-text/run_clip.py +++ b/examples/pytorch/contrastive-image-text/run_clip.py @@ -89,8 +89,10 @@ class ModelArguments: use_auth_token: bool = field( default=False, metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." + "help": ( + "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + ) }, ) freeze_vision_model: bool = field( @@ -132,22 +134,28 @@ class DataTrainingArguments: max_seq_length: Optional[int] = field( default=128, metadata={ - "help": "The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded." + "help": ( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ) }, ) max_train_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + ) }, ) max_eval_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + ) }, ) overwrite_cache: bool = field( diff --git a/examples/pytorch/image-classification/run_image_classification.py b/examples/pytorch/image-classification/run_image_classification.py index 48ead53dd9..e3fb769bc4 100644 --- a/examples/pytorch/image-classification/run_image_classification.py +++ b/examples/pytorch/image-classification/run_image_classification.py @@ -93,15 +93,19 @@ class DataTrainingArguments: max_train_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + ) }, ) max_eval_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + ) }, ) @@ -140,8 +144,10 @@ class ModelArguments: use_auth_token: bool = field( default=False, metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." + "help": ( + "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." 
+ ) }, ) diff --git a/examples/pytorch/image-classification/run_image_classification_no_trainer.py b/examples/pytorch/image-classification/run_image_classification_no_trainer.py index daf67015bf..5120c21753 100644 --- a/examples/pytorch/image-classification/run_image_classification_no_trainer.py +++ b/examples/pytorch/image-classification/run_image_classification_no_trainer.py @@ -62,7 +62,10 @@ def parse_args(): "--dataset_name", type=str, default="cifar10", - help="The name of the Dataset (from the HuggingFace hub) to train on (could be your own, possibly private, dataset).", + help=( + "The name of the Dataset (from the HuggingFace hub) to train on (could be your own, possibly private," + " dataset)." + ), ) parser.add_argument("--train_dir", type=str, default=None, help="A folder containing the training data.") parser.add_argument("--validation_dir", type=str, default=None, help="A folder containing the validation data.") @@ -70,15 +73,19 @@ def parse_args(): "--max_train_samples", type=int, default=None, - help="For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set.", + help=( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + ), ) parser.add_argument( "--max_eval_samples", type=int, default=None, - help="For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set.", + help=( + "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + ), ) parser.add_argument( "--train_val_split", diff --git a/examples/pytorch/image-pretraining/run_mae.py b/examples/pytorch/image-pretraining/run_mae.py index d7df24ac31..4765ed26f3 100644 --- a/examples/pytorch/image-pretraining/run_mae.py +++ b/examples/pytorch/image-pretraining/run_mae.py @@ -74,15 +74,19 @@ class DataTrainingArguments: max_train_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + ) }, ) max_eval_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + ) }, ) @@ -104,8 +108,9 @@ class ModelArguments: model_name_or_path: str = field( default=None, metadata={ - "help": "The model checkpoint for weights initialization." - "Don't set if you want to train a model from scratch." + "help": ( + "The model checkpoint for weights initialization. Don't set if you want to train a model from scratch." + ) }, ) config_name: Optional[str] = field( @@ -114,8 +119,10 @@ class ModelArguments: config_overrides: Optional[str] = field( default=None, metadata={ - "help": "Override some existing default config settings when a model is trained from scratch. Example: " - "n_embd=10,resid_pdrop=0.2,scale_attn_weights=false,summary_type=cls_index" + "help": ( + "Override some existing default config settings when a model is trained from scratch. 
Example: " + "n_embd=10,resid_pdrop=0.2,scale_attn_weights=false,summary_type=cls_index" + ) }, ) cache_dir: Optional[str] = field( @@ -129,8 +136,10 @@ class ModelArguments: use_auth_token: bool = field( default=False, metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." + "help": ( + "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + ) }, ) mask_ratio: float = field( diff --git a/examples/pytorch/image-pretraining/run_mim.py b/examples/pytorch/image-pretraining/run_mim.py index ef6b0369a0..429c726bca 100644 --- a/examples/pytorch/image-pretraining/run_mim.py +++ b/examples/pytorch/image-pretraining/run_mim.py @@ -87,15 +87,19 @@ class DataTrainingArguments: max_train_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + ) }, ) max_eval_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + ) }, ) @@ -117,9 +121,11 @@ class ModelArguments: model_name_or_path: str = field( default=None, metadata={ - "help": "The model checkpoint for weights initialization. Can be a local path to a pytorch_model.bin or a " - "checkpoint identifier on the hub. " - "Don't set if you want to train a model from scratch." + "help": ( + "The model checkpoint for weights initialization. Can be a local path to a pytorch_model.bin or a " + "checkpoint identifier on the hub. " + "Don't set if you want to train a model from scratch." + ) }, ) model_type: Optional[str] = field( @@ -132,8 +138,10 @@ class ModelArguments: config_overrides: Optional[str] = field( default=None, metadata={ - "help": "Override some existing default config settings when a model is trained from scratch. Example: " - "n_embd=10,resid_pdrop=0.2,scale_attn_weights=false,summary_type=cls_index" + "help": ( + "Override some existing default config settings when a model is trained from scratch. Example: " + "n_embd=10,resid_pdrop=0.2,scale_attn_weights=false,summary_type=cls_index" + ) }, ) cache_dir: Optional[str] = field( @@ -148,20 +156,26 @@ class ModelArguments: use_auth_token: bool = field( default=False, metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." + "help": ( + "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + ) }, ) image_size: Optional[int] = field( default=None, metadata={ - "help": "The size (resolution) of each image. If not specified, will use `image_size` of the configuration." + "help": ( + "The size (resolution) of each image. If not specified, will use `image_size` of the configuration." + ) }, ) patch_size: Optional[int] = field( default=None, metadata={ - "help": "The size (resolution) of each patch. If not specified, will use `patch_size` of the configuration." + "help": ( + "The size (resolution) of each patch. If not specified, will use `patch_size` of the configuration." 
+ ) }, ) encoder_stride: Optional[int] = field( diff --git a/examples/pytorch/language-modeling/run_clm.py b/examples/pytorch/language-modeling/run_clm.py index 481e814e06..9661ed217d 100755 --- a/examples/pytorch/language-modeling/run_clm.py +++ b/examples/pytorch/language-modeling/run_clm.py @@ -73,8 +73,9 @@ class ModelArguments: model_name_or_path: Optional[str] = field( default=None, metadata={ - "help": "The model checkpoint for weights initialization." - "Don't set if you want to train a model from scratch." + "help": ( + "The model checkpoint for weights initialization. Don't set if you want to train a model from scratch." + ) }, ) model_type: Optional[str] = field( @@ -84,8 +85,10 @@ class ModelArguments: config_overrides: Optional[str] = field( default=None, metadata={ - "help": "Override some existing default config settings when a model is trained from scratch. Example: " - "n_embd=10,resid_pdrop=0.2,scale_attn_weights=false,summary_type=cls_index" + "help": ( + "Override some existing default config settings when a model is trained from scratch. Example: " + "n_embd=10,resid_pdrop=0.2,scale_attn_weights=false,summary_type=cls_index" + ) }, ) config_name: Optional[str] = field( @@ -109,8 +112,10 @@ class ModelArguments: use_auth_token: bool = field( default=False, metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." + "help": ( + "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + ) }, ) @@ -141,24 +146,30 @@ class DataTrainingArguments: max_train_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + ) }, ) max_eval_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + ) }, ) block_size: Optional[int] = field( default=None, metadata={ - "help": "Optional input sequence length after tokenization. " - "The training dataset will be truncated in block of this size for training. " - "Default to the model max input length for single sentence inputs (take into account special tokens)." + "help": ( + "Optional input sequence length after tokenization. " + "The training dataset will be truncated in block of this size for training. " + "Default to the model max input length for single sentence inputs (take into account special tokens)." + ) }, ) overwrite_cache: bool = field( @@ -390,7 +401,8 @@ def main(): # clm input could be much much longer than block_size if "Token indices sequence length is longer than the" in cl.out: tok_logger.warning( - "^^^^^^^^^^^^^^^^ Please ignore the warning above - this long input will be chunked into smaller bits before being passed to the model." + "^^^^^^^^^^^^^^^^ Please ignore the warning above - this long input will be chunked into smaller bits" + " before being passed to the model."
) return output diff --git a/examples/pytorch/language-modeling/run_clm_no_trainer.py b/examples/pytorch/language-modeling/run_clm_no_trainer.py index e9ac967c56..5bf8f83c87 100755 --- a/examples/pytorch/language-modeling/run_clm_no_trainer.py +++ b/examples/pytorch/language-modeling/run_clm_no_trainer.py @@ -168,7 +168,11 @@ def parse_args(): "--block_size", type=int, default=None, - help="Optional input sequence length after tokenization. The training dataset will be truncated in block of this size for training. Default to the model max input length for single sentence inputs (take into account special tokens).", + help=( + "Optional input sequence length after tokenization. The training dataset will be truncated in block of" + " this size for training. Default to the model max input length for single sentence inputs (take into" + " account special tokens)." + ), ) parser.add_argument( "--preprocessing_num_workers", diff --git a/examples/pytorch/language-modeling/run_mlm.py b/examples/pytorch/language-modeling/run_mlm.py index 6be4b5d396..4b7b7e1b69 100755 --- a/examples/pytorch/language-modeling/run_mlm.py +++ b/examples/pytorch/language-modeling/run_mlm.py @@ -70,8 +70,9 @@ class ModelArguments: model_name_or_path: Optional[str] = field( default=None, metadata={ - "help": "The model checkpoint for weights initialization." - "Don't set if you want to train a model from scratch." + "help": ( + "The model checkpoint for weights initialization. Don't set if you want to train a model from scratch." + ) }, ) model_type: Optional[str] = field( @@ -81,8 +82,10 @@ class ModelArguments: config_overrides: Optional[str] = field( default=None, metadata={ - "help": "Override some existing default config settings when a model is trained from scratch. Example: " - "n_embd=10,resid_pdrop=0.2,scale_attn_weights=false,summary_type=cls_index" + "help": ( + "Override some existing default config settings when a model is trained from scratch. Example: " + "n_embd=10,resid_pdrop=0.2,scale_attn_weights=false,summary_type=cls_index" + ) }, ) config_name: Optional[str] = field( @@ -106,8 +109,10 @@ class ModelArguments: use_auth_token: bool = field( default=False, metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." + "help": ( + "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + ) }, ) @@ -147,8 +152,10 @@ class DataTrainingArguments: max_seq_length: Optional[int] = field( default=None, metadata={ - "help": "The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated." + "help": ( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated." + ) }, ) preprocessing_num_workers: Optional[int] = field( @@ -165,22 +172,28 @@ class DataTrainingArguments: pad_to_max_length: bool = field( default=False, metadata={ - "help": "Whether to pad all samples to `max_seq_length`. " - "If False, will pad the samples dynamically when batching to the maximum length in the batch." + "help": ( + "Whether to pad all samples to `max_seq_length`. " + "If False, will pad the samples dynamically when batching to the maximum length in the batch." + ) }, ) max_train_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set."
+ "help": ( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + ) }, ) max_eval_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + ) }, ) diff --git a/examples/pytorch/language-modeling/run_mlm_no_trainer.py b/examples/pytorch/language-modeling/run_mlm_no_trainer.py index d6a8c1691e..32135dc219 100755 --- a/examples/pytorch/language-modeling/run_mlm_no_trainer.py +++ b/examples/pytorch/language-modeling/run_mlm_no_trainer.py @@ -171,7 +171,9 @@ def parse_args(): "--max_seq_length", type=int, default=None, - help="The maximum total input sequence length after tokenization. Sequences longer than this will be truncated.", + help=( + "The maximum total input sequence length after tokenization. Sequences longer than this will be truncated." + ), ) parser.add_argument( "--line_by_line", diff --git a/examples/pytorch/language-modeling/run_plm.py b/examples/pytorch/language-modeling/run_plm.py index c76dfe533b..8808953d97 100755 --- a/examples/pytorch/language-modeling/run_plm.py +++ b/examples/pytorch/language-modeling/run_plm.py @@ -63,8 +63,9 @@ class ModelArguments: model_name_or_path: Optional[str] = field( default=None, metadata={ - "help": "The model checkpoint for weights initialization." - "Don't set if you want to train a model from scratch." + "help": ( + "The model checkpoint for weights initialization.Don't set if you want to train a model from scratch." + ) }, ) config_name: Optional[str] = field( @@ -73,8 +74,10 @@ class ModelArguments: config_overrides: Optional[str] = field( default=None, metadata={ - "help": "Override some existing default config settings when a model is trained from scratch. Example: " - "n_embd=10,resid_pdrop=0.2,scale_attn_weights=false,summary_type=cls_index" + "help": ( + "Override some existing default config settings when a model is trained from scratch. Example: " + "n_embd=10,resid_pdrop=0.2,scale_attn_weights=false,summary_type=cls_index" + ) }, ) tokenizer_name: Optional[str] = field( @@ -95,8 +98,10 @@ class ModelArguments: use_auth_token: bool = field( default=False, metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." + "help": ( + "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + ) }, ) @@ -136,8 +141,10 @@ class DataTrainingArguments: max_seq_length: int = field( default=512, metadata={ - "help": "The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated." + "help": ( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated." + ) }, ) preprocessing_num_workers: Optional[int] = field( @@ -147,8 +154,10 @@ class DataTrainingArguments: plm_probability: float = field( default=1 / 6, metadata={ - "help": "Ratio of length of a span of masked tokens to surrounding context length for " - "permutation language modeling." + "help": ( + "Ratio of length of a span of masked tokens to surrounding context length for " + "permutation language modeling." 
+ ) }, ) max_span_length: int = field( @@ -161,22 +170,28 @@ class DataTrainingArguments: pad_to_max_length: bool = field( default=False, metadata={ - "help": "Whether to pad all samples to `max_seq_length`. " - "If False, will pad the samples dynamically when batching to the maximum length in the batch." + "help": ( + "Whether to pad all samples to `max_seq_length`. " + "If False, will pad the samples dynamically when batching to the maximum length in the batch." + ) }, ) max_train_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + ) }, ) max_eval_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + ) }, ) diff --git a/examples/pytorch/multiple-choice/run_swag.py b/examples/pytorch/multiple-choice/run_swag.py index cef1abd78a..1421bb4be6 100755 --- a/examples/pytorch/multiple-choice/run_swag.py +++ b/examples/pytorch/multiple-choice/run_swag.py @@ -82,8 +82,10 @@ class ModelArguments: use_auth_token: bool = field( default=False, metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." + "help": ( + "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + ) }, ) @@ -109,30 +111,38 @@ class DataTrainingArguments: max_seq_length: Optional[int] = field( default=None, metadata={ - "help": "The maximum total input sequence length after tokenization. If passed, sequences longer " - "than this will be truncated, sequences shorter will be padded." + "help": ( + "The maximum total input sequence length after tokenization. If passed, sequences longer " + "than this will be truncated, sequences shorter will be padded." + ) }, ) pad_to_max_length: bool = field( default=False, metadata={ - "help": "Whether to pad all samples to the maximum sentence length. " - "If False, will pad the samples dynamically when batching to the maximum length in the batch. More " - "efficient on GPU but very bad for TPU." + "help": ( + "Whether to pad all samples to the maximum sentence length. " + "If False, will pad the samples dynamically when batching to the maximum length in the batch. More " + "efficient on GPU but very bad for TPU." + ) }, ) max_train_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + ) }, ) max_eval_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." 
+ ) }, ) diff --git a/examples/pytorch/question-answering/run_qa.py b/examples/pytorch/question-answering/run_qa.py index dfcf8644fe..9b85625ec8 100755 --- a/examples/pytorch/question-answering/run_qa.py +++ b/examples/pytorch/question-answering/run_qa.py @@ -81,8 +81,10 @@ class ModelArguments: use_auth_token: bool = field( default=False, metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." + "help": ( + "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + ) }, ) @@ -118,37 +120,46 @@ class DataTrainingArguments: max_seq_length: int = field( default=384, metadata={ - "help": "The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded." + "help": ( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ) }, ) pad_to_max_length: bool = field( default=True, metadata={ - "help": "Whether to pad all samples to `max_seq_length`. " - "If False, will pad the samples dynamically when batching to the maximum length in the batch (which can " - "be faster on GPU but will be slower on TPU)." + "help": ( + "Whether to pad all samples to `max_seq_length`. If False, will pad the samples dynamically when" + " batching to the maximum length in the batch (which can be faster on GPU but will be slower on TPU)." + ) }, ) max_train_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + ) }, ) max_eval_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + ) }, ) max_predict_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of prediction examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of prediction examples to this " + "value if set." + ) }, ) version_2_with_negative: bool = field( @@ -157,9 +168,11 @@ class DataTrainingArguments: null_score_diff_threshold: float = field( default=0.0, metadata={ - "help": "The threshold used to select the null answer: if the best answer has a score that is less than " - "the score of the null answer minus this threshold, the null answer is selected for this example. " - "Only useful when `version_2_with_negative=True`." + "help": ( + "The threshold used to select the null answer: if the best answer has a score that is less than " + "the score of the null answer minus this threshold, the null answer is selected for this example. " + "Only useful when `version_2_with_negative=True`." + ) }, ) doc_stride: int = field( @@ -173,8 +186,10 @@ class DataTrainingArguments: max_answer_length: int = field( default=30, metadata={ - "help": "The maximum length of an answer that can be generated. This is needed because the start " - "and end predictions are not conditioned on one another." 
+ "help": ( + "The maximum length of an answer that can be generated. This is needed because the start " + "and end predictions are not conditioned on one another." + ) }, ) @@ -319,9 +334,9 @@ def main(): # Tokenizer check: this script requires a fast tokenizer. if not isinstance(tokenizer, PreTrainedTokenizerFast): raise ValueError( - "This example script only works for models that have a fast tokenizer. Checkout the big table of models " - "at https://huggingface.co/transformers/index.html#supported-frameworks to find the model types that meet this " - "requirement" + "This example script only works for models that have a fast tokenizer. Checkout the big table of models at" + " https://huggingface.co/transformers/index.html#supported-frameworks to find the model types that meet" + " this requirement" ) # Preprocessing the datasets. diff --git a/examples/pytorch/question-answering/run_qa_beam_search.py b/examples/pytorch/question-answering/run_qa_beam_search.py index c4820b5205..b7430405a6 100755 --- a/examples/pytorch/question-answering/run_qa_beam_search.py +++ b/examples/pytorch/question-answering/run_qa_beam_search.py @@ -80,8 +80,10 @@ class ModelArguments: use_auth_token: bool = field( default=False, metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." + "help": ( + "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + ) }, ) @@ -117,37 +119,46 @@ class DataTrainingArguments: max_seq_length: int = field( default=384, metadata={ - "help": "The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded." + "help": ( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ) }, ) pad_to_max_length: bool = field( default=True, metadata={ - "help": "Whether to pad all samples to `max_seq_length`. " - "If False, will pad the samples dynamically when batching to the maximum length in the batch (which can " - "be faster on GPU but will be slower on TPU)." + "help": ( + "Whether to pad all samples to `max_seq_length`. If False, will pad the samples dynamically when" + " batching to the maximum length in the batch (which can be faster on GPU but will be slower on TPU)." + ) }, ) max_train_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + ) }, ) max_eval_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + ) }, ) max_predict_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of prediction examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of prediction examples to this " + "value if set." 
+ ) }, ) version_2_with_negative: bool = field( @@ -156,9 +167,11 @@ class DataTrainingArguments: null_score_diff_threshold: float = field( default=0.0, metadata={ - "help": "The threshold used to select the null answer: if the best answer has a score that is less than " - "the score of the null answer minus this threshold, the null answer is selected for this example. " - "Only useful when `version_2_with_negative=True`." + "help": ( + "The threshold used to select the null answer: if the best answer has a score that is less than " + "the score of the null answer minus this threshold, the null answer is selected for this example. " + "Only useful when `version_2_with_negative=True`." + ) }, ) doc_stride: int = field( @@ -172,8 +185,10 @@ class DataTrainingArguments: max_answer_length: int = field( default=30, metadata={ - "help": "The maximum length of an answer that can be generated. This is needed because the start " - "and end predictions are not conditioned on one another." + "help": ( + "The maximum length of an answer that can be generated. This is needed because the start " + "and end predictions are not conditioned on one another." + ) }, ) diff --git a/examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py b/examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py index 64daff68f7..8da97157d5 100644 --- a/examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py +++ b/examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py @@ -116,8 +116,10 @@ def parse_args(): "--max_seq_length", type=int, default=384, - help="The maximum total input sequence length after tokenization. Sequences longer than this will be truncated," - " sequences shorter will be padded if `--pad_to_max_lengh` is passed.", + help=( + "The maximum total input sequence length after tokenization. Sequences longer than this will be truncated," + " sequences shorter will be padded if `--pad_to_max_length` is passed." + ), ) parser.add_argument( "--pad_to_max_length", @@ -190,9 +192,11 @@ def parse_args(): "--null_score_diff_threshold", type=float, default=0.0, - help="The threshold used to select the null answer: if the best answer has a score that is less than " - "the score of the null answer minus this threshold, the null answer is selected for this example. " - "Only useful when `version_2_with_negative=True`.", + help=( + "The threshold used to select the null answer: if the best answer has a score that is less than " + "the score of the null answer minus this threshold, the null answer is selected for this example. " + "Only useful when `version_2_with_negative=True`." + ), ) parser.add_argument( "--version_2_with_negative", @@ -203,22 +207,28 @@ def parse_args(): "--max_answer_length", type=int, default=30, - help="The maximum length of an answer that can be generated. This is needed because the start " - "and end predictions are not conditioned on one another.", + help=( + "The maximum length of an answer that can be generated. This is needed because the start " + "and end predictions are not conditioned on one another." + ), ) parser.add_argument( "--max_train_samples", type=int, default=None, - help="For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set.", + help=( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set."
+ ), ) parser.add_argument( "--max_eval_samples", type=int, default=None, - help="For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set.", + help=( + "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + ), ) parser.add_argument( "--overwrite_cache", type=bool, default=False, help="Overwrite the cached training and evaluation sets" diff --git a/examples/pytorch/question-answering/run_qa_no_trainer.py b/examples/pytorch/question-answering/run_qa_no_trainer.py index d3d51c9e70..f5e2a3663d 100755 --- a/examples/pytorch/question-answering/run_qa_no_trainer.py +++ b/examples/pytorch/question-answering/run_qa_no_trainer.py @@ -121,8 +121,10 @@ def parse_args(): "--max_seq_length", type=int, default=384, - help="The maximum total input sequence length after tokenization. Sequences longer than this will be truncated," - " sequences shorter will be padded if `--pad_to_max_lengh` is passed.", + help=( + "The maximum total input sequence length after tokenization. Sequences longer than this will be truncated," + " sequences shorter will be padded if `--pad_to_max_length` is passed." + ), ) parser.add_argument( "--pad_to_max_length", @@ -212,9 +214,11 @@ def parse_args(): "--null_score_diff_threshold", type=float, default=0.0, - help="The threshold used to select the null answer: if the best answer has a score that is less than " - "the score of the null answer minus this threshold, the null answer is selected for this example. " - "Only useful when `version_2_with_negative=True`.", + help=( + "The threshold used to select the null answer: if the best answer has a score that is less than " + "the score of the null answer minus this threshold, the null answer is selected for this example. " + "Only useful when `version_2_with_negative=True`." + ), ) parser.add_argument( "--version_2_with_negative", @@ -225,22 +229,28 @@ def parse_args(): "--max_answer_length", type=int, default=30, - help="The maximum length of an answer that can be generated. This is needed because the start " - "and end predictions are not conditioned on one another.", + help=( + "The maximum length of an answer that can be generated. This is needed because the start " + "and end predictions are not conditioned on one another." + ), ) parser.add_argument( "--max_train_samples", type=int, default=None, - help="For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set.", + help=( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + ), ) parser.add_argument( "--max_eval_samples", type=int, default=None, - help="For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set.", + help=( + "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set."
+ ), ) parser.add_argument( "--overwrite_cache", type=bool, default=False, help="Overwrite the cached training and evaluation sets" diff --git a/examples/pytorch/question-answering/run_seq2seq_qa.py b/examples/pytorch/question-answering/run_seq2seq_qa.py index f9ad59e843..66a52b93f7 100644 --- a/examples/pytorch/question-answering/run_seq2seq_qa.py +++ b/examples/pytorch/question-answering/run_seq2seq_qa.py @@ -81,8 +81,10 @@ class ModelArguments: use_auth_token: bool = field( default=False, metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." + "help": ( + "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + ) }, ) @@ -130,53 +132,66 @@ class DataTrainingArguments: max_seq_length: int = field( default=384, metadata={ - "help": "The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded." + "help": ( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ) }, ) max_answer_length: int = field( default=30, metadata={ - "help": "The maximum length of an answer that can be generated. This is needed because the start " - "and end predictions are not conditioned on one another." + "help": ( + "The maximum length of an answer that can be generated. This is needed because the start " + "and end predictions are not conditioned on one another." + ) }, ) val_max_answer_length: Optional[int] = field( default=None, metadata={ - "help": "The maximum total sequence length for validation target text after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded. Will default to `max_answer_length`." - "This argument is also used to override the ``max_length`` param of ``model.generate``, which is used " - "during ``evaluate`` and ``predict``." + "help": ( + "The maximum total sequence length for validation target text after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded. Will default to `max_answer_length`." + "This argument is also used to override the ``max_length`` param of ``model.generate``, which is used " + "during ``evaluate`` and ``predict``." + ) }, ) pad_to_max_length: bool = field( default=True, metadata={ - "help": "Whether to pad all samples to `max_seq_length`. " - "If False, will pad the samples dynamically when batching to the maximum length in the batch (which can " - "be faster on GPU but will be slower on TPU)." + "help": ( + "Whether to pad all samples to `max_seq_length`. If False, will pad the samples dynamically when" + " batching to the maximum length in the batch (which can be faster on GPU but will be slower on TPU)." + ) }, ) max_train_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + ) }, ) max_eval_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." 
+ "help": ( + "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + ) }, ) max_predict_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of prediction examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of prediction examples to this " + "value if set." + ) }, ) version_2_with_negative: bool = field( @@ -185,9 +200,11 @@ class DataTrainingArguments: null_score_diff_threshold: float = field( default=0.0, metadata={ - "help": "The threshold used to select the null answer: if the best answer has a score that is less than " - "the score of the null answer minus this threshold, the null answer is selected for this example. " - "Only useful when `version_2_with_negative=True`." + "help": ( + "The threshold used to select the null answer: if the best answer has a score that is less than " + "the score of the null answer minus this threshold, the null answer is selected for this example. " + "Only useful when `version_2_with_negative=True`." + ) }, ) doc_stride: int = field( @@ -201,8 +218,10 @@ class DataTrainingArguments: num_beams: Optional[int] = field( default=None, metadata={ - "help": "Number of beams to use for evaluation. This argument will be passed to ``model.generate``, " - "which is used during ``evaluate`` and ``predict``." + "help": ( + "Number of beams to use for evaluation. This argument will be passed to ``model.generate``, " + "which is used during ``evaluate`` and ``predict``." + ) }, ) ignore_pad_token_for_loss: bool = field( diff --git a/examples/pytorch/semantic-segmentation/run_semantic_segmentation.py b/examples/pytorch/semantic-segmentation/run_semantic_segmentation.py index ba4107d091..a86589ccae 100644 --- a/examples/pytorch/semantic-segmentation/run_semantic_segmentation.py +++ b/examples/pytorch/semantic-segmentation/run_semantic_segmentation.py @@ -194,15 +194,19 @@ class DataTrainingArguments: max_train_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + ) }, ) max_eval_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + ) }, ) reduce_labels: Optional[bool] = field( @@ -241,8 +245,10 @@ class ModelArguments: use_auth_token: bool = field( default=False, metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." + "help": ( + "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." 
+ ) }, ) diff --git a/examples/pytorch/speech-pretraining/run_wav2vec2_pretraining_no_trainer.py b/examples/pytorch/speech-pretraining/run_wav2vec2_pretraining_no_trainer.py index a66d1f5493..b5f06391dd 100755 --- a/examples/pytorch/speech-pretraining/run_wav2vec2_pretraining_no_trainer.py +++ b/examples/pytorch/speech-pretraining/run_wav2vec2_pretraining_no_trainer.py @@ -219,7 +219,10 @@ def parse_args(): "--pad_to_multiple_of", type=int, default=None, - help="If set will pad the sequence to a multiple of the provided value. This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= 7.5 (Volta).", + help=( + "If set will pad the sequence to a multiple of the provided value. This is especially useful to enable the" + " use of Tensor Cores on NVIDIA hardware with compute capability >= 7.5 (Volta)." + ), ) parser.add_argument( "--adam_beta1", @@ -440,7 +443,7 @@ def main(): # only normalized-inputs-training is supported if not feature_extractor.do_normalize: raise ValueError( - "Training is only supported for normalized inputs. " "Make sure ``feature_extractor.do_normalize == True``" + "Training is only supported for normalized inputs. Make sure ``feature_extractor.do_normalize == True``" ) # set max & min audio length in number of samples @@ -496,7 +499,8 @@ def main(): # apply_spec_augment has to be True, mask_feature_prob has to be 0.0 if not config.do_stable_layer_norm or config.feat_extract_norm != "layer": raise ValueError( - "PreTraining is only supported for ``config.do_stable_layer_norm=True`` and ``config.feat_extract_norm='layer'" + "PreTraining is only supported for ``config.do_stable_layer_norm=True`` and" + " ``config.feat_extract_norm='layer'``" ) # initialize random model @@ -615,7 +619,7 @@ def main(): lr_scheduler.step() elif accelerator.is_local_main_process: progress_bar.write( - "Gradients have overflown - skipping update step... " f"Updating gradient scale to {scale}..." + f"Gradients have overflown - skipping update step... Updating gradient scale to {scale}..." ) # update gumbel temperature diff --git a/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py b/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py index a9b26758cd..84ec7b6a64 100755 --- a/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py +++ b/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py @@ -101,9 +101,11 @@ class ModelArguments: mask_time_prob: float = field( default=0.05, metadata={ - "help": "Probability of each feature vector along the time axis to be chosen as the start of the vector" - "span to be masked. Approximately ``mask_time_prob * sequence_length // mask_time_length`` feature" - "vectors will be masked along the time axis." + "help": ( + "Probability of each feature vector along the time axis to be chosen as the start of the vector " + "span to be masked. Approximately ``mask_time_prob * sequence_length // mask_time_length`` feature " + "vectors will be masked along the time axis." + ) }, ) mask_time_length: int = field( @@ -113,8 +115,11 @@ class ModelArguments: mask_feature_prob: float = field( default=0.0, metadata={ - "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" - "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis."
+ "help": ( + "Probability of each feature vector along the feature axis to be chosen as the start of the vectorspan" + " to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature" + " bins will be masked along the time axis." + ) }, ) mask_feature_length: int = field( @@ -146,8 +151,10 @@ class DataTrainingArguments: train_split_name: str = field( default="train+validation", metadata={ - "help": "The name of the training data set split to use (via the datasets library). Defaults to " - "'train+validation'" + "help": ( + "The name of the training data set split to use (via the datasets library). Defaults to " + "'train+validation'" + ) }, ) eval_split_name: str = field( @@ -174,15 +181,19 @@ class DataTrainingArguments: max_train_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + ) }, ) max_eval_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of validation examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of validation examples to this " + "value if set." + ) }, ) chars_to_ignore: Optional[List[str]] = list_field( @@ -196,7 +207,10 @@ class DataTrainingArguments: max_duration_in_seconds: float = field( default=20.0, metadata={ - "help": "Filter audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + "help": ( + "Filter audio files that are longer than `max_duration_in_seconds` seconds to" + " 'max_duration_in_seconds`" + ) }, ) min_duration_in_seconds: float = field( @@ -205,17 +219,21 @@ class DataTrainingArguments: preprocessing_only: bool = field( default=False, metadata={ - "help": "Whether to only do data preprocessing and skip training. " - "This is especially useful when data preprocessing errors out in distributed training due to timeout. " - "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " - "so that the cached datasets can consequently be loaded in distributed training" + "help": ( + "Whether to only do data preprocessing and skip training. This is especially useful when data" + " preprocessing errors out in distributed training due to timeout. In this case, one should run the" + " preprocessing in a non-distributed setup with `preprocessing_only=True` so that the cached datasets" + " can consequently be loaded in distributed training" + ) }, ) use_auth_token: bool = field( default=False, metadata={ - "help": "If :obj:`True`, will use the token generated when running" - ":obj:`transformers-cli login` as HTTP bearer authorization for remote files." + "help": ( + "If :obj:`True`, will use the token generated when running" + ":obj:`transformers-cli login` as HTTP bearer authorization for remote files." + ) }, ) unk_token: str = field( @@ -233,10 +251,12 @@ class DataTrainingArguments: phoneme_language: Optional[str] = field( default=None, metadata={ - "help": "The target language that should be used be" - " passed to the tokenizer for tokenization. Note that" - " this is only relevant if the model classifies the" - " input audio to a sequence of phoneme sequences." 
+ "help": ( + "The target language that should be used be" + " passed to the tokenizer for tokenization. Note that" + " this is only relevant if the model classifies the" + " input audio to a sequence of phoneme sequences." + ) }, ) @@ -405,9 +425,9 @@ def main(): if data_args.audio_column_name not in raw_datasets["train"].column_names: raise ValueError( - f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " - "Make sure to set `--audio_column_name` to the correct audio column - one of " - f"{', '.join(raw_datasets['train'].column_names)}." + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'." + " Make sure to set `--audio_column_name` to the correct audio column - one of" + f" {', '.join(raw_datasets['train'].column_names)}." ) if data_args.text_column_name not in raw_datasets["train"].column_names: @@ -720,7 +740,10 @@ def main(): "finetuned_from": model_args.model_name_or_path, "tasks": "speech-recognition", "tags": ["automatic-speech-recognition", data_args.dataset_name], - "dataset_args": f"Config: {config_name}, Training split: {data_args.train_split_name}, Eval split: {data_args.eval_split_name}", + "dataset_args": ( + f"Config: {config_name}, Training split: {data_args.train_split_name}, Eval split:" + f" {data_args.eval_split_name}" + ), "dataset": f"{data_args.dataset_name.upper()} - {config_name.upper()}", } if "common_voice" in data_args.dataset_name: diff --git a/examples/pytorch/speech-recognition/run_speech_recognition_seq2seq.py b/examples/pytorch/speech-recognition/run_speech_recognition_seq2seq.py index 1ddbfb5e91..9b05b7a3f8 100755 --- a/examples/pytorch/speech-recognition/run_speech_recognition_seq2seq.py +++ b/examples/pytorch/speech-recognition/run_speech_recognition_seq2seq.py @@ -87,8 +87,10 @@ class ModelArguments: use_auth_token: bool = field( default=False, metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." + "help": ( + "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + ) }, ) freeze_feature_encoder: bool = field( @@ -122,15 +124,19 @@ class DataTrainingArguments: max_train_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + ) }, ) max_eval_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." 
+ ) }, ) audio_column_name: str = field( @@ -144,7 +150,10 @@ class DataTrainingArguments: max_duration_in_seconds: float = field( default=20.0, metadata={ - "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + "help": ( + "Truncate audio files that are longer than `max_duration_in_seconds` seconds to" + " 'max_duration_in_seconds`" + ) }, ) min_duration_in_seconds: float = field( @@ -153,10 +162,12 @@ class DataTrainingArguments: preprocessing_only: bool = field( default=False, metadata={ - "help": "Whether to only do data preprocessing and skip training. " - "This is especially useful when data preprocessing errors out in distributed training due to timeout. " - "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " - "so that the cached datasets can consequently be loaded in distributed training" + "help": ( + "Whether to only do data preprocessing and skip training. This is especially useful when data" + " preprocessing errors out in distributed training due to timeout. In this case, one should run the" + " preprocessing in a non-distributed setup with `preprocessing_only=True` so that the cached datasets" + " can consequently be loaded in distributed training" + ) }, ) train_split_name: str = field( diff --git a/examples/pytorch/summarization/run_summarization.py b/examples/pytorch/summarization/run_summarization.py index 96962cc361..ec80bb6dd6 100755 --- a/examples/pytorch/summarization/run_summarization.py +++ b/examples/pytorch/summarization/run_summarization.py @@ -101,15 +101,19 @@ class ModelArguments: use_auth_token: bool = field( default=False, metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." + "help": ( + "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + ) }, ) resize_position_embeddings: Optional[bool] = field( default=None, metadata={ - "help": "Whether to automatically resize the position embeddings if `max_source_length` exceeds " - "the model's position embeddings." + "help": ( + "Whether to automatically resize the position embeddings if `max_source_length` exceeds " + "the model's position embeddings." + ) }, ) @@ -142,14 +146,15 @@ class DataTrainingArguments: validation_file: Optional[str] = field( default=None, metadata={ - "help": "An optional input evaluation data file to evaluate the metrics (rouge) on " - "(a jsonlines or csv file)." + "help": ( + "An optional input evaluation data file to evaluate the metrics (rouge) on (a jsonlines or csv file)." + ) }, ) test_file: Optional[str] = field( default=None, metadata={ - "help": "An optional input test data file to evaluate the metrics (rouge) on " "(a jsonlines or csv file)." + "help": "An optional input test data file to evaluate the metrics (rouge) on (a jsonlines or csv file)." }, ) overwrite_cache: bool = field( @@ -162,60 +167,76 @@ class DataTrainingArguments: max_source_length: Optional[int] = field( default=1024, metadata={ - "help": "The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded." + "help": ( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." 
+ ) }, ) max_target_length: Optional[int] = field( default=128, metadata={ - "help": "The maximum total sequence length for target text after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded." + "help": ( + "The maximum total sequence length for target text after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ) }, ) val_max_target_length: Optional[int] = field( default=None, metadata={ - "help": "The maximum total sequence length for validation target text after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`." - "This argument is also used to override the ``max_length`` param of ``model.generate``, which is used " - "during ``evaluate`` and ``predict``." + "help": ( + "The maximum total sequence length for validation target text after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`." + "This argument is also used to override the ``max_length`` param of ``model.generate``, which is used " + "during ``evaluate`` and ``predict``." + ) }, ) pad_to_max_length: bool = field( default=False, metadata={ - "help": "Whether to pad all samples to model maximum sentence length. " - "If False, will pad the samples dynamically when batching to the maximum length in the batch. More " - "efficient on GPU but very bad for TPU." + "help": ( + "Whether to pad all samples to model maximum sentence length. " + "If False, will pad the samples dynamically when batching to the maximum length in the batch. More " + "efficient on GPU but very bad for TPU." + ) }, ) max_train_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + ) }, ) max_eval_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + ) }, ) max_predict_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of prediction examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of prediction examples to this " + "value if set." + ) }, ) num_beams: Optional[int] = field( default=None, metadata={ - "help": "Number of beams to use for evaluation. This argument will be passed to ``model.generate``, " - "which is used during ``evaluate`` and ``predict``." + "help": ( + "Number of beams to use for evaluation. This argument will be passed to ``model.generate``, " + "which is used during ``evaluate`` and ``predict``." + ) }, ) ignore_pad_token_for_loss: bool = field( @@ -231,9 +252,11 @@ class DataTrainingArguments: forced_bos_token: Optional[str] = field( default=None, metadata={ - "help": "The token to force as the first generated token after the decoder_start_token_id." 
- "Useful for multilingual models like mBART where the first generated token" - "needs to be the target language token (Usually it is the target language token)" + "help": ( + "The token to force as the first generated token after the decoder_start_token_id." + "Useful for multilingual models like mBART where the first generated token" + "needs to be the target language token (Usually it is the target language token)" + ) }, ) @@ -410,17 +433,18 @@ def main(): ): if model_args.resize_position_embeddings is None: logger.warning( - f"Increasing the model's number of position embedding vectors from {model.config.max_position_embeddings} " - f"to {data_args.max_source_length}." + "Increasing the model's number of position embedding vectors from" + f" {model.config.max_position_embeddings} to {data_args.max_source_length}." ) model.resize_position_embeddings(data_args.max_source_length) elif model_args.resize_position_embeddings: model.resize_position_embeddings(data_args.max_source_length) else: raise ValueError( - f"`--max_source_length` is set to {data_args.max_source_length}, but the model only has {model.config.max_position_embeddings}" - f" position encodings. Consider either reducing `--max_source_length` to {model.config.max_position_embeddings} or to automatically " - "resize the model's position encodings by passing `--resize_position_embeddings`." + f"`--max_source_length` is set to {data_args.max_source_length}, but the model only has" + f" {model.config.max_position_embeddings} position encodings. Consider either reducing" + f" `--max_source_length` to {model.config.max_position_embeddings} or to automatically resize the" + " model's position encodings by passing `--resize_position_embeddings`." ) prefix = data_args.source_prefix if data_args.source_prefix is not None else "" diff --git a/examples/pytorch/summarization/run_summarization_no_trainer.py b/examples/pytorch/summarization/run_summarization_no_trainer.py index 59ec178c97..e773a58373 100644 --- a/examples/pytorch/summarization/run_summarization_no_trainer.py +++ b/examples/pytorch/summarization/run_summarization_no_trainer.py @@ -111,20 +111,22 @@ def parse_args(): "--ignore_pad_token_for_loss", type=bool, default=True, - help="Whether to ignore the tokens corresponding to " "padded labels in the loss computation or not.", + help="Whether to ignore the tokens corresponding to padded labels in the loss computation or not.", ) parser.add_argument( "--max_source_length", type=int, default=1024, - help="The maximum total input sequence length after " - "tokenization.Sequences longer than this will be truncated, sequences shorter will be padded.", + help=( + "The maximum total input sequence length after " + "tokenization.Sequences longer than this will be truncated, sequences shorter will be padded." + ), ) parser.add_argument( "--source_prefix", type=str, default=None, - help="A prefix to add before every source text " "(useful for T5 models).", + help="A prefix to add before every source text (useful for T5 models).", ) parser.add_argument( "--preprocessing_num_workers", @@ -139,18 +141,22 @@ def parse_args(): "--max_target_length", type=int, default=128, - help="The maximum total sequence length for target text after " - "tokenization. Sequences longer than this will be truncated, sequences shorter will be padded." - "during ``evaluate`` and ``predict``.", + help=( + "The maximum total sequence length for target text after " + "tokenization. Sequences longer than this will be truncated, sequences shorter will be padded." 
+ "during ``evaluate`` and ``predict``." + ), ) parser.add_argument( "--val_max_target_length", type=int, default=None, - help="The maximum total sequence length for validation " - "target text after tokenization.Sequences longer than this will be truncated, sequences shorter will be " - "padded. Will default to `max_target_length`.This argument is also used to override the ``max_length`` " - "param of ``model.generate``, which is used during ``evaluate`` and ``predict``.", + help=( + "The maximum total sequence length for validation " + "target text after tokenization.Sequences longer than this will be truncated, sequences shorter will be " + "padded. Will default to `max_target_length`.This argument is also used to override the ``max_length`` " + "param of ``model.generate``, which is used during ``evaluate`` and ``predict``." + ), ) parser.add_argument( "--max_length", @@ -165,8 +171,10 @@ def parse_args(): "--num_beams", type=int, default=None, - help="Number of beams to use for evaluation. This argument will be " - "passed to ``model.generate``, which is used during ``evaluate`` and ``predict``.", + help=( + "Number of beams to use for evaluation. This argument will be " + "passed to ``model.generate``, which is used during ``evaluate`` and ``predict``." + ), ) parser.add_argument( "--pad_to_max_length", diff --git a/examples/pytorch/text-classification/run_glue.py b/examples/pytorch/text-classification/run_glue.py index 4fb342dff7..ec6d210ce6 100755 --- a/examples/pytorch/text-classification/run_glue.py +++ b/examples/pytorch/text-classification/run_glue.py @@ -89,8 +89,10 @@ class DataTrainingArguments: max_seq_length: int = field( default=128, metadata={ - "help": "The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded." + "help": ( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ) }, ) overwrite_cache: bool = field( @@ -99,29 +101,37 @@ class DataTrainingArguments: pad_to_max_length: bool = field( default=True, metadata={ - "help": "Whether to pad all samples to `max_seq_length`. " - "If False, will pad the samples dynamically when batching to the maximum length in the batch." + "help": ( + "Whether to pad all samples to `max_seq_length`. " + "If False, will pad the samples dynamically when batching to the maximum length in the batch." + ) }, ) max_train_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + ) }, ) max_eval_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + ) }, ) max_predict_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of prediction examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of prediction examples to this " + "value if set." 
+ ) }, ) train_file: Optional[str] = field( @@ -180,8 +190,10 @@ class ModelArguments: use_auth_token: bool = field( default=False, metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." + "help": ( + "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + ) }, ) diff --git a/examples/pytorch/text-classification/run_xnli.py b/examples/pytorch/text-classification/run_xnli.py index acd7e06019..dbc719d814 100755 --- a/examples/pytorch/text-classification/run_xnli.py +++ b/examples/pytorch/text-classification/run_xnli.py @@ -67,8 +67,10 @@ class DataTrainingArguments: max_seq_length: Optional[int] = field( default=128, metadata={ - "help": "The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded." + "help": ( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ) }, ) overwrite_cache: bool = field( @@ -77,29 +79,37 @@ class DataTrainingArguments: pad_to_max_length: bool = field( default=True, metadata={ - "help": "Whether to pad all samples to `max_seq_length`. " - "If False, will pad the samples dynamically when batching to the maximum length in the batch." + "help": ( + "Whether to pad all samples to `max_seq_length`. " + "If False, will pad the samples dynamically when batching to the maximum length in the batch." + ) }, ) max_train_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + ) }, ) max_eval_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + ) }, ) max_predict_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of prediction examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of prediction examples to this " + "value if set." + ) }, ) server_ip: Optional[str] = field(default=None, metadata={"help": "For distant debugging."}) @@ -146,8 +156,10 @@ class ModelArguments: use_auth_token: bool = field( default=False, metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." + "help": ( + "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + ) }, ) diff --git a/examples/pytorch/token-classification/run_ner.py b/examples/pytorch/token-classification/run_ner.py index 97d7d11fcc..36d136e31e 100755 --- a/examples/pytorch/token-classification/run_ner.py +++ b/examples/pytorch/token-classification/run_ner.py @@ -81,8 +81,10 @@ class ModelArguments: use_auth_token: bool = field( default=False, metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." 
+ "help": ( + "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + ) }, ) @@ -127,44 +129,56 @@ class DataTrainingArguments: max_seq_length: int = field( default=None, metadata={ - "help": "The maximum total input sequence length after tokenization. If set, sequences longer " - "than this will be truncated, sequences shorter will be padded." + "help": ( + "The maximum total input sequence length after tokenization. If set, sequences longer " + "than this will be truncated, sequences shorter will be padded." + ) }, ) pad_to_max_length: bool = field( default=False, metadata={ - "help": "Whether to pad all samples to model maximum sentence length. " - "If False, will pad the samples dynamically when batching to the maximum length in the batch. More " - "efficient on GPU but very bad for TPU." + "help": ( + "Whether to pad all samples to model maximum sentence length. " + "If False, will pad the samples dynamically when batching to the maximum length in the batch. More " + "efficient on GPU but very bad for TPU." + ) }, ) max_train_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + ) }, ) max_eval_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + ) }, ) max_predict_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of prediction examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of prediction examples to this " + "value if set." + ) }, ) label_all_tokens: bool = field( default=False, metadata={ - "help": "Whether to put the label for one word on all tokens of generated by that word or just on the " - "one (in which case the other tokens will have a padding index)." + "help": ( + "Whether to put the label for one word on all tokens of generated by that word or just on the " + "one (in which case the other tokens will have a padding index)." + ) }, ) return_entity_level_metrics: bool = field( @@ -355,9 +369,9 @@ def main(): # Tokenizer check: this script requires a fast tokenizer. if not isinstance(tokenizer, PreTrainedTokenizerFast): raise ValueError( - "This example script only works for models that have a fast tokenizer. Checkout the big table of models " - "at https://huggingface.co/transformers/index.html#supported-frameworks to find the model types that meet this " - "requirement" + "This example script only works for models that have a fast tokenizer. Checkout the big table of models at" + " https://huggingface.co/transformers/index.html#supported-frameworks to find the model types that meet" + " this requirement" ) # Model has labels -> use them. @@ -373,8 +387,8 @@ def main(): else: logger.warning( "Your model seems to have been trained with labels, but they don't match the dataset: ", - f"model labels: {list(sorted(model.config.label2id.keys()))}, dataset labels: {list(sorted(label_list))}." 
- "\nIgnoring the model labels as a result.", + f"model labels: {list(sorted(model.config.label2id.keys()))}, dataset labels:" + f" {list(sorted(label_list))}.\nIgnoring the model labels as a result.", ) # Set the correspondences label/ID inside the model config diff --git a/examples/pytorch/token-classification/run_ner_no_trainer.py b/examples/pytorch/token-classification/run_ner_no_trainer.py index 234109b5d9..e22471026b 100755 --- a/examples/pytorch/token-classification/run_ner_no_trainer.py +++ b/examples/pytorch/token-classification/run_ner_no_trainer.py @@ -403,8 +403,8 @@ def main(): else: logger.warning( "Your model seems to have been trained with labels, but they don't match the dataset: ", - f"model labels: {list(sorted(model.config.label2id.keys()))}, dataset labels: {list(sorted(label_list))}." - "\nIgnoring the model labels as a result.", + f"model labels: {list(sorted(model.config.label2id.keys()))}, dataset labels:" + f" {list(sorted(label_list))}.\nIgnoring the model labels as a result.", ) # Set the correspondences label/ID inside the model config diff --git a/examples/pytorch/translation/run_translation.py b/examples/pytorch/translation/run_translation.py index dc08cd8693..758fde82df 100755 --- a/examples/pytorch/translation/run_translation.py +++ b/examples/pytorch/translation/run_translation.py @@ -91,8 +91,10 @@ class ModelArguments: use_auth_token: bool = field( default=False, metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." + "help": ( + "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + ) }, ) @@ -116,15 +118,12 @@ class DataTrainingArguments: validation_file: Optional[str] = field( default=None, metadata={ - "help": "An optional input evaluation data file to evaluate the metrics (sacreblue) on " - "a jsonlines file." + "help": "An optional input evaluation data file to evaluate the metrics (sacreblue) on a jsonlines file." }, ) test_file: Optional[str] = field( default=None, - metadata={ - "help": "An optional input test data file to evaluate the metrics (sacreblue) on " "a jsonlines file." - }, + metadata={"help": "An optional input test data file to evaluate the metrics (sacreblue) on a jsonlines file."}, ) overwrite_cache: bool = field( default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} @@ -136,60 +135,76 @@ class DataTrainingArguments: max_source_length: Optional[int] = field( default=1024, metadata={ - "help": "The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded." + "help": ( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ) }, ) max_target_length: Optional[int] = field( default=128, metadata={ - "help": "The maximum total sequence length for target text after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded." + "help": ( + "The maximum total sequence length for target text after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ) }, ) val_max_target_length: Optional[int] = field( default=None, metadata={ - "help": "The maximum total sequence length for validation target text after tokenization. 
Sequences longer " - "than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`." - "This argument is also used to override the ``max_length`` param of ``model.generate``, which is used " - "during ``evaluate`` and ``predict``." + "help": ( + "The maximum total sequence length for validation target text after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`." + "This argument is also used to override the ``max_length`` param of ``model.generate``, which is used " + "during ``evaluate`` and ``predict``." + ) }, ) pad_to_max_length: bool = field( default=False, metadata={ - "help": "Whether to pad all samples to model maximum sentence length. " - "If False, will pad the samples dynamically when batching to the maximum length in the batch. More " - "efficient on GPU but very bad for TPU." + "help": ( + "Whether to pad all samples to model maximum sentence length. " + "If False, will pad the samples dynamically when batching to the maximum length in the batch. More " + "efficient on GPU but very bad for TPU." + ) }, ) max_train_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + ) }, ) max_eval_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + ) }, ) max_predict_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of prediction examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of prediction examples to this " + "value if set." + ) }, ) num_beams: Optional[int] = field( default=None, metadata={ - "help": "Number of beams to use for evaluation. This argument will be passed to ``model.generate``, " - "which is used during ``evaluate`` and ``predict``." + "help": ( + "Number of beams to use for evaluation. This argument will be passed to ``model.generate``, " + "which is used during ``evaluate`` and ``predict``." + ) }, ) ignore_pad_token_for_loss: bool = field( @@ -204,9 +219,11 @@ class DataTrainingArguments: forced_bos_token: Optional[str] = field( default=None, metadata={ - "help": "The token to force as the first generated token after the :obj:`decoder_start_token_id`." 
- "Useful for multilingual models like :doc:`mBART <../model_doc/mbart>` where the first generated token " - "needs to be the target language token.(Usually it is the target language token)" + "help": ( + "The token to force as the first generated token after the :obj:`decoder_start_token_id`.Useful for" + " multilingual models like :doc:`mBART <../model_doc/mbart>` where the first generated token needs to" + " be the target language token.(Usually it is the target language token)" + ) }, ) diff --git a/examples/pytorch/translation/run_translation_no_trainer.py b/examples/pytorch/translation/run_translation_no_trainer.py index 21eadf6aae..5d75808a28 100644 --- a/examples/pytorch/translation/run_translation_no_trainer.py +++ b/examples/pytorch/translation/run_translation_no_trainer.py @@ -95,41 +95,51 @@ def parse_args(): "--num_beams", type=int, default=None, - help="Number of beams to use for evaluation. This argument will be " - "passed to ``model.generate``, which is used during ``evaluate`` and ``predict``.", + help=( + "Number of beams to use for evaluation. This argument will be " + "passed to ``model.generate``, which is used during ``evaluate`` and ``predict``." + ), ) parser.add_argument( "--max_source_length", type=int, default=1024, - help="The maximum total input sequence length after " - "tokenization.Sequences longer than this will be truncated, sequences shorter will be padded.", + help=( + "The maximum total input sequence length after " + "tokenization.Sequences longer than this will be truncated, sequences shorter will be padded." + ), ) parser.add_argument( "--max_target_length", type=int, default=128, - help="The maximum total sequence length for target text after " - "tokenization. Sequences longer than this will be truncated, sequences shorter will be padded." - "during ``evaluate`` and ``predict``.", + help=( + "The maximum total sequence length for target text after " + "tokenization. Sequences longer than this will be truncated, sequences shorter will be padded." + "during ``evaluate`` and ``predict``." + ), ) parser.add_argument( "--val_max_target_length", type=int, default=None, - help="The maximum total sequence length for validation " - "target text after tokenization.Sequences longer than this will be truncated, sequences shorter will be " - "padded. Will default to `max_target_length`.This argument is also used to override the ``max_length`` " - "param of ``model.generate``, which is used during ``evaluate`` and ``predict``.", + help=( + "The maximum total sequence length for validation " + "target text after tokenization.Sequences longer than this will be truncated, sequences shorter will be " + "padded. Will default to `max_target_length`.This argument is also used to override the ``max_length`` " + "param of ``model.generate``, which is used during ``evaluate`` and ``predict``." + ), ) parser.add_argument( "--pad_to_max_length", type=bool, default=False, - help="Whether to pad all samples to model maximum sentence " - "length. If False, will pad the samples dynamically when batching to the maximum length in the batch. More" - "efficient on GPU but very bad for TPU.", + help=( + "Whether to pad all samples to model maximum sentence " + "length. If False, will pad the samples dynamically when batching to the maximum length in the batch. More" + "efficient on GPU but very bad for TPU." + ), ) parser.add_argument( "--validation_file", type=str, default=None, help="A csv or a json file containing the validation data." 
@@ -138,7 +148,7 @@ def parse_args(): "--ignore_pad_token_for_loss", type=bool, default=True, - help="Whether to ignore the tokens corresponding to " "padded labels in the loss computation or not.", + help="Whether to ignore the tokens corresponding to padded labels in the loss computation or not.", ) parser.add_argument("--source_lang", type=str, default=None, help="Source language id for translation.") parser.add_argument("--target_lang", type=str, default=None, help="Target language id for translation.") @@ -146,7 +156,7 @@ def parse_args(): "--source_prefix", type=str, default=None, - help="A prefix to add before every source text " "(useful for T5 models).", + help="A prefix to add before every source text (useful for T5 models).", ) parser.add_argument( "--preprocessing_num_workers", diff --git a/examples/pytorch/xla_spawn.py b/examples/pytorch/xla_spawn.py index d84b419945..5df6bfa2d5 100644 --- a/examples/pytorch/xla_spawn.py +++ b/examples/pytorch/xla_spawn.py @@ -39,9 +39,7 @@ def parse_args(): """ parser = ArgumentParser( description=( - "PyTorch TPU distributed training launch " - "helper utility that will spawn up " - "multiple distributed processes" + "PyTorch TPU distributed training launch helper utility that will spawn up multiple distributed processes" ) ) diff --git a/examples/research_projects/adversarial/run_hans.py b/examples/research_projects/adversarial/run_hans.py index 31acbd3a8a..0576471fbc 100644 --- a/examples/research_projects/adversarial/run_hans.py +++ b/examples/research_projects/adversarial/run_hans.py @@ -77,8 +77,10 @@ class DataTrainingArguments: max_seq_length: int = field( default=128, metadata={ - "help": "The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded." + "help": ( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ) }, ) overwrite_cache: bool = field( @@ -110,7 +112,8 @@ def main(): and not training_args.overwrite_output_dir ): raise ValueError( - f"Output directory ({training_args.output_dir}) already exists and is not empty. Use --overwrite_output_dir to overcome." + f"Output directory ({training_args.output_dir}) already exists and is not empty. Use" + " --overwrite_output_dir to overcome." 
) # Setup logging diff --git a/examples/research_projects/adversarial/utils_hans.py b/examples/research_projects/adversarial/utils_hans.py index b02bf81352..e54792ad2f 100644 --- a/examples/research_projects/adversarial/utils_hans.py +++ b/examples/research_projects/adversarial/utils_hans.py @@ -197,7 +197,7 @@ if is_tf_available(): self.features = hans_convert_examples_to_features(examples, label_list, max_seq_length, tokenizer) def gen(): - for (ex_index, ex) in tqdm.tqdm(enumerate(self.features), desc="convert examples to features"): + for ex_index, ex in tqdm.tqdm(enumerate(self.features), desc="convert examples to features"): if ex_index % 10000 == 0: logger.info("Writing example %d of %d" % (ex_index, len(examples))) @@ -268,7 +268,7 @@ class HansProcessor(DataProcessor): def _create_examples(self, lines, set_type): """Creates examples for the training and dev sets.""" examples = [] - for (i, line) in enumerate(lines): + for i, line in enumerate(lines): if i == 0: continue guid = "%s-%s" % (set_type, line[0]) @@ -303,7 +303,7 @@ def hans_convert_examples_to_features( label_map = {label: i for i, label in enumerate(label_list)} features = [] - for (ex_index, example) in tqdm.tqdm(enumerate(examples), desc="convert examples to features"): + for ex_index, example in tqdm.tqdm(enumerate(examples), desc="convert examples to features"): if ex_index % 10000 == 0: logger.info("Writing example %d" % (ex_index)) diff --git a/examples/research_projects/bert-loses-patience/pabee/modeling_pabee_albert.py b/examples/research_projects/bert-loses-patience/pabee/modeling_pabee_albert.py index 006ff98c95..5e17352dc1 100644 --- a/examples/research_projects/bert-loses-patience/pabee/modeling_pabee_albert.py +++ b/examples/research_projects/bert-loses-patience/pabee/modeling_pabee_albert.py @@ -84,7 +84,10 @@ class AlbertModelWithPabee(AlbertModel): def log_stats(self): avg_inf_layers = self.inference_layers_num / self.inference_instances_num - message = f"*** Patience = {self.patience} Avg. Inference Layers = {avg_inf_layers:.2f} Speed Up = {1 - avg_inf_layers / self.config.num_hidden_layers:.2f} ***" + message = ( + f"*** Patience = {self.patience} Avg. Inference Layers = {avg_inf_layers:.2f} Speed Up =" + f" {1 - avg_inf_layers / self.config.num_hidden_layers:.2f} ***" + ) print(message) @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING) diff --git a/examples/research_projects/bert-loses-patience/pabee/modeling_pabee_bert.py b/examples/research_projects/bert-loses-patience/pabee/modeling_pabee_bert.py index ff5c2b51e8..b32f47d0c3 100644 --- a/examples/research_projects/bert-loses-patience/pabee/modeling_pabee_bert.py +++ b/examples/research_projects/bert-loses-patience/pabee/modeling_pabee_bert.py @@ -89,7 +89,10 @@ class BertModelWithPabee(BertModel): def log_stats(self): avg_inf_layers = self.inference_layers_num / self.inference_instances_num - message = f"*** Patience = {self.patience} Avg. Inference Layers = {avg_inf_layers:.2f} Speed Up = {1 - avg_inf_layers / self.config.num_hidden_layers:.2f} ***" + message = ( + f"*** Patience = {self.patience} Avg. 
Inference Layers = {avg_inf_layers:.2f} Speed Up =" + f" {1 - avg_inf_layers / self.config.num_hidden_layers:.2f} ***" + ) print(message) @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING) diff --git a/examples/research_projects/bert-loses-patience/run_glue_with_pabee.py b/examples/research_projects/bert-loses-patience/run_glue_with_pabee.py index def4dff776..d4121655e8 100755 --- a/examples/research_projects/bert-loses-patience/run_glue_with_pabee.py +++ b/examples/research_projects/bert-loses-patience/run_glue_with_pabee.py @@ -483,8 +483,10 @@ def main(): "--max_seq_length", default=128, type=int, - help="The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded.", + help=( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ), ) parser.add_argument("--do_train", action="store_true", help="Whether to run training.") parser.add_argument("--do_eval", action="store_true", help="Whether to run eval on the dev set.") @@ -574,8 +576,10 @@ def main(): "--fp16_opt_level", type=str, default="O1", - help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']." - "See details at https://nvidia.github.io/apex/amp.html", + help=( + "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']." + "See details at https://nvidia.github.io/apex/amp.html" + ), ) parser.add_argument( "--local_rank", diff --git a/examples/research_projects/bertabs/run_summarization.py b/examples/research_projects/bertabs/run_summarization.py index 33be67233f..fcfae6b8c6 100644 --- a/examples/research_projects/bertabs/run_summarization.py +++ b/examples/research_projects/bertabs/run_summarization.py @@ -325,7 +325,8 @@ def main(): if not documents_dir_is_valid(args.documents_dir): raise FileNotFoundError( - "We could not find the directory you specified for the documents to summarize, or it was empty. Please specify a valid path." + "We could not find the directory you specified for the documents to summarize, or it was empty. Please" + " specify a valid path." ) os.makedirs(args.summaries_output_dir, exist_ok=True) diff --git a/examples/research_projects/bertology/run_bertology.py b/examples/research_projects/bertology/run_bertology.py index 1018359dc6..030573d87f 100644 --- a/examples/research_projects/bertology/run_bertology.py +++ b/examples/research_projects/bertology/run_bertology.py @@ -338,8 +338,10 @@ def main(): "--max_seq_length", default=128, type=int, - help="The maximum total input sequence length after WordPiece tokenization. \n" - "Sequences longer than this will be truncated, sequences shorter padded.", + help=( + "The maximum total input sequence length after WordPiece tokenization. \n" + "Sequences longer than this will be truncated, sequences shorter padded." + ), ) parser.add_argument("--batch_size", default=1, type=int, help="Batch size.") diff --git a/examples/research_projects/bertology/run_prune_gpt.py b/examples/research_projects/bertology/run_prune_gpt.py index 49a867b96d..68cece6e99 100644 --- a/examples/research_projects/bertology/run_prune_gpt.py +++ b/examples/research_projects/bertology/run_prune_gpt.py @@ -314,8 +314,10 @@ def main(): "--max_seq_length", default=128, type=int, - help="The maximum total input sequence length after WordPiece tokenization. 
\n" - "Sequences longer than this will be truncated, sequences shorter padded.", + help=( + "The maximum total input sequence length after WordPiece tokenization. \n" + "Sequences longer than this will be truncated, sequences shorter padded." + ), ) parser.add_argument("--batch_size", default=1, type=int, help="Batch size.") diff --git a/examples/research_projects/codeparrot/scripts/arguments.py b/examples/research_projects/codeparrot/scripts/arguments.py index a94cda2d2f..ec98e28a02 100644 --- a/examples/research_projects/codeparrot/scripts/arguments.py +++ b/examples/research_projects/codeparrot/scripts/arguments.py @@ -112,7 +112,10 @@ class HumanEvalArguments: device_int: Optional[int] = field( default=-1, metadata={ - "help": "Determine which device to run the `text-generation` Pipeline on. -1 is CPU and any zero or positive number corresponds to which GPU device id to run on." + "help": ( + "Determine which device to run the `text-generation` Pipeline on. -1 is CPU and any zero or positive" + " number corresponds to which GPU device id to run on." + ) }, ) diff --git a/examples/research_projects/codeparrot/scripts/human_eval.py b/examples/research_projects/codeparrot/scripts/human_eval.py index 1eb5555cd7..d0614134ad 100644 --- a/examples/research_projects/codeparrot/scripts/human_eval.py +++ b/examples/research_projects/codeparrot/scripts/human_eval.py @@ -186,7 +186,8 @@ def main(): _ = code_eval_metric.compute(references=[""], predictions=[[""]]) except ValueError as exception: print( - 'Code evaluation not enabled. Read the warning below carefully and then use `--HF_ALLOW_CODE_EVAL="1"` flag to enable code evaluation.' + 'Code evaluation not enabled. Read the warning below carefully and then use `--HF_ALLOW_CODE_EVAL="1"`' + " flag to enable code evaluation." ) raise exception diff --git a/examples/research_projects/deebert/run_glue_deebert.py b/examples/research_projects/deebert/run_glue_deebert.py index 5bfc2f8816..f86390375f 100644 --- a/examples/research_projects/deebert/run_glue_deebert.py +++ b/examples/research_projects/deebert/run_glue_deebert.py @@ -459,8 +459,10 @@ def main(): "--max_seq_length", default=128, type=int, - help="The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded.", + help=( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ), ) parser.add_argument("--do_train", action="store_true", help="Whether to run training.") parser.add_argument("--do_eval", action="store_true", help="Whether to run eval on the dev set.") @@ -529,8 +531,10 @@ def main(): "--fp16_opt_level", type=str, default="O1", - help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']." - "See details at https://nvidia.github.io/apex/amp.html", + help=( + "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']." 
+ "See details at https://nvidia.github.io/apex/amp.html" + ), ) parser.add_argument("--local_rank", type=int, default=-1, help="For distributed training: local_rank") parser.add_argument("--server_ip", type=str, default="", help="For distant debugging.") diff --git a/examples/research_projects/distillation/grouped_batch_sampler.py b/examples/research_projects/distillation/grouped_batch_sampler.py index 6c2d9b9748..83addc371f 100644 --- a/examples/research_projects/distillation/grouped_batch_sampler.py +++ b/examples/research_projects/distillation/grouped_batch_sampler.py @@ -60,7 +60,7 @@ class GroupedBatchSampler(BatchSampler): def __init__(self, sampler, group_ids, batch_size): if not isinstance(sampler, Sampler): raise ValueError( - "sampler should be an instance of " "torch.utils.data.Sampler, but got sampler={}".format(sampler) + "sampler should be an instance of torch.utils.data.Sampler, but got sampler={}".format(sampler) ) self.sampler = sampler self.group_ids = group_ids diff --git a/examples/research_projects/distillation/run_squad_w_distillation.py b/examples/research_projects/distillation/run_squad_w_distillation.py index ea1f2f46a9..3acfd46864 100644 --- a/examples/research_projects/distillation/run_squad_w_distillation.py +++ b/examples/research_projects/distillation/run_squad_w_distillation.py @@ -518,7 +518,10 @@ def main(): "--teacher_type", default=None, type=str, - help="Teacher type. Teacher tokenizer and student (model) tokenizer must output the same tokenization. Only for distillation.", + help=( + "Teacher type. Teacher tokenizer and student (model) tokenizer must output the same tokenization. Only for" + " distillation." + ), ) parser.add_argument( "--teacher_name_or_path", @@ -590,8 +593,10 @@ def main(): "--max_seq_length", default=384, type=int, - help="The maximum total input sequence length after WordPiece tokenization. Sequences " - "longer than this will be truncated, and sequences shorter than this will be padded.", + help=( + "The maximum total input sequence length after WordPiece tokenization. Sequences " + "longer than this will be truncated, and sequences shorter than this will be padded." + ), ) parser.add_argument( "--doc_stride", @@ -603,8 +608,10 @@ def main(): "--max_query_length", default=64, type=int, - help="The maximum number of tokens for the question. Questions longer than this will " - "be truncated to this length.", + help=( + "The maximum number of tokens for the question. Questions longer than this will " + "be truncated to this length." + ), ) parser.add_argument("--do_train", action="store_true", help="Whether to run training.") parser.add_argument("--do_eval", action="store_true", help="Whether to run eval on the dev set.") @@ -649,14 +656,18 @@ def main(): "--max_answer_length", default=30, type=int, - help="The maximum length of an answer that can be generated. This is needed because the start " - "and end predictions are not conditioned on one another.", + help=( + "The maximum length of an answer that can be generated. This is needed because the start " + "and end predictions are not conditioned on one another." + ), ) parser.add_argument( "--verbose_logging", action="store_true", - help="If true, all of the warnings related to data processing will be printed. " - "A number of warnings are expected for a normal SQuAD evaluation.", + help=( + "If true, all of the warnings related to data processing will be printed. " + "A number of warnings are expected for a normal SQuAD evaluation." 
+        ),
     )

     parser.add_argument("--logging_steps", type=int, default=50, help="Log every X updates steps.")
@@ -685,8 +696,10 @@ def main():
         "--fp16_opt_level",
         type=str,
         default="O1",
-        help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
-        "See details at https://nvidia.github.io/apex/amp.html",
+        help=(
+            "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
+            " See details at https://nvidia.github.io/apex/amp.html"
+        ),
     )
     parser.add_argument("--server_ip", type=str, default="", help="Can be used for distant debugging.")
     parser.add_argument("--server_port", type=str, default="", help="Can be used for distant debugging.")
diff --git a/examples/research_projects/distillation/scripts/extract.py b/examples/research_projects/distillation/scripts/extract.py
index d7a99b1d89..f60f243dec 100644
--- a/examples/research_projects/distillation/scripts/extract.py
+++ b/examples/research_projects/distillation/scripts/extract.py
@@ -25,7 +25,10 @@ from transformers import GPT2LMHeadModel, RobertaForMaskedLM

 if __name__ == "__main__":
     parser = argparse.ArgumentParser(
-        description="Extraction some layers of the full RobertaForMaskedLM or GPT2LMHeadModel for Transfer Learned Distillation"
+        description=(
+            "Extract some layers of the full RobertaForMaskedLM or GPT2LMHeadModel for Transfer Learned"
+            " Distillation"
+        )
     )
     parser.add_argument("--model_type", default="roberta", choices=["roberta", "gpt2"])
     parser.add_argument("--model_name", default="roberta-large", type=str)
diff --git a/examples/research_projects/distillation/scripts/extract_distilbert.py b/examples/research_projects/distillation/scripts/extract_distilbert.py
index e125f36187..a58105f999 100644
--- a/examples/research_projects/distillation/scripts/extract_distilbert.py
+++ b/examples/research_projects/distillation/scripts/extract_distilbert.py
@@ -25,7 +25,10 @@ from transformers import BertForMaskedLM

 if __name__ == "__main__":
     parser = argparse.ArgumentParser(
-        description="Extraction some layers of the full BertForMaskedLM or RObertaForMaskedLM for Transfer Learned Distillation"
+        description=(
+            "Extract some layers of the full BertForMaskedLM or RobertaForMaskedLM for Transfer Learned"
+            " Distillation"
+        )
     )
     parser.add_argument("--model_type", default="bert", choices=["bert"])
     parser.add_argument("--model_name", default="bert-base-uncased", type=str)
diff --git a/examples/research_projects/distillation/train.py b/examples/research_projects/distillation/train.py
index 6385c885a9..cc2362888e 100644
--- a/examples/research_projects/distillation/train.py
+++ b/examples/research_projects/distillation/train.py
@@ -207,8 +207,10 @@ def main():
         "--fp16_opt_level",
         type=str,
         default="O1",
-        help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
-        "See details at https://nvidia.github.io/apex/amp.html",
+        help=(
+            "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
+            " See details at https://nvidia.github.io/apex/amp.html"
+        ),
     )
     parser.add_argument("--n_gpu", type=int, default=1, help="Number of GPUs in the node.")
     parser.add_argument("--local_rank", type=int, default=-1, help="Distributed training - Local rank")
@@ -226,8 +228,8 @@ def main():
     if os.path.exists(args.dump_path):
         if not args.force:
             raise ValueError(
-                f"Serialization dir {args.dump_path} already exists, but you have not precised wheter to overwrite it"
-                "Use `--force` if you want to overwrite it"
+                f"Serialization dir {args.dump_path} already exists, but you have not specified whether to overwrite"
+                " it. Use `--force` if you want to overwrite it"
             )
         else:
             shutil.rmtree(args.dump_path)
diff --git a/examples/research_projects/fsner/src/fsner/tokenizer_utils.py b/examples/research_projects/fsner/src/fsner/tokenizer_utils.py
index 6e4027a989..bc5f6650cc 100644
--- a/examples/research_projects/fsner/src/fsner/tokenizer_utils.py
+++ b/examples/research_projects/fsner/src/fsner/tokenizer_utils.py
@@ -48,7 +48,8 @@ class FSNERTokenizerUtils(object):

         else:
             raise Exception(
-                "Type of parameter x was not recognized! Only `list of strings` for query or `list of lists of strings` for supports are supported."
+                "Type of parameter x was not recognized! Only `list of strings` for query or `list of lists of"
+                " strings` for supports are supported."
             )

         return d
diff --git a/examples/research_projects/jax-projects/dataset-streaming/run_mlm_flax_stream.py b/examples/research_projects/jax-projects/dataset-streaming/run_mlm_flax_stream.py
index 0bb4a7b9c5..c64979d40f 100755
--- a/examples/research_projects/jax-projects/dataset-streaming/run_mlm_flax_stream.py
+++ b/examples/research_projects/jax-projects/dataset-streaming/run_mlm_flax_stream.py
@@ -75,8 +75,9 @@ class ModelArguments:
     model_name_or_path: Optional[str] = field(
         default=None,
         metadata={
-            "help": "The model checkpoint for weights initialization."
-            "Don't set if you want to train a model from scratch."
+            "help": (
+                "The model checkpoint for weights initialization. Don't set if you want to train a model from scratch."
+            )
         },
     )
     model_type: Optional[str] = field(
@@ -99,7 +100,10 @@ class ModelArguments:
     dtype: Optional[str] = field(
         default="float32",
         metadata={
-            "help": "Floating-point format in which the model weights should be initialized and trained. Choose one of `[float32, float16, bfloat16]`."
+            "help": (
+                "Floating-point format in which the model weights should be initialized and trained. Choose one of"
+                " `[float32, float16, bfloat16]`."
+            )
         },
     )

@@ -141,8 +145,10 @@ class DataTrainingArguments:
     max_seq_length: Optional[int] = field(
         default=None,
         metadata={
-            "help": "The maximum total input sequence length after tokenization. Sequences longer "
-            "than this will be truncated. Default to the max input length of the model."
+            "help": (
+                "The maximum total input sequence length after tokenization. Sequences longer "
+                "than this will be truncated. Default to the max input length of the model."
+            )
         },
     )
     preprocessing_num_workers: Optional[int] = field(
@@ -155,8 +161,10 @@ class DataTrainingArguments:
     pad_to_max_length: bool = field(
         default=False,
         metadata={
-            "help": "Whether to pad all samples to `max_seq_length`. "
-            "If False, will pad the samples dynamically when batching to the maximum length in the batch."
+            "help": (
+                "Whether to pad all samples to `max_seq_length`. "
+                "If False, will pad the samples dynamically when batching to the maximum length in the batch."
+ ) }, ) line_by_line: bool = field( @@ -575,7 +583,8 @@ if __name__ == "__main__": if step % training_args.logging_steps == 0 and step > 0: steps.write( - f"Step... ({step} | Loss: {train_metric['loss'].mean()}, Learning Rate: {train_metric['learning_rate'].mean()})" + f"Step... ({step} | Loss: {train_metric['loss'].mean()}, Learning Rate:" + f" {train_metric['learning_rate'].mean()})" ) train_time += time.time() - train_start if has_tensorboard and jax.process_index() == 0: @@ -604,7 +613,10 @@ if __name__ == "__main__": eval_metrics = jax.tree_map(lambda x: x / eval_normalizer, eval_metrics) # Update progress bar - steps.desc = f"Step... ({step + 1}/{num_train_steps} | Loss: {eval_metrics['loss']}, Acc: {eval_metrics['accuracy']})" + steps.desc = ( + f"Step... ({step + 1}/{num_train_steps} | Loss: {eval_metrics['loss']}, Acc:" + f" {eval_metrics['accuracy']})" + ) if has_tensorboard and jax.process_index() == 0: write_eval_metric(summary_writer, eval_metrics, step) diff --git a/examples/research_projects/jax-projects/hybrid_clip/run_hybrid_clip.py b/examples/research_projects/jax-projects/hybrid_clip/run_hybrid_clip.py index 0572a4e019..6ee974666a 100644 --- a/examples/research_projects/jax-projects/hybrid_clip/run_hybrid_clip.py +++ b/examples/research_projects/jax-projects/hybrid_clip/run_hybrid_clip.py @@ -77,14 +77,18 @@ class ModelArguments: text_model_name_or_path: str = field( metadata={ - "help": "The text model checkpoint for weights initialization." - "Don't set if you want to train a model from scratch." + "help": ( + "The text model checkpoint for weights initialization." + " Don't set if you want to train a model from scratch." + ) }, ) vision_model_name_or_path: str = field( metadata={ - "help": "The vision model checkpoint for weights initialization." - "Don't set if you want to train a model from scratch." + "help": ( + "The vision model checkpoint for weights initialization." + " Don't set if you want to train a model from scratch." + ) }, ) from_pt: bool = field( @@ -107,7 +111,10 @@ class ModelArguments: dtype: Optional[str] = field( default="float32", metadata={ - "help": "Floating-point format in which the model weights should be initialized and trained. Choose one of `[float32, float16, bfloat16]`." + "help": ( + "Floating-point format in which the model weights should be initialized and trained. Choose one of" + " `[float32, float16, bfloat16]`." + ) }, ) @@ -129,22 +136,28 @@ class DataTrainingArguments: max_seq_length: Optional[int] = field( default=72, metadata={ - "help": "The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded." + "help": ( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ) }, ) max_train_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + ) }, ) max_eval_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set."
+ ) }, ) overwrite_cache: bool = field( @@ -519,7 +532,8 @@ def main(): train_step_progress_bar.close() epochs.write( - f"Epoch... ({epoch + 1}/{num_epochs} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']})" + f"Epoch... ({epoch + 1}/{num_epochs} | Loss: {train_metric['loss']}, Learning Rate:" + f" {train_metric['learning_rate']})" ) # ======================== Evaluating ============================== diff --git a/examples/research_projects/jax-projects/model_parallel/run_clm_mp.py b/examples/research_projects/jax-projects/model_parallel/run_clm_mp.py index 3371dc3bd4..518ef9f7b2 100644 --- a/examples/research_projects/jax-projects/model_parallel/run_clm_mp.py +++ b/examples/research_projects/jax-projects/model_parallel/run_clm_mp.py @@ -69,8 +69,9 @@ class ModelArguments: model_name_or_path: Optional[str] = field( default=None, metadata={ - "help": "The model checkpoint for weights initialization." - "Don't set if you want to train a model from scratch." + "help": ( + "The model checkpoint for weights initialization. Don't set if you want to train a model from scratch." + ) }, ) model_type: Optional[str] = field( @@ -93,7 +94,10 @@ class ModelArguments: dtype: Optional[str] = field( default="float32", metadata={ - "help": "Floating-point format in which the model weights should be initialized and trained. Choose one of `[float32, float16, bfloat16]`." + "help": ( + "Floating-point format in which the model weights should be initialized and trained. Choose one of" + " `[float32, float16, bfloat16]`." + ) }, ) @@ -118,15 +122,19 @@ class DataTrainingArguments: max_train_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + ) }, ) max_eval_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + ) }, ) overwrite_cache: bool = field( @@ -141,9 +149,11 @@ class DataTrainingArguments: block_size: Optional[int] = field( default=None, metadata={ - "help": "Optional input sequence length after tokenization. " - "The training dataset will be truncated in block of this size for training. " - "Default to the model max input length for single sentence inputs (take into account special tokens)." + "help": ( + "Optional input sequence length after tokenization. " + "The training dataset will be truncated in blocks of this size for training. " + "Default to the model max input length for single sentence inputs (take into account special tokens)." + ) }, ) overwrite_cache: bool = field( @@ -334,7 +344,8 @@ def main(): # clm input could be much much longer than block_size if "Token indices sequence length is longer than the" in cl.out: tok_logger.warning( - "^^^^^^^^^^^^^^^^ Please ignore the warning above - this long input will be chunked into smaller bits before being passed to the model." + "^^^^^^^^^^^^^^^^ Please ignore the warning above - this long input will be chunked into smaller bits" + " before being passed to the model."
) return output @@ -606,7 +617,8 @@ def main(): write_train_metric(summary_writer, train_metrics, train_time, cur_step) epochs.write( - f"Step... ({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']})" + f"Step... ({cur_step} | Loss: {train_metric['loss']}, Learning Rate:" + f" {train_metric['learning_rate']})" ) train_metrics = [] @@ -632,7 +644,8 @@ def main(): eval_metrics["perplexity"] = float("inf") logger.info( - f"Step... ({cur_step} | Eval loss: {eval_metrics['loss']} | Eval Perplexity: {eval_metrics['perplexity']}" + f"Step... ({cur_step} | Eval loss: {eval_metrics['loss']} | Eval Perplexity:" + f" {eval_metrics['perplexity']}" ) if cur_step % training_args.save_steps == 0 and cur_step > 0: diff --git a/examples/research_projects/jax-projects/wav2vec2/run_wav2vec2_pretrain_flax.py b/examples/research_projects/jax-projects/wav2vec2/run_wav2vec2_pretrain_flax.py index e2bcd7861b..b0600d978b 100755 --- a/examples/research_projects/jax-projects/wav2vec2/run_wav2vec2_pretrain_flax.py +++ b/examples/research_projects/jax-projects/wav2vec2/run_wav2vec2_pretrain_flax.py @@ -64,7 +64,10 @@ class ModelArguments: dtype: Optional[str] = field( default="float32", metadata={ - "help": "Floating-point format in which the model weights should be initialized and trained. Choose one of `[float32, float16, bfloat16]`." + "help": ( + "Floating-point format in which the model weights should be initialized and trained. Choose one of" + " `[float32, float16, bfloat16]`." + ) }, ) @@ -94,7 +97,9 @@ class DataTrainingArguments: validation_split_name: Optional[str] = field( default="validation", metadata={ - "help": "The name of the validation data set split to use (via the datasets library). Defaults to 'validation'" + "help": ( + "The name of the validation data set split to use (via the datasets library). Defaults to 'validation'" + ) }, ) speech_file_column: Optional[str] = field( @@ -120,7 +125,10 @@ class DataTrainingArguments: pad_to_multiple_of: Optional[int] = field( default=1024, metadata={ - "help": "If set will pad the sequence to a multiple of the provided value. This is important to avoid triggering recompilations on TPU" + "help": ( + "If set will pad the sequence to a multiple of the provided value. This is important to avoid" + " triggering recompilations on TPU" + ) }, ) @@ -357,7 +365,8 @@ def main(): if not config.do_stable_layer_norm or config.feat_extract_norm != "layer": raise ValueError( - "PreTraining is only supported for ``config.do_stable_layer_norm=True`` and ``config.feat_extract_norm='layer'" + "PreTraining is only supported for ``config.do_stable_layer_norm=True`` and" + " ``config.feat_extract_norm='layer'" ) model = FlaxWav2Vec2ForPreTraining(config, seed=training_args.seed, dtype=getattr(jnp, model_args.dtype)) @@ -557,7 +566,8 @@ def main(): write_train_metric(summary_writer, train_metrics, train_time, cur_step) epochs.write( - f"Step... ({cur_step} | Loss: {train_metric['loss'].mean()}, Learning Rate: {train_metric['learning_rate'].mean()})" + f"Step... ({cur_step} | Loss: {train_metric['loss'].mean()}, Learning Rate:" + f" {train_metric['learning_rate'].mean()})" ) train_metrics = [] @@ -583,7 +593,8 @@ def main(): # Update progress bar epochs.write( - f"Epoch... ({epoch + 1}/{num_epochs} | Loss: {eval_metrics['loss']}, Perplexity: {eval_metrics['codevector_perplexity']})" + f"Epoch... 
({epoch + 1}/{num_epochs} | Loss: {eval_metrics['loss']}, Perplexity:" + f" {eval_metrics['codevector_perplexity']})" ) # Save metrics diff --git a/examples/research_projects/longform-qa/eli5_utils.py b/examples/research_projects/longform-qa/eli5_utils.py index c14210bd5e..82c4bd8caf 100644 --- a/examples/research_projects/longform-qa/eli5_utils.py +++ b/examples/research_projects/longform-qa/eli5_utils.py @@ -649,7 +649,7 @@ def batch_query_qa_dense_index(questions, qa_embedder, tokenizer, wiki_passages, "<P> " + " <P> ".join([p["passage_text"] for p in res_passages]) for res_passages in res_passages_lst ] all_res_lists = [] - for (res_passages, dl) in zip(res_passages_lst, D): + for res_passages, dl in zip(res_passages_lst, D): res_list = [dict([(k, p[k]) for k in wiki_passages.column_names]) for p in res_passages] for r, sc in zip(res_list, dl): r["score"] = float(sc) @@ -679,7 +679,7 @@ def batch_query_qa_dense_index_nn(passages, qa_embedder, tokenizer, wiki_passage "<P> " + " <P> ".join([p["passage_text"] for p in res_passages]) for res_passages in res_passages_lst ] all_res_lists = [] - for (res_passages, dl, il) in zip(res_passages_lst, D, I): + for res_passages, dl, il in zip(res_passages_lst, D, I): res_list = [dict([(k, p[k]) for k in wiki_passages.column_names]) for p in res_passages] for r, sc, i in zip(res_list, dl, il): r["passage_id"] = int(i) diff --git a/examples/research_projects/luke/run_luke_ner_no_trainer.py b/examples/research_projects/luke/run_luke_ner_no_trainer.py index c7a9763d99..cb81402425 100644 --- a/examples/research_projects/luke/run_luke_ner_no_trainer.py +++ b/examples/research_projects/luke/run_luke_ner_no_trainer.py @@ -101,8 +101,8 @@ def parse_args(): type=int, default=32, help=( - "The maximum total input entity length after tokenization (Used only for (M)Luke models). Sequences longer than this will be truncated," - " sequences shorter will be padded if `--pad_to_max_length` is passed." + "The maximum total input entity length after tokenization (Used only for (M)Luke models). Sequences longer" + " than this will be truncated, sequences shorter will be padded if `--pad_to_max_length` is passed." ), ) parser.add_argument( @@ -110,8 +110,8 @@ def parse_args(): type=int, default=30, help=( - "The maximum total input mention length after tokenization (Used only for (M)Luke models). Sequences longer than this will be truncated," - " sequences shorter will be padded if `--pad_to_max_length` is passed." + "The maximum total input mention length after tokenization (Used only for (M)Luke models). Sequences" + " longer than this will be truncated, sequences shorter will be padded if `--pad_to_max_length` is passed." ), ) parser.add_argument( diff --git a/examples/research_projects/lxmert/modeling_frcnn.py b/examples/research_projects/lxmert/modeling_frcnn.py index 39a0c6aea8..33c1133e95 100644 --- a/examples/research_projects/lxmert/modeling_frcnn.py +++ b/examples/research_projects/lxmert/modeling_frcnn.py @@ -592,7 +592,7 @@ class Matcher(object): match_labels = matches.new_full(matches.size(), 1, dtype=torch.int8) - for (l, low, high) in zip(self.labels, self.thresholds[:-1], self.thresholds[1:]): + for l, low, high in zip(self.labels, self.thresholds[:-1], self.thresholds[1:]): low_high = (matched_vals >= low) & (matched_vals < high) match_labels[low_high] = l @@ -1037,9 +1037,9 @@ class ResNet(Backbone): curr_kwargs = {} for k, v in kwargs.items(): if k.endswith("_per_block"): - assert len(v) == num_blocks, ( - f"Argument '{k}' of make_stage should have the " f"same length as num_blocks={num_blocks}." - ) + assert ( + len(v) == num_blocks + ), f"Argument '{k}' of make_stage should have the same length as num_blocks={num_blocks}." newk = k[: -len("_per_block")] assert newk not in kwargs, f"Cannot call make_stage with both {k} and {newk}!"
curr_kwargs[newk] = v[i] @@ -1401,7 +1401,7 @@ class AnchorGenerator(nn.Module): def grid_anchors(self, grid_sizes): anchors = [] - for (size, stride, base_anchors) in zip(grid_sizes, self.strides, self.cell_anchors): + for size, stride, base_anchors in zip(grid_sizes, self.strides, self.cell_anchors): shift_x, shift_y = _create_grid_offsets(size, stride, self.offset, base_anchors.device) shifts = torch.stack((shift_x, shift_y, shift_x, shift_y), dim=1) @@ -1708,10 +1708,9 @@ class GeneralizedRCNN(nn.Module): elif os.path.isfile(pretrained_model_name_or_path) or is_remote_url(pretrained_model_name_or_path): archive_file = pretrained_model_name_or_path elif os.path.isfile(pretrained_model_name_or_path + ".index"): - assert ( - from_tf - ), "We found a TensorFlow checkpoint at {}, please set from_tf to True to load from this checkpoint".format( - pretrained_model_name_or_path + ".index" + assert from_tf, ( + "We found a TensorFlow checkpoint at {}, please set from_tf to True to load from this checkpoint" + .format(pretrained_model_name_or_path + ".index") ) archive_file = pretrained_model_name_or_path + ".index" else: @@ -1797,26 +1796,28 @@ class GeneralizedRCNN(nn.Module): if len(unexpected_keys) > 0: print( - f"Some weights of the model checkpoint at {pretrained_model_name_or_path} were not used when " - f"initializing {model.__class__.__name__}: {unexpected_keys}\n" - f"- This IS expected if you are initializing {model.__class__.__name__} from the checkpoint of a model trained on another task " - f"or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n" - f"- This IS NOT expected if you are initializing {model.__class__.__name__} from the checkpoint of a model that you expect " - f"to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model)." + f"Some weights of the model checkpoint at {pretrained_model_name_or_path} were not used when" + f" initializing {model.__class__.__name__}: {unexpected_keys}\n- This IS expected if you are" + f" initializing {model.__class__.__name__} from the checkpoint of a model trained on another task or" + " with another architecture (e.g. initializing a BertForSequenceClassification model from a" + " BertForPreTraining model).\n- This IS NOT expected if you are initializing" + f" {model.__class__.__name__} from the checkpoint of a model that you expect to be exactly identical" + " (initializing a BertForSequenceClassification model from a BertForSequenceClassification model)." ) else: print(f"All model checkpoint weights were used when initializing {model.__class__.__name__}.\n") if len(missing_keys) > 0: print( - f"Some weights of {model.__class__.__name__} were not initialized from the model checkpoint at {pretrained_model_name_or_path} " - f"and are newly initialized: {missing_keys}\n" - f"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference." + f"Some weights of {model.__class__.__name__} were not initialized from the model checkpoint at" + f" {pretrained_model_name_or_path} and are newly initialized: {missing_keys}\nYou should probably" + " TRAIN this model on a down-stream task to be able to use it for predictions and inference." 
) else: print( - f"All the weights of {model.__class__.__name__} were initialized from the model checkpoint at {pretrained_model_name_or_path}.\n" - f"If your task is similar to the task the model of the checkpoint was trained on, " - f"you can already use {model.__class__.__name__} for predictions without further training." + f"All the weights of {model.__class__.__name__} were initialized from the model checkpoint at" + f" {pretrained_model_name_or_path}.\nIf your task is similar to the task the model of the checkpoint" + f" was trained on, you can already use {model.__class__.__name__} for predictions without further" + " training." ) if len(error_msgs) > 0: raise RuntimeError( diff --git a/examples/research_projects/lxmert/utils.py b/examples/research_projects/lxmert/utils.py index 59ae11d025..8e830fb835 100644 --- a/examples/research_projects/lxmert/utils.py +++ b/examples/research_projects/lxmert/utils.py @@ -231,9 +231,10 @@ def compare(in_tensor): n2 = out_tensor.numpy()[0] print(n1.shape, n1[0, 0, :5]) print(n2.shape, n2[0, 0, :5]) - assert np.allclose( - n1, n2, rtol=0.01, atol=0.1 - ), f"{sum([1 for x in np.isclose(n1, n2, rtol=0.01, atol=0.1).flatten() if x == False])/len(n1.flatten())*100:.4f} % element-wise mismatch" + assert np.allclose(n1, n2, rtol=0.01, atol=0.1), ( + f"{sum([1 for x in np.isclose(n1, n2, rtol=0.01, atol=0.1).flatten() if x == False])/len(n1.flatten())*100:.4f} %" + " element-wise mismatch" + ) raise Exception("tensors are all good") # Hugging face functions below diff --git a/examples/research_projects/mlm_wwm/run_mlm_wwm.py b/examples/research_projects/mlm_wwm/run_mlm_wwm.py index 51c05ab0b3..0afa413553 100644 --- a/examples/research_projects/mlm_wwm/run_mlm_wwm.py +++ b/examples/research_projects/mlm_wwm/run_mlm_wwm.py @@ -61,8 +61,9 @@ class ModelArguments: model_name_or_path: Optional[str] = field( default=None, metadata={ - "help": "The model checkpoint for weights initialization." - "Don't set if you want to train a model from scratch." + "help": ( + "The model checkpoint for weights initialization. Don't set if you want to train a model from scratch." + ) }, ) model_type: Optional[str] = field( @@ -72,8 +73,10 @@ class ModelArguments: config_overrides: Optional[str] = field( default=None, metadata={ - "help": "Override some existing default config settings when a model is trained from scratch. Example: " - "n_embd=10,resid_pdrop=0.2,scale_attn_weights=false,summary_type=cls_index" + "help": ( + "Override some existing default config settings when a model is trained from scratch. Example: " + "n_embd=10,resid_pdrop=0.2,scale_attn_weights=false,summary_type=cls_index" + ) }, ) config_name: Optional[str] = field( @@ -97,8 +100,10 @@ class ModelArguments: use_auth_token: bool = field( default=False, metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." + "help": ( + "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + ) }, ) @@ -146,8 +151,10 @@ class DataTrainingArguments: max_seq_length: Optional[int] = field( default=None, metadata={ - "help": "The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated. Default to the max input length of the model." + "help": ( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated. Default to the max input length of the model."
+ ) }, ) preprocessing_num_workers: Optional[int] = field( @@ -160,8 +167,10 @@ class DataTrainingArguments: pad_to_max_length: bool = field( default=False, metadata={ - "help": "Whether to pad all samples to `max_seq_length`. " - "If False, will pad the samples dynamically when batching to the maximum length in the batch." + "help": ( + "Whether to pad all samples to `max_seq_length`. " + "If False, will pad the samples dynamically when batching to the maximum length in the batch." + ) }, ) diff --git a/examples/research_projects/mm-imdb/run_mmimdb.py b/examples/research_projects/mm-imdb/run_mmimdb.py index c73aec5c87..9f12257a10 100644 --- a/examples/research_projects/mm-imdb/run_mmimdb.py +++ b/examples/research_projects/mm-imdb/run_mmimdb.py @@ -356,8 +356,10 @@ def main(): "--max_seq_length", default=128, type=int, - help="The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded.", + help=( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ), ) parser.add_argument( "--num_image_embeds", default=1, type=int, help="Number of Image Embeddings from the Image Encoder" ) @@ -423,8 +425,10 @@ def main(): "--fp16_opt_level", type=str, default="O1", - help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']." - "See details at https://nvidia.github.io/apex/amp.html", + help=( + "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']." + " See details at https://nvidia.github.io/apex/amp.html" + ), ) parser.add_argument("--local_rank", type=int, default=-1, help="For distributed training: local_rank") parser.add_argument("--server_ip", type=str, default="", help="For distant debugging.") parser.add_argument("--server_port", type=str, default="", help="For distant debugging.") diff --git a/examples/research_projects/movement-pruning/bertarize.py b/examples/research_projects/movement-pruning/bertarize.py index d1e2462a30..623b46b943 100644 --- a/examples/research_projects/movement-pruning/bertarize.py +++ b/examples/research_projects/movement-pruning/bertarize.py @@ -103,15 +103,20 @@ if __name__ == "__main__": choices=["l0", "magnitude", "topK", "sigmoied_threshold"], type=str, required=True, - help="Pruning Method (l0 = L0 regularization, magnitude = Magnitude pruning, topK = Movement pruning, sigmoied_threshold = Soft movement pruning)", + help=( + "Pruning Method (l0 = L0 regularization, magnitude = Magnitude pruning, topK = Movement pruning," + " sigmoied_threshold = Soft movement pruning)" + ), ) parser.add_argument( "--threshold", type=float, required=False, - help="For `magnitude` and `topK`, it is the level of remaining weights (in %) in the fine-pruned model." - "For `sigmoied_threshold`, it is the threshold \tau against which the (sigmoied) scores are compared." - "Not needed for `l0`", + help=( + "For `magnitude` and `topK`, it is the level of remaining weights (in %%) in the fine-pruned model." + " For `sigmoied_threshold`, it is the threshold \\tau against which the (sigmoied) scores are compared."
+ "Not needed for `l0`" + ), ) parser.add_argument( "--model_name_or_path", diff --git a/examples/research_projects/movement-pruning/counts_parameters.py b/examples/research_projects/movement-pruning/counts_parameters.py index 0dddfaaa27..0aec3766b3 100644 --- a/examples/research_projects/movement-pruning/counts_parameters.py +++ b/examples/research_projects/movement-pruning/counts_parameters.py @@ -70,15 +70,20 @@ if __name__ == "__main__": choices=["l0", "topK", "sigmoied_threshold"], type=str, required=True, - help="Pruning Method (l0 = L0 regularization, topK = Movement pruning, sigmoied_threshold = Soft movement pruning)", + help=( + "Pruning Method (l0 = L0 regularization, topK = Movement pruning, sigmoied_threshold = Soft movement" + " pruning)" + ), ) parser.add_argument( "--threshold", type=float, required=False, - help="For `topK`, it is the level of remaining weights (in %) in the fine-pruned model." - "For `sigmoied_threshold`, it is the threshold \tau against which the (sigmoied) scores are compared." - "Not needed for `l0`", + help=( + "For `topK`, it is the level of remaining weights (in %) in the fine-pruned model." + "For `sigmoied_threshold`, it is the threshold \tau against which the (sigmoied) scores are compared." + "Not needed for `l0`" + ), ) parser.add_argument( "--serialization_dir", diff --git a/examples/research_projects/movement-pruning/emmental/modeling_bert_masked.py b/examples/research_projects/movement-pruning/emmental/modeling_bert_masked.py index 771d2078d0..4228050fe1 100644 --- a/examples/research_projects/movement-pruning/emmental/modeling_bert_masked.py +++ b/examples/research_projects/movement-pruning/emmental/modeling_bert_masked.py @@ -80,8 +80,8 @@ class BertSelfAttention(nn.Module): super().__init__() if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"): raise ValueError( - "The hidden size (%d) is not a multiple of the number of attention " - "heads (%d)" % (config.hidden_size, config.num_attention_heads) + "The hidden size (%d) is not a multiple of the number of attention heads (%d)" + % (config.hidden_size, config.num_attention_heads) ) self.output_attentions = config.output_attentions diff --git a/examples/research_projects/movement-pruning/masked_run_glue.py b/examples/research_projects/movement-pruning/masked_run_glue.py index 57f795945b..e81cf9209c 100644 --- a/examples/research_projects/movement-pruning/masked_run_glue.py +++ b/examples/research_projects/movement-pruning/masked_run_glue.py @@ -622,8 +622,10 @@ def main(): "--max_seq_length", default=128, type=int, - help="The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded.", + help=( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ), ) parser.add_argument("--do_train", action="store_true", help="Whether to run training.") parser.add_argument("--do_eval", action="store_true", help="Whether to run eval on the dev set.") @@ -669,22 +671,29 @@ def main(): "--initial_warmup", default=1, type=int, - help="Run `initial_warmup` * `warmup_steps` steps of threshold warmup during which threshold stays" - "at its `initial_threshold` value (sparsity schedule).", + help=( + "Run `initial_warmup` * `warmup_steps` steps of threshold warmup during which threshold stays" + "at its `initial_threshold` value (sparsity schedule)." 
+ ), ) parser.add_argument( "--final_warmup", default=2, type=int, - help="Run `final_warmup` * `warmup_steps` steps of threshold cool-down during which threshold stays" - "at its final_threshold value (sparsity schedule).", + help=( + "Run `final_warmup` * `warmup_steps` steps of threshold cool-down during which threshold stays" + " at its final_threshold value (sparsity schedule)." + ), ) parser.add_argument( "--pruning_method", default="topK", type=str, - help="Pruning Method (l0 = L0 regularization, magnitude = Magnitude pruning, topK = Movement pruning, sigmoied_threshold = Soft movement pruning).", + help=( + "Pruning Method (l0 = L0 regularization, magnitude = Magnitude pruning, topK = Movement pruning," + " sigmoied_threshold = Soft movement pruning)." + ), ) parser.add_argument( "--mask_init", @@ -717,7 +726,10 @@ def main(): "--teacher_type", default=None, type=str, - help="Teacher type. Teacher tokenizer and student (model) tokenizer must output the same tokenization. Only for distillation.", + help=( + "Teacher type. Teacher tokenizer and student (model) tokenizer must output the same tokenization. Only for" + " distillation." + ), ) parser.add_argument( "--teacher_name_or_path", @@ -787,8 +799,10 @@ def main(): "--fp16_opt_level", type=str, default="O1", - help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']." - "See details at https://nvidia.github.io/apex/amp.html", + help=( + "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']." + " See details at https://nvidia.github.io/apex/amp.html" + ), ) parser.add_argument("--local_rank", type=int, default=-1, help="For distributed training: local_rank") @@ -805,7 +819,8 @@ def main(): and not args.overwrite_output_dir ): raise ValueError( - f"Output directory ({args.output_dir}) already exists and is not empty. Use --overwrite_output_dir to overcome." + f"Output directory ({args.output_dir}) already exists and is not empty. Use --overwrite_output_dir to" + " overcome." ) # Setup CUDA, GPU & distributed training diff --git a/examples/research_projects/movement-pruning/masked_run_squad.py b/examples/research_projects/movement-pruning/masked_run_squad.py index f1d065f1f4..1bd501eda5 100644 --- a/examples/research_projects/movement-pruning/masked_run_squad.py +++ b/examples/research_projects/movement-pruning/masked_run_squad.py @@ -737,8 +737,10 @@ def main(): "--max_seq_length", default=384, type=int, - help="The maximum total input sequence length after WordPiece tokenization. Sequences " - "longer than this will be truncated, and sequences shorter than this will be padded.", + help=( + "The maximum total input sequence length after WordPiece tokenization. Sequences " + "longer than this will be truncated, and sequences shorter than this will be padded." + ), ) parser.add_argument( "--doc_stride", @@ -750,8 +752,10 @@ def main(): "--max_query_length", default=64, type=int, - help="The maximum number of tokens for the question. Questions longer than this will " - "be truncated to this length.", + help=( + "The maximum number of tokens for the question. Questions longer than this will " + "be truncated to this length."
+ ), ) parser.add_argument("--do_train", action="store_true", help="Whether to run training.") parser.add_argument("--do_eval", action="store_true", help="Whether to run eval on the dev set.") @@ -785,22 +789,29 @@ def main(): "--initial_warmup", default=1, type=int, - help="Run `initial_warmup` * `warmup_steps` steps of threshold warmup during which threshold stays" - "at its `initial_threshold` value (sparsity schedule).", + help=( + "Run `initial_warmup` * `warmup_steps` steps of threshold warmup during which threshold stays" + "at its `initial_threshold` value (sparsity schedule)." + ), ) parser.add_argument( "--final_warmup", default=2, type=int, - help="Run `final_warmup` * `warmup_steps` steps of threshold cool-down during which threshold stays" - "at its final_threshold value (sparsity schedule).", + help=( + "Run `final_warmup` * `warmup_steps` steps of threshold cool-down during which threshold stays" + "at its final_threshold value (sparsity schedule)." + ), ) parser.add_argument( "--pruning_method", default="topK", type=str, - help="Pruning Method (l0 = L0 regularization, magnitude = Magnitude pruning, topK = Movement pruning, sigmoied_threshold = Soft movement pruning).", + help=( + "Pruning Method (l0 = L0 regularization, magnitude = Magnitude pruning, topK = Movement pruning," + " sigmoied_threshold = Soft movement pruning)." + ), ) parser.add_argument( "--mask_init", @@ -833,7 +844,10 @@ def main(): "--teacher_type", default=None, type=str, - help="Teacher type. Teacher tokenizer and student (model) tokenizer must output the same tokenization. Only for distillation.", + help=( + "Teacher type. Teacher tokenizer and student (model) tokenizer must output the same tokenization. Only for" + " distillation." + ), ) parser.add_argument( "--teacher_name_or_path", @@ -883,20 +897,27 @@ def main(): "--max_answer_length", default=30, type=int, - help="The maximum length of an answer that can be generated. This is needed because the start " - "and end predictions are not conditioned on one another.", + help=( + "The maximum length of an answer that can be generated. This is needed because the start " + "and end predictions are not conditioned on one another." + ), ) parser.add_argument( "--verbose_logging", action="store_true", - help="If true, all of the warnings related to data processing will be printed. " - "A number of warnings are expected for a normal SQuAD evaluation.", + help=( + "If true, all of the warnings related to data processing will be printed. " + "A number of warnings are expected for a normal SQuAD evaluation." + ), ) parser.add_argument( "--lang_id", default=0, type=int, - help="language id of input for language-specific xlm models (see tokenization_xlm.PRETRAINED_INIT_CONFIGURATION)", + help=( + "language id of input for language-specific xlm models (see" + " tokenization_xlm.PRETRAINED_INIT_CONFIGURATION)" + ), ) parser.add_argument("--logging_steps", type=int, default=500, help="Log every X updates steps.") @@ -925,8 +946,10 @@ def main(): "--fp16_opt_level", type=str, default="O1", - help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']." - "See details at https://nvidia.github.io/apex/amp.html", + help=( + "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']." 
+ "See details at https://nvidia.github.io/apex/amp.html" + ), ) parser.add_argument("--server_ip", type=str, default="", help="Can be used for distant debugging.") parser.add_argument("--server_port", type=str, default="", help="Can be used for distant debugging.") diff --git a/examples/research_projects/onnx/summarization/bart_onnx/generation_onnx.py b/examples/research_projects/onnx/summarization/bart_onnx/generation_onnx.py index 58ee49a1b6..6db6842968 100644 --- a/examples/research_projects/onnx/summarization/bart_onnx/generation_onnx.py +++ b/examples/research_projects/onnx/summarization/bart_onnx/generation_onnx.py @@ -392,13 +392,14 @@ class BeamSearchScorerTS(torch.nn.Module): if not isinstance(num_beams, int) or num_beams <= 1: raise ValueError( - f"`num_beams` has to be an integer strictly greater than 1, but is {num_beams}. For `num_beams` == 1, one should make use of `greedy_search` instead." + f"`num_beams` has to be an integer strictly greater than 1, but is {num_beams}. For `num_beams` == 1," + " one should make use of `greedy_search` instead." ) if not isinstance(num_beam_groups, int) or (num_beam_groups > num_beams) or (num_beams % num_beam_groups != 0): raise ValueError( - f"`num_beam_groups` has to be an integer smaller or equal than `num_beams` and `num_beams` " - f"has to be divisible by `num_beam_groups`, but is {num_beam_groups} with `num_beams` being {num_beams}." + "`num_beam_groups` has to be an integer smaller or equal than `num_beams` and `num_beams` has to be" + f" divisible by `num_beam_groups`, but is {num_beam_groups} with `num_beams` being {num_beams}." ) def hypo_len(self, hypo_idx: int): @@ -508,7 +509,8 @@ class BeamSearchScorerTS(torch.nn.Module): if beam_idx < self.group_size: raise ValueError( - f"At most {self.group_size} tokens in {next_tokens[batch_idx]} can be equal to `eos_token_id: {eos_token_id}`. Make sure {next_tokens[batch_idx]} are corrected." + f"At most {self.group_size} tokens in {next_tokens[batch_idx]} can be equal to `eos_token_id:" + f" {eos_token_id}`. Make sure {next_tokens[batch_idx]} are corrected." ) # Check if we are done so that we can save a pad step if all(done) diff --git a/examples/research_projects/onnx/summarization/run_onnx_exporter.py b/examples/research_projects/onnx/summarization/run_onnx_exporter.py index 2a62ca9f70..5d751ace8e 100644 --- a/examples/research_projects/onnx/summarization/run_onnx_exporter.py +++ b/examples/research_projects/onnx/summarization/run_onnx_exporter.py @@ -53,14 +53,16 @@ def parse_args(): "--max_length", type=int, default=5, - help=("The maximum total input sequence length after tokenization."), + help="The maximum total input sequence length after tokenization.", ) parser.add_argument( "--num_beams", type=int, default=None, - help="Number of beams to use for evaluation. This argument will be " - "passed to ``model.generate``, which is used during ``evaluate`` and ``predict``.", + help=( + "Number of beams to use for evaluation. This argument will be " + "passed to ``model.generate``, which is used during ``evaluate`` and ``predict``." 
+ ), ) parser.add_argument( "--model_name_or_path", diff --git a/examples/research_projects/performer/modeling_flax_performer_utils.py b/examples/research_projects/performer/modeling_flax_performer_utils.py index abd42ec3d9..915e2fa23d 100644 --- a/examples/research_projects/performer/modeling_flax_performer_utils.py +++ b/examples/research_projects/performer/modeling_flax_performer_utils.py @@ -535,7 +535,7 @@ class FastAttentionviaLowRankDecomposition(FastAttention): assert key.ndim == value.ndim for ax in axis: if not (query.ndim >= 3 and 1 <= ax < query.ndim - 2): - raise ValueError("Attention axis must be between the batch " "axis and the last-two axes.") + raise ValueError("Attention axis must be between the batch axis and the last-two axes.") n = key.ndim # Constructing projection tensor. diff --git a/examples/research_projects/performer/run_mlm_performer.py b/examples/research_projects/performer/run_mlm_performer.py index 34aa75f8a9..be20342d3a 100644 --- a/examples/research_projects/performer/run_mlm_performer.py +++ b/examples/research_projects/performer/run_mlm_performer.py @@ -98,8 +98,9 @@ class ModelArguments: model_name_or_path: Optional[str] = field( default=None, metadata={ - "help": "The model checkpoint for weights initialization." - "Don't set if you want to train a model from scratch." + "help": ( + "The model checkpoint for weights initialization. Don't set if you want to train a model from scratch." + ) }, ) performer: bool = field( @@ -159,8 +160,10 @@ class DataTrainingArguments: max_seq_length: Optional[int] = field( default=None, metadata={ - "help": "The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated. Default to the max input length of the model." + "help": ( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated. Default to the max input length of the model." + ) }, ) preprocessing_num_workers: Optional[int] = field( @@ -173,8 +176,10 @@ class DataTrainingArguments: pad_to_max_length: bool = field( default=False, metadata={ - "help": "Whether to pad all samples to `max_seq_length`. " - "If False, will pad the samples dynamically when batching to the maximum length in the batch."
+ ) }, ) diff --git a/examples/research_projects/pplm/run_pplm_discrim_train.py b/examples/research_projects/pplm/run_pplm_discrim_train.py index ec8cd9b9fa..6a7351d9e6 100644 --- a/examples/research_projects/pplm/run_pplm_discrim_train.py +++ b/examples/research_projects/pplm/run_pplm_discrim_train.py @@ -175,8 +175,7 @@ def evaluate_performance(data_loader, discriminator, device="cpu"): test_loss /= len(data_loader.dataset) print( - "Performance on test set: " - "Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)".format( + "Performance on test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)".format( test_loss, correct, len(data_loader.dataset), 100.0 * correct / len(data_loader.dataset) ) ) @@ -309,7 +308,7 @@ def train_discriminator( x.append(seq) y.append(d["label"]) except Exception: - print("Error evaluating / tokenizing" " line {}, skipping it".format(i)) + print("Error evaluating / tokenizing line {}, skipping it".format(i)) pass full_dataset = Dataset(x, y) @@ -349,7 +348,7 @@ def train_discriminator( x.append(seq) y.append(int(np.sum(d["label"]) > 0)) except Exception: - print("Error evaluating / tokenizing" " line {}, skipping it".format(i)) + print("Error evaluating / tokenizing line {}, skipping it".format(i)) pass full_dataset = Dataset(x, y) @@ -370,7 +369,7 @@ def train_discriminator( # class \t text if dataset_fp is None: - raise ValueError("When generic dataset is selected, " "dataset_fp needs to be specified aswell.") + raise ValueError("When generic dataset is selected, dataset_fp needs to be specified as well.") classes = set() with open(dataset_fp) as f: @@ -490,15 +489,17 @@ if __name__ == "__main__": type=str, default="SST", choices=("SST", "clickbait", "toxic", "generic"), - help="dataset to train the discriminator on." - "In case of generic, the dataset is expected" - "to be a TSBV file with structure: class \\t text", + help=( + "dataset to train the discriminator on. " + "In case of generic, the dataset is expected " + "to be a TSV file with structure: class \\t text" + ), ) parser.add_argument( "--dataset_fp", type=str, default="", - help="File path of the dataset to use. " "Needed only in case of generic datadset", + help="File path of the dataset to use. Needed only in case of generic dataset", ) parser.add_argument( "--pretrained_model", type=str, default="gpt2-medium", help="Pretrained model to use as encoder" ) diff --git a/examples/research_projects/quantization-qdqbert/evaluate-hf-trt-qa.py b/examples/research_projects/quantization-qdqbert/evaluate-hf-trt-qa.py index 4a618ed77c..2a08996303 100755 --- a/examples/research_projects/quantization-qdqbert/evaluate-hf-trt-qa.py +++ b/examples/research_projects/quantization-qdqbert/evaluate-hf-trt-qa.py @@ -87,8 +87,10 @@ parser.add_argument( "--max_seq_length", default=384, type=int, - help="The maximum total input sequence length after WordPiece tokenization. Sequences " - "longer than this will be truncated, and sequences shorter than this will be padded.", + help=( + "The maximum total input sequence length after WordPiece tokenization. Sequences " + "longer than this will be truncated, and sequences shorter than this will be padded." + ), ) parser.add_argument( "--doc_stride", @@ -109,8 +111,10 @@ parser.add_argument( "--max_answer_length", default=30, type=int, - help="The maximum length of an answer that can be generated. 
This is needed because the start " + "and end predictions are not conditioned on one another." + ), ) parser.add_argument("--seed", type=int, default=42, help="random seed for initialization") diff --git a/examples/research_projects/quantization-qdqbert/quant_trainer.py b/examples/research_projects/quantization-qdqbert/quant_trainer.py index b9fbad8a4a..ce1ecb6c51 100755 --- a/examples/research_projects/quantization-qdqbert/quant_trainer.py +++ b/examples/research_projects/quantization-qdqbert/quant_trainer.py @@ -51,8 +51,10 @@ def add_arguments(parser): group.add_argument( "--recalibrate-weights", action="store_true", - help="recalibrate weight amaxes by taking the max of the weights." - " amaxes will be computed with the current quantization granularity (axis).", + help=( + "recalibrate weight amaxes by taking the max of the weights." + " amaxes will be computed with the current quantization granularity (axis)." + ), ) diff --git a/examples/research_projects/quantization-qdqbert/run_quant_qa.py b/examples/research_projects/quantization-qdqbert/run_quant_qa.py index 36bfb45c8f..97eece4c1d 100755 --- a/examples/research_projects/quantization-qdqbert/run_quant_qa.py +++ b/examples/research_projects/quantization-qdqbert/run_quant_qa.py @@ -83,8 +83,10 @@ class ModelArguments: use_auth_token: bool = field( default=False, metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." + "help": ( + "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + ) }, ) do_calib: bool = field(default=False, metadata={"help": "Whether to run calibration of quantization ranges."}) @@ -126,37 +128,46 @@ class DataTrainingArguments: max_seq_length: int = field( default=384, metadata={ - "help": "The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded." + "help": ( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ) }, ) pad_to_max_length: bool = field( default=True, metadata={ - "help": "Whether to pad all samples to `max_seq_length`. " - "If False, will pad the samples dynamically when batching to the maximum length in the batch (which can " - "be faster on GPU but will be slower on TPU)." + "help": ( + "Whether to pad all samples to `max_seq_length`. If False, will pad the samples dynamically when" + " batching to the maximum length in the batch (which can be faster on GPU but will be slower on TPU)." + ) }, ) max_train_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + ) }, ) max_eval_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + ) }, ) max_predict_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of prediction examples to this " - "value if set." 
+ "help": ( + "For debugging purposes or quicker training, truncate the number of prediction examples to this " + "value if set." + ) }, ) version_2_with_negative: bool = field( @@ -165,9 +176,11 @@ class DataTrainingArguments: null_score_diff_threshold: float = field( default=0.0, metadata={ - "help": "The threshold used to select the null answer: if the best answer has a score that is less than " - "the score of the null answer minus this threshold, the null answer is selected for this example. " - "Only useful when `version_2_with_negative=True`." + "help": ( + "The threshold used to select the null answer: if the best answer has a score that is less than " + "the score of the null answer minus this threshold, the null answer is selected for this example. " + "Only useful when `version_2_with_negative=True`." + ) }, ) doc_stride: int = field( @@ -181,8 +194,10 @@ class DataTrainingArguments: max_answer_length: int = field( default=30, metadata={ - "help": "The maximum length of an answer that can be generated. This is needed because the start " - "and end predictions are not conditioned on one another." + "help": ( + "The maximum length of an answer that can be generated. This is needed because the start " + "and end predictions are not conditioned on one another." + ) }, ) @@ -328,9 +343,9 @@ def main(): # Tokenizer check: this script requires a fast tokenizer. if not isinstance(tokenizer, PreTrainedTokenizerFast): raise ValueError( - "This example script only works for models that have a fast tokenizer. Checkout the big table of models " - "at https://huggingface.co/transformers/index.html#supported-frameworks to find the model types that meet this " - "requirement" + "This example script only works for models that have a fast tokenizer. Checkout the big table of models at" + " https://huggingface.co/transformers/index.html#supported-frameworks to find the model types that meet" + " this requirement" ) # Preprocessing the datasets. diff --git a/examples/research_projects/rag-end2end-retriever/callbacks_rag.py b/examples/research_projects/rag-end2end-retriever/callbacks_rag.py index 55fc9655df..c05db23f18 100644 --- a/examples/research_projects/rag-end2end-retriever/callbacks_rag.py +++ b/examples/research_projects/rag-end2end-retriever/callbacks_rag.py @@ -31,7 +31,8 @@ def get_checkpoint_callback(output_dir, metric): exp = "{val_avg_loss:.4f}-{step_count}" else: raise NotImplementedError( - f"seq2seq callbacks only support rouge2 and bleu, got {metric}, You can make your own by adding to this function." + f"seq2seq callbacks only support rouge2 and bleu, got {metric}, You can make your own by adding to this" + " function." 
) checkpoint_callback = ModelCheckpoint( diff --git a/examples/research_projects/rag-end2end-retriever/eval_rag.py b/examples/research_projects/rag-end2end-retriever/eval_rag.py index 05f78c3d6c..a8e7abbca6 100644 --- a/examples/research_projects/rag-end2end-retriever/eval_rag.py +++ b/examples/research_projects/rag-end2end-retriever/eval_rag.py @@ -146,7 +146,10 @@ def get_args(): "--model_type", choices=["rag_sequence", "rag_token", "bart"], type=str, - help="RAG model type: rag_sequence, rag_token or bart, if none specified, the type is inferred from the model_name_or_path", + help=( + "RAG model type: rag_sequence, rag_token or bart, if none specified, the type is inferred from the" + " model_name_or_path" + ), ) parser.add_argument( "--index_name", @@ -174,7 +177,10 @@ def get_args(): choices=["e2e", "retrieval"], default="e2e", type=str, - help="Evaluation mode, e2e calculates exact match and F1 of the downstream task, retrieval calculates precision@k.", + help=( + "Evaluation mode, e2e calculates exact match and F1 of the downstream task, retrieval calculates" + " precision@k." + ), ) parser.add_argument("--k", default=1, type=int, help="k for the precision@k calculation") parser.add_argument( @@ -196,9 +202,11 @@ def get_args(): default="qa", type=str, choices=["qa", "ans"], - help="Format of the gold data file" - "qa - a single line in the following format: question [tab] answer_list" - "ans - a single line of the gold file contains the expected answer string", + help=( + "Format of the gold data file. " + "qa - a single line in the following format: question [tab] answer_list. " + "ans - a single line of the gold file contains the expected answer string" + ), ) parser.add_argument( "--predictions_path", diff --git a/examples/research_projects/rag-end2end-retriever/finetune_rag.py b/examples/research_projects/rag-end2end-retriever/finetune_rag.py index 96cbc0f7c5..ac781c3254 100644 --- a/examples/research_projects/rag-end2end-retriever/finetune_rag.py +++ b/examples/research_projects/rag-end2end-retriever/finetune_rag.py @@ -515,29 +515,37 @@ class GenerativeQAModule(BaseTransformer): "--max_source_length", default=128, type=int, - help="The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded.", + help=( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ), ) parser.add_argument( "--max_target_length", default=25, type=int, - help="The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded.", + help=( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ), ) parser.add_argument( "--val_max_target_length", default=25, type=int, - help="The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded.", + help=( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ), ) parser.add_argument( "--test_max_target_length", default=25, type=int, - help="The maximum total input sequence length after tokenization. 
Sequences longer " - "than this will be truncated, sequences shorter will be padded.", + help=( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ), ) parser.add_argument("--logger_name", type=str, choices=["default", "wandb", "wandb_shared"], default="default") parser.add_argument("--n_train", type=int, default=-1, required=False, help="# examples. -1 means use all.") @@ -555,7 +563,10 @@ class GenerativeQAModule(BaseTransformer): type=int, default=-1, required=False, - help="-1 means never early stop. early_stopping_patience is measured in validation checks, not epochs. So val_check_interval will effect it.", + help=( + "-1 means never early stop. early_stopping_patience is measured in validation checks, not epochs. So" + " val_check_interval will effect it." + ), ) parser.add_argument( "--distributed-port", type=int, default=-1, required=False, help="Port number for distributed training." @@ -564,7 +575,10 @@ class GenerativeQAModule(BaseTransformer): "--model_type", choices=["rag_sequence", "rag_token", "bart", "t5"], type=str, - help="RAG model type: sequence or token, if none specified, the type is inferred from the model_name_or_path", + help=( + "RAG model type: sequence or token, if none specified, the type is inferred from the" + " model_name_or_path" + ), ) parser.add_argument( "--context_encoder_name", @@ -590,7 +604,10 @@ class GenerativeQAModule(BaseTransformer): parser.add_argument( "--gpu_order", type=str, - help="order of the GPU used during the fine-tuning. Used to finding free GPUs during the re-encode process. I do not have many GPUs :)", + help=( + "order of the GPU used during the fine-tuning. Used to finding free GPUs during the re-encode" + " process. I do not have many GPUs :)" + ), ) parser.add_argument("--indexing_freq", type=int, help="frequency of re-encode process") @@ -602,39 +619,53 @@ class GenerativeQAModule(BaseTransformer): "--index_name", type=str, default=None, - help="Name of the index to use: 'hf' for a canonical dataset from the datasets library (default), 'custom' for a local index, or 'legacy' for the orignal one)", + help=( + "Name of the index to use: 'hf' for a canonical dataset from the datasets library (default), 'custom'" + " for a local index, or 'legacy' for the orignal one)" + ), ) parser.add_argument( "--passages_path", type=str, default=str(Path(__file__).parent / "test_run" / "dummy-kb" / "my_knowledge_dataset"), - help="Path to the dataset of passages for custom index. More info about custom indexes in the RagRetriever documentation as well as in `examples/rag/use_own_knowledge_dataset.py`", + help=( + "Path to the dataset of passages for custom index. More info about custom indexes in the RagRetriever" + " documentation as well as in `examples/rag/use_own_knowledge_dataset.py`" + ), ) parser.add_argument( "--index_path", type=str, default=str(Path(__file__).parent / "test_run" / "dummy-kb" / "my_knowledge_dataset_hnsw_index.faiss"), - help="Path to the faiss index for custom index. More info about custom indexes in the RagRetriever documentation as well as in `examples/rag/use_own_knowledge_dataset.py`", + help=( + "Path to the faiss index for custom index. 
 More info about custom indexes in the RagRetriever" + " documentation as well as in `examples/rag/use_own_knowledge_dataset.py`" + ), ) parser.add_argument( "--distributed_retriever", choices=["ray", "pytorch"], type=str, default="ray", - help="What implementation to use for distributed retriever? If " - "pytorch is selected, the index is loaded on training " - "worker 0, and torch.distributed is used to handle " - "communication between training worker 0, and the other " - "training workers. If ray is selected, the Ray library is " - "used to create load the index on separate processes, " - "and Ray handles the communication between the training " - "workers and the retrieval actors.", + help=( + "What implementation to use for distributed retriever? If " + "pytorch is selected, the index is loaded on training " + "worker 0, and torch.distributed is used to handle " + "communication between training worker 0, and the other " + "training workers. If ray is selected, the Ray library is " + "used to create and load the index on separate processes, " + "and Ray handles the communication between the training " + "workers and the retrieval actors." + ), ) parser.add_argument( "--use_dummy_dataset", type=bool, default=False, - help="Whether to use the dummy version of the dataset index. More info about custom indexes in the RagRetriever documentation as well as in `examples/rag/use_own_knowledge_dataset.py`", + help=( + "Whether to use the dummy version of the dataset index. More info about custom indexes in the" + " RagRetriever documentation as well as in `examples/rag/use_own_knowledge_dataset.py`" + ), ) return parser @@ -645,18 +676,22 @@ class GenerativeQAModule(BaseTransformer): "--ray-address", default="auto", type=str, - help="The address of the Ray cluster to connect to. If not " - "specified, Ray will attempt to automatically detect the " - "cluster. Has no effect if pytorch is used as the distributed " - "retriever.", + help=( + "The address of the Ray cluster to connect to. If not " + "specified, Ray will attempt to automatically detect the " + "cluster. Has no effect if pytorch is used as the distributed " + "retriever." + ), ) parser.add_argument( "--num_retrieval_workers", type=int, default=1, - help="The number of retrieval actors to use when Ray is selected" - "for the distributed retriever. Has no effect when " - "distributed_retriever is set to pytorch.", + help=( + "The number of retrieval actors to use when Ray is selected " + "for the distributed retriever. Has no effect when " + "distributed_retriever is set to pytorch." + ), ) return parser @@ -686,7 +721,7 @@ def main(args=None, model=None) -> GenerativeQAModule: named_actors = [] if args.distributed_retriever == "ray" and args.gpus > 1: if not is_ray_available(): - raise RuntimeError("Please install Ray to use the Ray " "distributed retriever.") + raise RuntimeError("Please install Ray to use the Ray distributed retriever.") # Connect to an existing Ray cluster. try: ray.init(address=args.ray_address) diff --git a/examples/research_projects/rag-end2end-retriever/lightning_base.py b/examples/research_projects/rag-end2end-retriever/lightning_base.py index 1df0fae584..1843b09148 100644 --- a/examples/research_projects/rag-end2end-retriever/lightning_base.py +++ b/examples/research_projects/rag-end2end-retriever/lightning_base.py @@ -333,8 +333,10 @@ def add_generic_args(parser, root_dir) -> None: "--fp16_opt_level", type=str, default="O2", - help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']." 
- "See details at https://nvidia.github.io/apex/amp.html", + help=( + "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']." + "See details at https://nvidia.github.io/apex/amp.html" + ), ) parser.add_argument("--n_tpu_cores", dest="tpu_cores", type=int) parser.add_argument("--max_grad_norm", dest="gradient_clip_val", default=1.0, type=float, help="Max gradient norm") diff --git a/examples/research_projects/rag-end2end-retriever/use_own_knowledge_dataset.py b/examples/research_projects/rag-end2end-retriever/use_own_knowledge_dataset.py index 213aa8d882..432111a278 100644 --- a/examples/research_projects/rag-end2end-retriever/use_own_knowledge_dataset.py +++ b/examples/research_projects/rag-end2end-retriever/use_own_knowledge_dataset.py @@ -121,7 +121,10 @@ class RagExampleArguments: dpr_ctx_encoder_model_name: str = field( default="facebook/dpr-ctx_encoder-multiset-base", metadata={ - "help": "The DPR context encoder model to use. Either 'facebook/dpr-ctx_encoder-single-nq-base' or 'facebook/dpr-ctx_encoder-multiset-base'" + "help": ( + "The DPR context encoder model to use. Either 'facebook/dpr-ctx_encoder-single-nq-base' or" + " 'facebook/dpr-ctx_encoder-multiset-base'" + ) }, ) output_dir: Optional[str] = field( @@ -155,7 +158,9 @@ class IndexHnswArguments: m: int = field( default=128, metadata={ - "help": "The number of bi-directional links created for every new element during the HNSW index construction." + "help": ( + "The number of bi-directional links created for every new element during the HNSW index construction." + ) }, ) diff --git a/examples/research_projects/rag/callbacks_rag.py b/examples/research_projects/rag/callbacks_rag.py index a2d87f8224..af1595b08e 100644 --- a/examples/research_projects/rag/callbacks_rag.py +++ b/examples/research_projects/rag/callbacks_rag.py @@ -29,7 +29,8 @@ def get_checkpoint_callback(output_dir, metric): exp = "{val_avg_em:.4f}-{step_count}" else: raise NotImplementedError( - f"seq2seq callbacks only support rouge2 and bleu, got {metric}, You can make your own by adding to this function." + f"seq2seq callbacks only support rouge2 and bleu, got {metric}, You can make your own by adding to this" + " function." 
) checkpoint_callback = ModelCheckpoint( diff --git a/examples/research_projects/rag/consolidate_rag_checkpoint.py b/examples/research_projects/rag/consolidate_rag_checkpoint.py index b9ed7ec0f8..39ba7e91f6 100644 --- a/examples/research_projects/rag/consolidate_rag_checkpoint.py +++ b/examples/research_projects/rag/consolidate_rag_checkpoint.py @@ -80,7 +80,10 @@ if __name__ == "__main__": parser.add_argument( "--config_name_or_path", type=str, - help="Identifier of the model config to use, if not provided, resolves to a base config for a given ``model_type``", + help=( + "Identifier of the model config to use, if not provided, resolves to a base config for a given" + " ``model_type``" + ), ) args = parser.parse_args() diff --git a/examples/research_projects/rag/eval_rag.py b/examples/research_projects/rag/eval_rag.py index 05f78c3d6c..a8e7abbca6 100644 --- a/examples/research_projects/rag/eval_rag.py +++ b/examples/research_projects/rag/eval_rag.py @@ -146,7 +146,10 @@ def get_args(): "--model_type", choices=["rag_sequence", "rag_token", "bart"], type=str, - help="RAG model type: rag_sequence, rag_token or bart, if none specified, the type is inferred from the model_name_or_path", + help=( + "RAG model type: rag_sequence, rag_token or bart, if none specified, the type is inferred from the" + " model_name_or_path" + ), ) parser.add_argument( "--index_name", @@ -174,7 +177,10 @@ def get_args(): choices=["e2e", "retrieval"], default="e2e", type=str, - help="Evaluation mode, e2e calculates exact match and F1 of the downstream task, retrieval calculates precision@k.", + help=( + "Evaluation mode, e2e calculates exact match and F1 of the downstream task, retrieval calculates" + " precision@k." + ), ) parser.add_argument("--k", default=1, type=int, help="k for the precision@k calculation") parser.add_argument( @@ -196,9 +202,11 @@ def get_args(): default="qa", type=str, choices=["qa", "ans"], - help="Format of the gold data file" - "qa - a single line in the following format: question [tab] answer_list" - "ans - a single line of the gold file contains the expected answer string", + help=( + "Format of the gold data file" + "qa - a single line in the following format: question [tab] answer_list" + "ans - a single line of the gold file contains the expected answer string" + ), ) parser.add_argument( "--predictions_path", diff --git a/examples/research_projects/rag/finetune_rag.py b/examples/research_projects/rag/finetune_rag.py index 2fd4ef7659..f5cef614e2 100644 --- a/examples/research_projects/rag/finetune_rag.py +++ b/examples/research_projects/rag/finetune_rag.py @@ -383,29 +383,37 @@ class GenerativeQAModule(BaseTransformer): "--max_source_length", default=128, type=int, - help="The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded.", + help=( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ), ) parser.add_argument( "--max_target_length", default=25, type=int, - help="The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded.", + help=( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." 
+        ),
     )
     parser.add_argument(
         "--val_max_target_length",
         default=25,
         type=int,
-        help="The maximum total input sequence length after tokenization. Sequences longer "
-        "than this will be truncated, sequences shorter will be padded.",
+        help=(
+            "The maximum total input sequence length after tokenization. Sequences longer "
+            "than this will be truncated, sequences shorter will be padded."
+        ),
     )
     parser.add_argument(
         "--test_max_target_length",
         default=25,
         type=int,
-        help="The maximum total input sequence length after tokenization. Sequences longer "
-        "than this will be truncated, sequences shorter will be padded.",
+        help=(
+            "The maximum total input sequence length after tokenization. Sequences longer "
+            "than this will be truncated, sequences shorter will be padded."
+        ),
     )
     parser.add_argument("--logger_name", type=str, choices=["default", "wandb", "wandb_shared"], default="default")
     parser.add_argument("--n_train", type=int, default=-1, required=False, help="# examples. -1 means use all.")
@@ -423,7 +431,10 @@ class GenerativeQAModule(BaseTransformer):
         type=int,
         default=-1,
         required=False,
-        help="-1 means never early stop. early_stopping_patience is measured in validation checks, not epochs. So val_check_interval will effect it.",
+        help=(
+            "-1 means never early stop. early_stopping_patience is measured in validation checks, not epochs. So"
+            " val_check_interval will effect it."
+        ),
     )
     parser.add_argument(
         "--distributed-port", type=int, default=-1, required=False, help="Port number for distributed training."
     )
     parser.add_argument(
         "--model_type",
         choices=["rag_sequence", "rag_token", "bart", "t5"],
         type=str,
-        help="RAG model type: sequence or token, if none specified, the type is inferred from the model_name_or_path",
+        help=(
+            "RAG model type: sequence or token, if none specified, the type is inferred from the"
+            " model_name_or_path"
+        ),
     )
     return parser
@@ -442,39 +456,53 @@ class GenerativeQAModule(BaseTransformer):
         "--index_name",
         type=str,
         default=None,
-        help="Name of the index to use: 'hf' for a canonical dataset from the datasets library (default), 'custom' for a local index, or 'legacy' for the orignal one)",
+        help=(
+            "Name of the index to use: 'hf' for a canonical dataset from the datasets library (default), 'custom'"
+            " for a local index, or 'legacy' for the orignal one)"
+        ),
     )
     parser.add_argument(
         "--passages_path",
         type=str,
         default=None,
-        help="Path to the dataset of passages for custom index. More info about custom indexes in the RagRetriever documentation as well as in `examples/rag/use_own_knowledge_dataset.py`",
+        help=(
+            "Path to the dataset of passages for custom index. More info about custom indexes in the RagRetriever"
+            " documentation as well as in `examples/rag/use_own_knowledge_dataset.py`"
+        ),
     )
     parser.add_argument(
         "--index_path",
         type=str,
         default=None,
-        help="Path to the faiss index for custom index. More info about custom indexes in the RagRetriever documentation as well as in `examples/rag/use_own_knowledge_dataset.py`",
+        help=(
+            "Path to the faiss index for custom index. More info about custom indexes in the RagRetriever"
+            " documentation as well as in `examples/rag/use_own_knowledge_dataset.py`"
+        ),
     )
     parser.add_argument(
         "--distributed_retriever",
         choices=["ray", "pytorch"],
         type=str,
         default="pytorch",
-        help="What implementation to use for distributed retriever? If "
-        "pytorch is selected, the index is loaded on training "
-        "worker 0, and torch.distributed is used to handle "
-        "communication between training worker 0, and the other "
-        "training workers. If ray is selected, the Ray library is "
-        "used to create load the index on separate processes, "
-        "and Ray handles the communication between the training "
-        "workers and the retrieval actors.",
+        help=(
+            "What implementation to use for distributed retriever? If "
+            "pytorch is selected, the index is loaded on training "
+            "worker 0, and torch.distributed is used to handle "
+            "communication between training worker 0, and the other "
+            "training workers. If ray is selected, the Ray library is "
+            "used to create load the index on separate processes, "
+            "and Ray handles the communication between the training "
+            "workers and the retrieval actors."
+        ),
     )
     parser.add_argument(
         "--use_dummy_dataset",
         type=bool,
         default=False,
-        help="Whether to use the dummy version of the dataset index. More info about custom indexes in the RagRetriever documentation as well as in `examples/rag/use_own_knowledge_dataset.py`",
+        help=(
+            "Whether to use the dummy version of the dataset index. More info about custom indexes in the"
+            " RagRetriever documentation as well as in `examples/rag/use_own_knowledge_dataset.py`"
+        ),
     )
     return parser
@@ -485,18 +513,22 @@ class GenerativeQAModule(BaseTransformer):
         "--ray-address",
         default="auto",
         type=str,
-        help="The address of the Ray cluster to connect to. If not "
-        "specified, Ray will attempt to automatically detect the "
-        "cluster. Has no effect if pytorch is used as the distributed "
-        "retriever.",
+        help=(
+            "The address of the Ray cluster to connect to. If not "
+            "specified, Ray will attempt to automatically detect the "
+            "cluster. Has no effect if pytorch is used as the distributed "
+            "retriever."
+        ),
     )
     parser.add_argument(
         "--num_retrieval_workers",
         type=int,
         default=1,
-        help="The number of retrieval actors to use when Ray is selected"
-        "for the distributed retriever. Has no effect when "
-        "distributed_retriever is set to pytorch.",
+        help=(
+            "The number of retrieval actors to use when Ray is selected"
+            "for the distributed retriever. Has no effect when "
+            "distributed_retriever is set to pytorch."
+        ),
     )
     return parser
@@ -514,7 +546,7 @@ def main(args=None, model=None) -> GenerativeQAModule:
     named_actors = []
     if args.distributed_retriever == "ray" and args.gpus > 1:
         if not is_ray_available():
-            raise RuntimeError("Please install Ray to use the Ray " "distributed retriever.")
+            raise RuntimeError("Please install Ray to use the Ray distributed retriever.")
         # Connect to an existing Ray cluster.
         try:
             ray.init(address=args.ray_address, namespace="rag")
diff --git a/examples/research_projects/rag/lightning_base.py b/examples/research_projects/rag/lightning_base.py
index 1e0f67627e..77830a4760 100644
--- a/examples/research_projects/rag/lightning_base.py
+++ b/examples/research_projects/rag/lightning_base.py
@@ -321,8 +321,10 @@ def add_generic_args(parser, root_dir) -> None:
         "--fp16_opt_level",
         type=str,
         default="O2",
-        help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
-        "See details at https://nvidia.github.io/apex/amp.html",
+        help=(
+            "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
+ "See details at https://nvidia.github.io/apex/amp.html" + ), ) parser.add_argument("--n_tpu_cores", dest="tpu_cores", type=int) parser.add_argument("--max_grad_norm", dest="gradient_clip_val", default=1.0, type=float, help="Max gradient norm") diff --git a/examples/research_projects/rag/use_own_knowledge_dataset.py b/examples/research_projects/rag/use_own_knowledge_dataset.py index 269765caab..dc08f50822 100644 --- a/examples/research_projects/rag/use_own_knowledge_dataset.py +++ b/examples/research_projects/rag/use_own_knowledge_dataset.py @@ -154,7 +154,10 @@ class RagExampleArguments: dpr_ctx_encoder_model_name: str = field( default="facebook/dpr-ctx_encoder-multiset-base", metadata={ - "help": "The DPR context encoder model to use. Either 'facebook/dpr-ctx_encoder-single-nq-base' or 'facebook/dpr-ctx_encoder-multiset-base'" + "help": ( + "The DPR context encoder model to use. Either 'facebook/dpr-ctx_encoder-single-nq-base' or" + " 'facebook/dpr-ctx_encoder-multiset-base'" + ) }, ) output_dir: Optional[str] = field( @@ -188,7 +191,9 @@ class IndexHnswArguments: m: int = field( default=128, metadata={ - "help": "The number of bi-directional links created for every new element during the HNSW index construction." + "help": ( + "The number of bi-directional links created for every new element during the HNSW index construction." + ) }, ) diff --git a/examples/research_projects/robust-speech-event/eval.py b/examples/research_projects/robust-speech-event/eval.py index 53cd244daf..32e3d1f2c7 100755 --- a/examples/research_projects/robust-speech-event/eval.py +++ b/examples/research_projects/robust-speech-event/eval.py @@ -24,7 +24,7 @@ def log_results(result: Dataset, args: Dict[str, str]): cer_result = cer.compute(references=result["target"], predictions=result["prediction"]) # print & log results - result_str = f"WER: {wer_result}\n" f"CER: {cer_result}" + result_str = f"WER: {wer_result}\nCER: {cer_result}" print(result_str) with open(f"{dataset_id}_eval_results.txt", "w") as f: diff --git a/examples/research_projects/robust-speech-event/run_speech_recognition_ctc_bnb.py b/examples/research_projects/robust-speech-event/run_speech_recognition_ctc_bnb.py index 2317367e7c..521036c78e 100755 --- a/examples/research_projects/robust-speech-event/run_speech_recognition_ctc_bnb.py +++ b/examples/research_projects/robust-speech-event/run_speech_recognition_ctc_bnb.py @@ -103,9 +103,11 @@ class ModelArguments: mask_time_prob: float = field( default=0.05, metadata={ - "help": "Probability of each feature vector along the time axis to be chosen as the start of the vector" - "span to be masked. Approximately ``mask_time_prob * sequence_length // mask_time_length`` feature" - "vectors will be masked along the time axis." + "help": ( + "Probability of each feature vector along the time axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_time_prob * sequence_length // mask_time_length`` feature" + "vectors will be masked along the time axis." + ) }, ) mask_time_length: int = field( @@ -115,8 +117,11 @@ class ModelArguments: mask_feature_prob: float = field( default=0.0, metadata={ - "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" - "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." 
+ "help": ( + "Probability of each feature vector along the feature axis to be chosen as the start of the vectorspan" + " to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature" + " bins will be masked along the time axis." + ) }, ) mask_feature_length: int = field( @@ -175,15 +180,19 @@ class DataTrainingArguments: max_train_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + ) }, ) max_eval_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of validation examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of validation examples to this " + "value if set." + ) }, ) chars_to_ignore: Optional[List[str]] = list_field( @@ -197,7 +206,10 @@ class DataTrainingArguments: max_duration_in_seconds: float = field( default=20.0, metadata={ - "help": "Filter audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + "help": ( + "Filter audio files that are longer than `max_duration_in_seconds` seconds to" + " 'max_duration_in_seconds`" + ) }, ) min_duration_in_seconds: float = field( @@ -206,17 +218,21 @@ class DataTrainingArguments: preprocessing_only: bool = field( default=False, metadata={ - "help": "Whether to only do data preprocessing and skip training. " - "This is especially useful when data preprocessing errors out in distributed training due to timeout. " - "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " - "so that the cached datasets can consequently be loaded in distributed training" + "help": ( + "Whether to only do data preprocessing and skip training. This is especially useful when data" + " preprocessing errors out in distributed training due to timeout. In this case, one should run the" + " preprocessing in a non-distributed setup with `preprocessing_only=True` so that the cached datasets" + " can consequently be loaded in distributed training" + ) }, ) use_auth_token: bool = field( default=False, metadata={ - "help": "If :obj:`True`, will use the token generated when running" - ":obj:`transformers-cli login` as HTTP bearer authorization for remote files." + "help": ( + "If :obj:`True`, will use the token generated when running" + ":obj:`transformers-cli login` as HTTP bearer authorization for remote files." + ) }, ) unk_token: str = field( @@ -234,10 +250,12 @@ class DataTrainingArguments: phoneme_language: Optional[str] = field( default=None, metadata={ - "help": "The target language that should be used be" - " passed to the tokenizer for tokenization. Note that" - " this is only relevant if the model classifies the" - " input audio to a sequence of phoneme sequences." + "help": ( + "The target language that should be used be" + " passed to the tokenizer for tokenization. Note that" + " this is only relevant if the model classifies the" + " input audio to a sequence of phoneme sequences." + ) }, ) @@ -406,9 +424,9 @@ def main(): if data_args.audio_column_name not in raw_datasets["train"].column_names: raise ValueError( - f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. 
" - "Make sure to set `--audio_column_name` to the correct audio column - one of " - f"{', '.join(raw_datasets['train'].column_names)}." + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'." + " Make sure to set `--audio_column_name` to the correct audio column - one of" + f" {', '.join(raw_datasets['train'].column_names)}." ) if data_args.text_column_name not in raw_datasets["train"].column_names: @@ -743,7 +761,10 @@ def main(): "finetuned_from": model_args.model_name_or_path, "tasks": "speech-recognition", "tags": ["automatic-speech-recognition", data_args.dataset_name], - "dataset_args": f"Config: {config_name}, Training split: {data_args.train_split_name}, Eval split: {data_args.eval_split_name}", + "dataset_args": ( + f"Config: {config_name}, Training split: {data_args.train_split_name}, Eval split:" + f" {data_args.eval_split_name}" + ), "dataset": f"{data_args.dataset_name.upper()} - {config_name.upper()}", } if "common_voice" in data_args.dataset_name: diff --git a/examples/research_projects/robust-speech-event/run_speech_recognition_ctc_streaming.py b/examples/research_projects/robust-speech-event/run_speech_recognition_ctc_streaming.py index 9e69178088..d357bc4696 100644 --- a/examples/research_projects/robust-speech-event/run_speech_recognition_ctc_streaming.py +++ b/examples/research_projects/robust-speech-event/run_speech_recognition_ctc_streaming.py @@ -102,9 +102,11 @@ class ModelArguments: mask_time_prob: float = field( default=0.05, metadata={ - "help": "Probability of each feature vector along the time axis to be chosen as the start of the vector" - "span to be masked. Approximately ``mask_time_prob * sequence_length // mask_time_length`` feature" - "vectors will be masked along the time axis." + "help": ( + "Probability of each feature vector along the time axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_time_prob * sequence_length // mask_time_length`` feature" + "vectors will be masked along the time axis." + ) }, ) mask_time_length: int = field( @@ -114,8 +116,11 @@ class ModelArguments: mask_feature_prob: float = field( default=0.0, metadata={ - "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" - "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + "help": ( + "Probability of each feature vector along the feature axis to be chosen as the start of the vectorspan" + " to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature" + " bins will be masked along the time axis." + ) }, ) mask_feature_length: int = field( @@ -147,8 +152,10 @@ class DataTrainingArguments: train_split_name: str = field( default="train+validation", metadata={ - "help": "The name of the training data set split to use (via the datasets library). Defaults to " - "'train+validation'" + "help": ( + "The name of the training data set split to use (via the datasets library). Defaults to " + "'train+validation'" + ) }, ) eval_split_name: str = field( @@ -175,22 +182,28 @@ class DataTrainingArguments: max_train_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." 
+ ) }, ) max_eval_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of validation examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of validation examples to this " + "value if set." + ) }, ) shuffle_buffer_size: Optional[int] = field( default=500, metadata={ - "help": "The number of streamed examples to download before shuffling them. The large the buffer, " - "the closer it is to real offline shuffling." + "help": ( + "The number of streamed examples to download before shuffling them. The large the buffer, " + "the closer it is to real offline shuffling." + ) }, ) chars_to_ignore: Optional[List[str]] = list_field( @@ -208,26 +221,32 @@ class DataTrainingArguments: preprocessing_only: bool = field( default=False, metadata={ - "help": "Whether to only do data preprocessing and skip training. " - "This is especially useful when data preprocessing errors out in distributed training due to timeout. " - "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " - "so that the cached datasets can consequently be loaded in distributed training" + "help": ( + "Whether to only do data preprocessing and skip training. This is especially useful when data" + " preprocessing errors out in distributed training due to timeout. In this case, one should run the" + " preprocessing in a non-distributed setup with `preprocessing_only=True` so that the cached datasets" + " can consequently be loaded in distributed training" + ) }, ) use_auth_token: bool = field( default=False, metadata={ - "help": "If :obj:`True`, will use the token generated when running" - ":obj:`transformers-cli login` as HTTP bearer authorization for remote files." + "help": ( + "If :obj:`True`, will use the token generated when running" + ":obj:`transformers-cli login` as HTTP bearer authorization for remote files." + ) }, ) phoneme_language: Optional[str] = field( default=None, metadata={ - "help": "The target language that should be used be" - " passed to the tokenizer for tokenization. Note that" - " this is only relevant if the model classifies the" - " input audio to a sequence of phoneme sequences." + "help": ( + "The target language that should be used be" + " passed to the tokenizer for tokenization. Note that" + " this is only relevant if the model classifies the" + " input audio to a sequence of phoneme sequences." + ) }, ) @@ -393,9 +412,9 @@ def main(): if data_args.audio_column_name not in raw_column_names["train"]: raise ValueError( - f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " - "Make sure to set `--audio_column_name` to the correct audio column - one of " - f"{', '.join(raw_column_names['train'])}." + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'." + " Make sure to set `--audio_column_name` to the correct audio column - one of" + f" {', '.join(raw_column_names['train'])}." 
) if data_args.text_column_name not in raw_column_names["train"]: @@ -641,7 +660,10 @@ def main(): "finetuned_from": model_args.model_name_or_path, "tasks": "speech-recognition", "tags": ["automatic-speech-recognition", data_args.dataset_name], - "dataset_args": f"Config: {config_name}, Training split: {data_args.train_split_name}, Eval split: {data_args.eval_split_name}", + "dataset_args": ( + f"Config: {config_name}, Training split: {data_args.train_split_name}, Eval split:" + f" {data_args.eval_split_name}" + ), "dataset": f"{data_args.dataset_name.upper()} - {config_name.upper()}", } if "common_voice" in data_args.dataset_name: diff --git a/examples/research_projects/self-training-text-classification/finetuning.py b/examples/research_projects/self-training-text-classification/finetuning.py index 8ad92359b6..eeb0a285df 100644 --- a/examples/research_projects/self-training-text-classification/finetuning.py +++ b/examples/research_projects/self-training-text-classification/finetuning.py @@ -100,15 +100,19 @@ class FTDataArguments: max_length: Optional[int] = dataclasses.field( default=128, metadata={ - "help": "The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded." + "help": ( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ) }, ) pad_to_max_length: Optional[bool] = dataclasses.field( default=False, metadata={ - "help": "Whether to pad all samples to `max_seq_length`. " - "If False, will pad the samples dynamically when batching to the maximum length in the batch." + "help": ( + "Whether to pad all samples to `max_seq_length`. " + "If False, will pad the samples dynamically when batching to the maximum length in the batch." + ) }, ) @@ -147,7 +151,10 @@ class FTTrainingArguments: weight_decay: Optional[float] = dataclasses.field( default=0.0, metadata={ - "help": "The weight decay to apply (if not zero) to all layers except all bias and LayerNorm weights in [`AdamW`] optimizer." + "help": ( + "The weight decay to apply (if not zero) to all layers except all bias and LayerNorm weights in" + " [`AdamW`] optimizer." + ) }, ) learning_rate: Optional[float] = dataclasses.field( @@ -157,13 +164,18 @@ class FTTrainingArguments: gradient_accumulation_steps: Optional[int] = dataclasses.field( default=1, metadata={ - "help": "Number of updates steps to accumulate the gradients for, before performing a backward/update pass." + "help": ( + "Number of updates steps to accumulate the gradients for, before performing a backward/update pass." + ) }, ) max_steps: Optional[int] = dataclasses.field( default=-1, metadata={ - "help": "If set to a positive number, the total number of training steps to perform. Overrides `num_train_epochs`." + "help": ( + "If set to a positive number, the total number of training steps to perform. Overrides" + " `num_train_epochs`." + ) }, ) lr_scheduler_type: Optional[str] = dataclasses.field( @@ -172,7 +184,10 @@ class FTTrainingArguments: warmup_steps: Optional[int] = dataclasses.field( default=1, metadata={ - "help": "Number of steps used for a linear warmup from 0 to `learning_rate`. Overrides any effect of `warmup_ratio`." + "help": ( + "Number of steps used for a linear warmup from 0 to `learning_rate`. Overrides any effect of" + " `warmup_ratio`." 
+ ) }, ) evaluation_strategy: Optional[str] = dataclasses.field( diff --git a/examples/research_projects/seq2seq-distillation/callbacks.py b/examples/research_projects/seq2seq-distillation/callbacks.py index 388b6d53dd..6f6ed5dd58 100644 --- a/examples/research_projects/seq2seq-distillation/callbacks.py +++ b/examples/research_projects/seq2seq-distillation/callbacks.py @@ -93,7 +93,8 @@ def get_checkpoint_callback(output_dir, metric, save_top_k=1, lower_is_better=Fa exp = "{val_avg_loss:.4f}-{step_count}" else: raise NotImplementedError( - f"seq2seq callbacks only support rouge2, bleu and loss, got {metric}, You can make your own by adding to this function." + f"seq2seq callbacks only support rouge2, bleu and loss, got {metric}, You can make your own by adding to" + " this function." ) checkpoint_callback = ModelCheckpoint( diff --git a/examples/research_projects/seq2seq-distillation/distillation.py b/examples/research_projects/seq2seq-distillation/distillation.py index 1f9106f0c0..5a403be8d5 100755 --- a/examples/research_projects/seq2seq-distillation/distillation.py +++ b/examples/research_projects/seq2seq-distillation/distillation.py @@ -52,9 +52,10 @@ class SummarizationDistiller(SummarizationModule): student.config.length_penalty = hparams.length_penalty hparams.tokenizer_name = hparams.teacher # Use teacher's tokenizer super().__init__(hparams, model=student, config=student.config) - assert ( - student.config.model_type == teacher.config.model_type - ), f"teacher, student model types should be the same, got {student.config.model_type} != {teacher.config.model_type}" + assert student.config.model_type == teacher.config.model_type, ( + f"teacher, student model types should be the same, got {student.config.model_type} !=" + f" {teacher.config.model_type}" + ) if student.config.model_type == "t5": student_encoder_layers = len(student.get_encoder().block) diff --git a/examples/research_projects/seq2seq-distillation/finetune.py b/examples/research_projects/seq2seq-distillation/finetune.py index 5874509377..c20b361d58 100755 --- a/examples/research_projects/seq2seq-distillation/finetune.py +++ b/examples/research_projects/seq2seq-distillation/finetune.py @@ -303,29 +303,37 @@ class SummarizationModule(BaseTransformer): "--max_source_length", default=1024, type=int, - help="The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded.", + help=( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ), ) parser.add_argument( "--max_target_length", default=56, type=int, - help="The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded.", + help=( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ), ) parser.add_argument( "--val_max_target_length", default=142, # these defaults are optimized for CNNDM. For xsum, see README.md. type=int, - help="The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded.", + help=( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." 
+        ),
     )
     parser.add_argument(
         "--test_max_target_length",
         default=142,
         type=int,
-        help="The maximum total input sequence length after tokenization. Sequences longer "
-        "than this will be truncated, sequences shorter will be padded.",
+        help=(
+            "The maximum total input sequence length after tokenization. Sequences longer "
+            "than this will be truncated, sequences shorter will be padded."
+        ),
     )
     parser.add_argument("--freeze_encoder", action="store_true")
     parser.add_argument("--freeze_embeds", action="store_true")
@@ -353,7 +361,10 @@ class SummarizationModule(BaseTransformer):
         type=int,
         default=-1,
         required=False,
-        help="-1 means never early stop. early_stopping_patience is measured in validation checks, not epochs. So val_check_interval will effect it.",
+        help=(
+            "-1 means never early stop. early_stopping_patience is measured in validation checks, not epochs. So"
+            " val_check_interval will effect it."
+        ),
     )
     return parser
diff --git a/examples/research_projects/seq2seq-distillation/lightning_base.py b/examples/research_projects/seq2seq-distillation/lightning_base.py
index b7f53076e3..b3104a25a8 100644
--- a/examples/research_projects/seq2seq-distillation/lightning_base.py
+++ b/examples/research_projects/seq2seq-distillation/lightning_base.py
@@ -312,8 +312,10 @@ def add_generic_args(parser, root_dir) -> None:
         "--fp16_opt_level",
         type=str,
         default="O2",
-        help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
-        "See details at https://nvidia.github.io/apex/amp.html",
+        help=(
+            "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
+            "See details at https://nvidia.github.io/apex/amp.html"
+        ),
     )
     parser.add_argument("--n_tpu_cores", dest="tpu_cores", type=int)
     parser.add_argument("--max_grad_norm", dest="gradient_clip_val", default=1.0, type=float, help="Max gradient norm")
diff --git a/examples/research_projects/seq2seq-distillation/make_student.py b/examples/research_projects/seq2seq-distillation/make_student.py
index 8d70292d0e..a4021505b9 100644
--- a/examples/research_projects/seq2seq-distillation/make_student.py
+++ b/examples/research_projects/seq2seq-distillation/make_student.py
@@ -58,7 +58,8 @@ def pick_layers_to_copy(n_student, n_teacher):
     except KeyError:
         if n_student != n_teacher:
             warnings.warn(
-                f"no hardcoded layers to copy for teacher {n_teacher} -> student {n_student}, defaulting to first {n_student}"
+                f"no hardcoded layers to copy for teacher {n_teacher} -> student {n_student}, defaulting to first"
+                f" {n_student}"
             )
     return list(range(n_student))
@@ -144,7 +145,8 @@ def create_student_by_copying_alternating_layers(
     if copy_first_teacher_layers:  # Our copying is done. We just log and save
         e_layers_to_copy, d_layers_to_copy = list(range(e)), list(range(d))
         logger.info(
-            f"Copied encoder layers {e_layers_to_copy} and decoder layers {d_layers_to_copy}. Saving them to {save_path}"
+            f"Copied encoder layers {e_layers_to_copy} and decoder layers {d_layers_to_copy}. Saving them to"
+            f" {save_path}"
        )
         student.save_pretrained(save_path)
         return student, e_layers_to_copy, d_layers_to_copy
diff --git a/examples/research_projects/seq2seq-distillation/run_eval.py b/examples/research_projects/seq2seq-distillation/run_eval.py
index de752c7df1..3f685884e8 100755
--- a/examples/research_projects/seq2seq-distillation/run_eval.py
+++ b/examples/research_projects/seq2seq-distillation/run_eval.py
@@ -108,7 +108,10 @@ def run_generate(verbose=True):
         nargs="?",
         type=str,
         const=datetime_now(),
-        help="use in conjunction w/ --dump-args to print with the results whatever other info you'd like, e.g. lang=en-ru. If no value is passed, the current datetime string will be used.",
+        help=(
+            "use in conjunction w/ --dump-args to print with the results whatever other info you'd like, e.g."
+            " lang=en-ru. If no value is passed, the current datetime string will be used."
+        ),
     )
     # Unspecified args like --num_beams=2 --decoder_start_token_id=4 are passed to model.generate
     args, rest = parser.parse_known_args()
diff --git a/examples/research_projects/tapex/run_tabfact_with_tapex.py b/examples/research_projects/tapex/run_tabfact_with_tapex.py
index 0ed573ad9c..19c21c3394 100644
--- a/examples/research_projects/tapex/run_tabfact_with_tapex.py
+++ b/examples/research_projects/tapex/run_tabfact_with_tapex.py
@@ -77,8 +77,10 @@ class DataTrainingArguments:
     max_seq_length: int = field(
         default=1024,
         metadata={
-            "help": "The maximum total input sequence length after tokenization. Sequences longer "
-            "than this will be truncated, sequences shorter will be padded."
+            "help": (
+                "The maximum total input sequence length after tokenization. Sequences longer "
+                "than this will be truncated, sequences shorter will be padded."
+            )
         },
     )
     overwrite_cache: bool = field(
@@ -87,29 +89,37 @@ class DataTrainingArguments:
     pad_to_max_length: bool = field(
         default=False,
         metadata={
-            "help": "Whether to pad all samples to `max_seq_length`. "
-            "If False, will pad the samples dynamically when batching to the maximum length in the batch."
+            "help": (
+                "Whether to pad all samples to `max_seq_length`. "
+                "If False, will pad the samples dynamically when batching to the maximum length in the batch."
+            )
         },
     )
     max_train_samples: Optional[int] = field(
         default=None,
         metadata={
-            "help": "For debugging purposes or quicker training, truncate the number of training examples to this "
-            "value if set."
+            "help": (
+                "For debugging purposes or quicker training, truncate the number of training examples to this "
+                "value if set."
+            )
         },
     )
     max_eval_samples: Optional[int] = field(
         default=None,
         metadata={
-            "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
-            "value if set."
+            "help": (
+                "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
+                "value if set."
+            )
         },
     )
     max_predict_samples: Optional[int] = field(
         default=None,
         metadata={
-            "help": "For debugging purposes or quicker training, truncate the number of prediction examples to this "
-            "value if set."
+            "help": (
+                "For debugging purposes or quicker training, truncate the number of prediction examples to this "
+                "value if set."
+            )
         },
     )
     train_file: Optional[str] = field(
@@ -164,8 +174,10 @@ class ModelArguments:
     use_auth_token: bool = field(
         default=False,
         metadata={
-            "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
-            "with private models)."
+ "help": ( + "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + ) }, ) diff --git a/examples/research_projects/tapex/run_wikisql_with_tapex.py b/examples/research_projects/tapex/run_wikisql_with_tapex.py index 594c83cb6b..461bfbec9a 100644 --- a/examples/research_projects/tapex/run_wikisql_with_tapex.py +++ b/examples/research_projects/tapex/run_wikisql_with_tapex.py @@ -82,8 +82,10 @@ class ModelArguments: tokenizer_name: Optional[str] = field( default=None, metadata={ - "help": "Pretrained tokenizer name or path if not the same as model_name. " - "By default we use BART-large tokenizer for TAPEX-large." + "help": ( + "Pretrained tokenizer name or path if not the same as model_name. " + "By default we use BART-large tokenizer for TAPEX-large." + ) }, ) cache_dir: Optional[str] = field( @@ -101,8 +103,10 @@ class ModelArguments: use_auth_token: bool = field( default=False, metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." + "help": ( + "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + ) }, ) @@ -125,14 +129,15 @@ class DataTrainingArguments: validation_file: Optional[str] = field( default=None, metadata={ - "help": "An optional input evaluation data file to evaluate the metrics (rouge) on " - "(a jsonlines or csv file)." + "help": ( + "An optional input evaluation data file to evaluate the metrics (rouge) on (a jsonlines or csv file)." + ) }, ) test_file: Optional[str] = field( default=None, metadata={ - "help": "An optional input test data file to evaluate the metrics (rouge) on " "(a jsonlines or csv file)." + "help": "An optional input test data file to evaluate the metrics (rouge) on (a jsonlines or csv file)." }, ) overwrite_cache: bool = field( @@ -145,60 +150,76 @@ class DataTrainingArguments: max_source_length: Optional[int] = field( default=1024, metadata={ - "help": "The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded." + "help": ( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ) }, ) max_target_length: Optional[int] = field( default=128, metadata={ - "help": "The maximum total sequence length for target text after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded." + "help": ( + "The maximum total sequence length for target text after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ) }, ) val_max_target_length: Optional[int] = field( default=None, metadata={ - "help": "The maximum total sequence length for validation target text after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`." - "This argument is also used to override the ``max_length`` param of ``model.generate``, which is used " - "during ``evaluate`` and ``predict``." + "help": ( + "The maximum total sequence length for validation target text after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`." 
+ "This argument is also used to override the ``max_length`` param of ``model.generate``, which is used " + "during ``evaluate`` and ``predict``." + ) }, ) pad_to_max_length: bool = field( default=False, metadata={ - "help": "Whether to pad all samples to model maximum sentence length. " - "If False, will pad the samples dynamically when batching to the maximum length in the batch. More " - "efficient on GPU but very bad for TPU." + "help": ( + "Whether to pad all samples to model maximum sentence length. " + "If False, will pad the samples dynamically when batching to the maximum length in the batch. More " + "efficient on GPU but very bad for TPU." + ) }, ) max_train_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + ) }, ) max_eval_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + ) }, ) max_predict_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of prediction examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of prediction examples to this " + "value if set." + ) }, ) num_beams: Optional[int] = field( default=None, metadata={ - "help": "Number of beams to use for evaluation. This argument will be passed to ``model.generate``, " - "which is used during ``evaluate`` and ``predict``." + "help": ( + "Number of beams to use for evaluation. This argument will be passed to ``model.generate``, " + "which is used during ``evaluate`` and ``predict``." + ) }, ) ignore_pad_token_for_loss: bool = field( diff --git a/examples/research_projects/tapex/run_wikitablequestions_with_tapex.py b/examples/research_projects/tapex/run_wikitablequestions_with_tapex.py index 4398309566..1750adc546 100644 --- a/examples/research_projects/tapex/run_wikitablequestions_with_tapex.py +++ b/examples/research_projects/tapex/run_wikitablequestions_with_tapex.py @@ -80,8 +80,10 @@ class ModelArguments: tokenizer_name: Optional[str] = field( default=None, metadata={ - "help": "Pretrained tokenizer name or path if not the same as model_name. " - "By default we use BART-large tokenizer for TAPEX-large." + "help": ( + "Pretrained tokenizer name or path if not the same as model_name. " + "By default we use BART-large tokenizer for TAPEX-large." + ) }, ) cache_dir: Optional[str] = field( @@ -99,8 +101,10 @@ class ModelArguments: use_auth_token: bool = field( default=False, metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." + "help": ( + "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + ) }, ) @@ -123,14 +127,15 @@ class DataTrainingArguments: validation_file: Optional[str] = field( default=None, metadata={ - "help": "An optional input evaluation data file to evaluate the metrics (rouge) on " - "(a jsonlines or csv file)." 
+ "help": ( + "An optional input evaluation data file to evaluate the metrics (rouge) on (a jsonlines or csv file)." + ) }, ) test_file: Optional[str] = field( default=None, metadata={ - "help": "An optional input test data file to evaluate the metrics (rouge) on " "(a jsonlines or csv file)." + "help": "An optional input test data file to evaluate the metrics (rouge) on (a jsonlines or csv file)." }, ) overwrite_cache: bool = field( @@ -143,60 +148,76 @@ class DataTrainingArguments: max_source_length: Optional[int] = field( default=1024, metadata={ - "help": "The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded." + "help": ( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ) }, ) max_target_length: Optional[int] = field( default=128, metadata={ - "help": "The maximum total sequence length for target text after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded." + "help": ( + "The maximum total sequence length for target text after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ) }, ) val_max_target_length: Optional[int] = field( default=None, metadata={ - "help": "The maximum total sequence length for validation target text after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`." - "This argument is also used to override the ``max_length`` param of ``model.generate``, which is used " - "during ``evaluate`` and ``predict``." + "help": ( + "The maximum total sequence length for validation target text after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`." + "This argument is also used to override the ``max_length`` param of ``model.generate``, which is used " + "during ``evaluate`` and ``predict``." + ) }, ) pad_to_max_length: bool = field( default=False, metadata={ - "help": "Whether to pad all samples to model maximum sentence length. " - "If False, will pad the samples dynamically when batching to the maximum length in the batch. More " - "efficient on GPU but very bad for TPU." + "help": ( + "Whether to pad all samples to model maximum sentence length. " + "If False, will pad the samples dynamically when batching to the maximum length in the batch. More " + "efficient on GPU but very bad for TPU." + ) }, ) max_train_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + ) }, ) max_eval_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + ) }, ) max_predict_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of prediction examples to this " - "value if set." 
+ "help": ( + "For debugging purposes or quicker training, truncate the number of prediction examples to this " + "value if set." + ) }, ) num_beams: Optional[int] = field( default=None, metadata={ - "help": "Number of beams to use for evaluation. This argument will be passed to ``model.generate``, " - "which is used during ``evaluate`` and ``predict``." + "help": ( + "Number of beams to use for evaluation. This argument will be passed to ``model.generate``, " + "which is used during ``evaluate`` and ``predict``." + ) }, ) ignore_pad_token_for_loss: bool = field( diff --git a/examples/research_projects/visual_bert/modeling_frcnn.py b/examples/research_projects/visual_bert/modeling_frcnn.py index 39a0c6aea8..33c1133e95 100644 --- a/examples/research_projects/visual_bert/modeling_frcnn.py +++ b/examples/research_projects/visual_bert/modeling_frcnn.py @@ -592,7 +592,7 @@ class Matcher(object): match_labels = matches.new_full(matches.size(), 1, dtype=torch.int8) - for (l, low, high) in zip(self.labels, self.thresholds[:-1], self.thresholds[1:]): + for l, low, high in zip(self.labels, self.thresholds[:-1], self.thresholds[1:]): low_high = (matched_vals >= low) & (matched_vals < high) match_labels[low_high] = l @@ -1037,9 +1037,9 @@ class ResNet(Backbone): curr_kwargs = {} for k, v in kwargs.items(): if k.endswith("_per_block"): - assert len(v) == num_blocks, ( - f"Argument '{k}' of make_stage should have the " f"same length as num_blocks={num_blocks}." - ) + assert ( + len(v) == num_blocks + ), f"Argument '{k}' of make_stage should have the same length as num_blocks={num_blocks}." newk = k[: -len("_per_block")] assert newk not in kwargs, f"Cannot call make_stage with both {k} and {newk}!" curr_kwargs[newk] = v[i] @@ -1401,7 +1401,7 @@ class AnchorGenerator(nn.Module): def grid_anchors(self, grid_sizes): anchors = [] - for (size, stride, base_anchors) in zip(grid_sizes, self.strides, self.cell_anchors): + for size, stride, base_anchors in zip(grid_sizes, self.strides, self.cell_anchors): shift_x, shift_y = _create_grid_offsets(size, stride, self.offset, base_anchors.device) shifts = torch.stack((shift_x, shift_y, shift_x, shift_y), dim=1) @@ -1708,10 +1708,9 @@ class GeneralizedRCNN(nn.Module): elif os.path.isfile(pretrained_model_name_or_path) or is_remote_url(pretrained_model_name_or_path): archive_file = pretrained_model_name_or_path elif os.path.isfile(pretrained_model_name_or_path + ".index"): - assert ( - from_tf - ), "We found a TensorFlow checkpoint at {}, please set from_tf to True to load from this checkpoint".format( - pretrained_model_name_or_path + ".index" + assert from_tf, ( + "We found a TensorFlow checkpoint at {}, please set from_tf to True to load from this checkpoint" + .format(pretrained_model_name_or_path + ".index") ) archive_file = pretrained_model_name_or_path + ".index" else: @@ -1797,26 +1796,28 @@ class GeneralizedRCNN(nn.Module): if len(unexpected_keys) > 0: print( - f"Some weights of the model checkpoint at {pretrained_model_name_or_path} were not used when " - f"initializing {model.__class__.__name__}: {unexpected_keys}\n" - f"- This IS expected if you are initializing {model.__class__.__name__} from the checkpoint of a model trained on another task " - f"or with another architecture (e.g. 
initializing a BertForSequenceClassification model from a BertForPreTraining model).\n" - f"- This IS NOT expected if you are initializing {model.__class__.__name__} from the checkpoint of a model that you expect " - f"to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model)." + f"Some weights of the model checkpoint at {pretrained_model_name_or_path} were not used when" + f" initializing {model.__class__.__name__}: {unexpected_keys}\n- This IS expected if you are" + f" initializing {model.__class__.__name__} from the checkpoint of a model trained on another task or" + " with another architecture (e.g. initializing a BertForSequenceClassification model from a" + " BertForPreTraining model).\n- This IS NOT expected if you are initializing" + f" {model.__class__.__name__} from the checkpoint of a model that you expect to be exactly identical" + " (initializing a BertForSequenceClassification model from a BertForSequenceClassification model)." ) else: print(f"All model checkpoint weights were used when initializing {model.__class__.__name__}.\n") if len(missing_keys) > 0: print( - f"Some weights of {model.__class__.__name__} were not initialized from the model checkpoint at {pretrained_model_name_or_path} " - f"and are newly initialized: {missing_keys}\n" - f"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference." + f"Some weights of {model.__class__.__name__} were not initialized from the model checkpoint at" + f" {pretrained_model_name_or_path} and are newly initialized: {missing_keys}\nYou should probably" + " TRAIN this model on a down-stream task to be able to use it for predictions and inference." ) else: print( - f"All the weights of {model.__class__.__name__} were initialized from the model checkpoint at {pretrained_model_name_or_path}.\n" - f"If your task is similar to the task the model of the checkpoint was trained on, " - f"you can already use {model.__class__.__name__} for predictions without further training." + f"All the weights of {model.__class__.__name__} were initialized from the model checkpoint at" + f" {pretrained_model_name_or_path}.\nIf your task is similar to the task the model of the checkpoint" + f" was trained on, you can already use {model.__class__.__name__} for predictions without further" + " training." 
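# modeling_frcnn.py above rewraps assert messages in two directions: either
# the condition gains parentheses, or the message does. The parentheses must
# never wrap the `condition, message` pair as a whole, or the assert tests a
# two-element tuple that is always truthy. A standalone sketch with
# hypothetical values (not the patch's own variables):
from_tf = True  # hypothetical flag, mirroring the checkpoint check above
path = "model.ckpt.index"  # hypothetical checkpoint path

# Correct: parentheses around the message only, matching the new style above.
assert from_tf, (
    "We found a TensorFlow checkpoint at {}, please set from_tf to True to load from this checkpoint"
    .format(path)
)

# Incorrect (would always pass): parentheses around condition and message
# together, e.g. `assert (from_tf, "message")`, since a non-empty tuple is truthy.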
) if len(error_msgs) > 0: raise RuntimeError( diff --git a/examples/research_projects/visual_bert/utils.py b/examples/research_projects/visual_bert/utils.py index 59ae11d025..8e830fb835 100644 --- a/examples/research_projects/visual_bert/utils.py +++ b/examples/research_projects/visual_bert/utils.py @@ -231,9 +231,10 @@ def compare(in_tensor): n2 = out_tensor.numpy()[0] print(n1.shape, n1[0, 0, :5]) print(n2.shape, n2[0, 0, :5]) - assert np.allclose( - n1, n2, rtol=0.01, atol=0.1 - ), f"{sum([1 for x in np.isclose(n1, n2, rtol=0.01, atol=0.1).flatten() if x == False])/len(n1.flatten())*100:.4f} % element-wise mismatch" + assert np.allclose(n1, n2, rtol=0.01, atol=0.1), ( + f"{sum([1 for x in np.isclose(n1, n2, rtol=0.01, atol=0.1).flatten() if x == False])/len(n1.flatten())*100:.4f} %" + " element-wise mismatch" + ) raise Exception("tensors are all good") # Hugging face functions below diff --git a/examples/research_projects/wav2vec2/run_asr.py b/examples/research_projects/wav2vec2/run_asr.py index 9b031cca19..bb34e0a0c7 100755 --- a/examples/research_projects/wav2vec2/run_asr.py +++ b/examples/research_projects/wav2vec2/run_asr.py @@ -99,7 +99,9 @@ class DataTrainingArguments: validation_split_name: Optional[str] = field( default="validation", metadata={ - "help": "The name of the validation data set split to use (via the datasets library). Defaults to 'validation'" + "help": ( + "The name of the validation data set split to use (via the datasets library). Defaults to 'validation'" + ) }, ) target_text_column: Optional[str] = field( @@ -121,7 +123,10 @@ class DataTrainingArguments: orthography: Optional[str] = field( default="librispeech", metadata={ - "help": "Orthography used for normalization and tokenization: 'librispeech' (default), 'timit', or 'buckwalter'." + "help": ( + "Orthography used for normalization and tokenization: 'librispeech' (default), 'timit', or" + " 'buckwalter'." + ) }, ) overwrite_cache: bool = field( @@ -392,11 +397,13 @@ def main(): val_dataset = val_dataset.filter(filter_by_max_duration, remove_columns=["duration_in_seconds"]) if len(train_dataset) > old_train_size: logger.warning( - f"Filtered out {len(train_dataset) - old_train_size} train example(s) longer than {data_args.max_duration_in_seconds} second(s)." + f"Filtered out {len(train_dataset) - old_train_size} train example(s) longer than" + f" {data_args.max_duration_in_seconds} second(s)." ) if len(val_dataset) > old_val_size: logger.warning( - f"Filtered out {len(val_dataset) - old_val_size} validation example(s) longer than {data_args.max_duration_in_seconds} second(s)." + f"Filtered out {len(val_dataset) - old_val_size} validation example(s) longer than" + f" {data_args.max_duration_in_seconds} second(s)." ) logger.info(f"Split sizes: {len(train_dataset)} train and {len(val_dataset)} validation.") diff --git a/examples/research_projects/wav2vec2/run_common_voice.py b/examples/research_projects/wav2vec2/run_common_voice.py index 5825c1feb1..b8480d3c7d 100644 --- a/examples/research_projects/wav2vec2/run_common_voice.py +++ b/examples/research_projects/wav2vec2/run_common_voice.py @@ -79,9 +79,11 @@ class ModelArguments: mask_time_prob: Optional[float] = field( default=0.05, metadata={ - "help": "Propability of each feature vector along the time axis to be chosen as the start of the vector" - "span to be masked. Approximately ``mask_time_prob * sequence_length // mask_time_length`` feature" - "vectors will be masked along the time axis. This is only relevant if ``apply_spec_augment is True``." 
+ "help": ( + "Propability of each feature vector along the time axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_time_prob * sequence_length // mask_time_length`` feature" + "vectors will be masked along the time axis. This is only relevant if ``apply_spec_augment is True``." + ) }, ) layerdrop: Optional[float] = field(default=0.0, metadata={"help": "The LayerDrop probability."}) @@ -116,15 +118,19 @@ class DataTrainingArguments: max_train_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + ) }, ) max_val_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of validation examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of validation examples to this " + "value if set." + ) }, ) chars_to_ignore: List[str] = list_field( diff --git a/examples/research_projects/wav2vec2/run_pretrain.py b/examples/research_projects/wav2vec2/run_pretrain.py index 248f32443f..cd35caaaa8 100755 --- a/examples/research_projects/wav2vec2/run_pretrain.py +++ b/examples/research_projects/wav2vec2/run_pretrain.py @@ -104,7 +104,9 @@ class DataTrainingArguments: validation_split_name: Optional[str] = field( default="validation", metadata={ - "help": "The name of the validation data set split to use (via the datasets library). Defaults to 'validation'" + "help": ( + "The name of the validation data set split to use (via the datasets library). Defaults to 'validation'" + ) }, ) speech_file_column: Optional[str] = field( @@ -369,7 +371,8 @@ def main(): if not config.do_stable_layer_norm or config.feat_extract_norm != "layer": raise ValueError( - "PreTraining is only supported for ``config.do_stable_layer_norm=True`` and ``config.feat_extract_norm='layer'" + "PreTraining is only supported for ``config.do_stable_layer_norm=True`` and" + " ``config.feat_extract_norm='layer'" ) model = Wav2Vec2ForPreTraining(config) diff --git a/examples/research_projects/xtreme-s/run_xtreme_s.py b/examples/research_projects/xtreme-s/run_xtreme_s.py index a186d4b7ce..972c6d5462 100644 --- a/examples/research_projects/xtreme-s/run_xtreme_s.py +++ b/examples/research_projects/xtreme-s/run_xtreme_s.py @@ -89,7 +89,7 @@ class ModelArguments: cache_dir: Optional[str] = field( default=None, metadata={ - "help": "Where do you want to store the pretrained models and datasets downloaded from " "huggingface.co" + "help": "Where do you want to store the pretrained models and datasets downloaded from huggingface.co" }, ) freeze_feature_encoder: bool = field( @@ -115,9 +115,11 @@ class ModelArguments: mask_time_prob: float = field( default=0.05, metadata={ - "help": "Probability of each feature vector along the time axis to be chosen as the start of the vector" - "span to be masked. Approximately ``mask_time_prob * sequence_length // mask_time_length`` feature" - "vectors will be masked along the time axis." + "help": ( + "Probability of each feature vector along the time axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_time_prob * sequence_length // mask_time_length`` feature" + "vectors will be masked along the time axis." 
+ ) }, ) mask_time_length: int = field( @@ -127,8 +129,11 @@ class ModelArguments: mask_feature_prob: float = field( default=0.0, metadata={ - "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" - "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + "help": ( + "Probability of each feature vector along the feature axis to be chosen as the start of the vector span" + " to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature" + " bins will be masked along the time axis." + ) }, ) mask_feature_length: int = field( @@ -162,8 +167,10 @@ class DataTrainingArguments: task: str = field( default=None, metadata={ - "help": "The task name of the benchmark to use (via the datasets library). Should be on of: " - "'fleurs-asr', 'mls', 'voxpopuli', 'covost2', 'minds14', 'fleurs-lang_id', 'babel'." + "help": ( + "The task name of the benchmark to use (via the datasets library). Should be one of: " + "'fleurs-asr', 'mls', 'voxpopuli', 'covost2', 'minds14', 'fleurs-lang_id', 'babel'." + ) }, ) language: str = field( @@ -173,10 +180,12 @@ class DataTrainingArguments: language_group: str = field( default=None, metadata={ - "help": "The language group to select a subset of languages to train on. " - "This option is only used the 'fleurs-asr' task. Should be one of: " - "'western_european_we', 'eastern_european_ee', 'central_asia_middle_north_african_cmn', " - "'sub_saharan_african_ssa', 'south_asian_sa', 'south_east_asian_sea', 'chinese_japanase_korean_cjk'." + "help": ( + "The language group to select a subset of languages to train on. " + "This option is only used for the 'fleurs-asr' task. Should be one of: " + "'western_european_we', 'eastern_european_ee', 'central_asia_middle_north_african_cmn', " + "'sub_saharan_african_ssa', 'south_asian_sa', 'south_east_asian_sea', 'chinese_japanase_korean_cjk'." + ) }, ) train_split_name: str = field( @@ -188,14 +197,15 @@ class DataTrainingArguments: eval_split_name: str = field( default="validation", metadata={ - "help": "The name of the evaluation dataset split to use (via the datasets library). " - "Defaults to 'validation'" + "help": ( + "The name of the evaluation dataset split to use (via the datasets library). Defaults to 'validation'" + ) }, ) predict_split_name: str = field( default="test", metadata={ - "help": "The name of the prediction dataset split to use (via the datasets library). " "Defaults to 'test'" + "help": "The name of the prediction dataset split to use (via the datasets library). Defaults to 'test'" }, ) audio_column_name: str = field( @@ -205,8 +215,10 @@ class DataTrainingArguments: target_column_name: str = field( default=None, metadata={ - "help": "The name of the dataset column containing the target data " - "(transcription/translation/label). If None, the name will be inferred from the task. Defaults to None." + "help": ( + "The name of the dataset column containing the target data (transcription/translation/label). If None," + " the name will be inferred from the task. Defaults to None." + ) }, ) overwrite_cache: bool = field( @@ -219,22 +231,28 @@ class DataTrainingArguments: max_train_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set."
+ "help": ( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + ) }, ) max_eval_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of validation examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of validation examples to this " + "value if set." + ) }, ) max_predict_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of prediction examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of prediction examples to this " + "value if set." + ) }, ) chars_to_ignore: Optional[List[str]] = list_field( @@ -244,7 +262,10 @@ class DataTrainingArguments: max_duration_in_seconds: float = field( default=30.0, metadata={ - "help": "Filter audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + "help": ( + "Filter audio files that are longer than `max_duration_in_seconds` seconds to" + " 'max_duration_in_seconds`" + ) }, ) min_duration_in_seconds: float = field( @@ -253,17 +274,21 @@ class DataTrainingArguments: preprocessing_only: bool = field( default=False, metadata={ - "help": "Whether to only do data preprocessing and skip training. " - "This is especially useful when data preprocessing errors out in distributed training due to timeout. " - "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " - "so that the cached datasets can consequently be loaded in distributed training" + "help": ( + "Whether to only do data preprocessing and skip training. This is especially useful when data" + " preprocessing errors out in distributed training due to timeout. In this case, one should run the" + " preprocessing in a non-distributed setup with `preprocessing_only=True` so that the cached datasets" + " can consequently be loaded in distributed training" + ) }, ) use_auth_token: bool = field( default=False, metadata={ - "help": "If :obj:`True`, will use the token generated when running" - ":obj:`transformers-cli login` as HTTP bearer authorization for remote files." + "help": ( + "If :obj:`True`, will use the token generated when running" + ":obj:`transformers-cli login` as HTTP bearer authorization for remote files." + ) }, ) unk_token: str = field( @@ -281,17 +306,21 @@ class DataTrainingArguments: phoneme_language: Optional[str] = field( default=None, metadata={ - "help": "The target language that should be used be" - " passed to the tokenizer for tokenization. Note that" - " this is only relevant if the model classifies the" - " input audio to a sequence of phoneme sequences." + "help": ( + "The target language that should be used be" + " passed to the tokenizer for tokenization. Note that" + " this is only relevant if the model classifies the" + " input audio to a sequence of phoneme sequences." + ) }, ) per_lang_metrics: bool = field( default=True, metadata={ - "help": "If `True`, compute the test metrics separately for each language, and average the results. " - "If `False` compute the average test metrics in a single pass for all languages at once." + "help": ( + "If `True`, compute the test metrics separately for each language, and average the results. " + "If `False` compute the average test metrics in a single pass for all languages at once." 
+ ) }, ) @@ -446,7 +475,7 @@ def main(): if task_name is None: raise ValueError( - "Set --task should be set to '' " "(e.g. 'fleurs-asr', 'mls', 'covost2', 'minds14') " + "--task should be set (e.g. 'fleurs-asr', 'mls', 'covost2', 'minds14')" ) if lang_id is None: raise ValueError( @@ -481,9 +510,9 @@ def main(): if data_args.audio_column_name not in raw_datasets["train"].column_names: raise ValueError( - f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " - "Make sure to set `--audio_column_name` to the correct audio column - one of " - f"{', '.join(raw_datasets['train'].column_names)}." + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'." + " Make sure to set `--audio_column_name` to the correct audio column - one of" + f" {', '.join(raw_datasets['train'].column_names)}." ) if target_column_name not in raw_datasets["train"].column_names: @@ -903,7 +932,10 @@ def main(): "finetuned_from": model_args.model_name_or_path, "tasks": task_name, "tags": [task_name, data_args.dataset_name], - "dataset_args": f"Config: {config_name}, Training split: {data_args.train_split_name}, Eval split: {data_args.eval_split_name}, Predict split: {data_args.predict_split_name}", + "dataset_args": ( + f"Config: {config_name}, Training split: {data_args.train_split_name}, Eval split:" + f" {data_args.eval_split_name}, Predict split: {data_args.predict_split_name}" + ), "dataset": f"{data_args.dataset_name.upper()} - {config_name.upper()}", "language": data_args.language, } diff --git a/examples/tensorflow/language-modeling/run_clm.py b/examples/tensorflow/language-modeling/run_clm.py index 3598ad668a..5469f0c3f7 100755 --- a/examples/tensorflow/language-modeling/run_clm.py +++ b/examples/tensorflow/language-modeling/run_clm.py @@ -73,8 +73,9 @@ class ModelArguments: model_name_or_path: Optional[str] = field( default=None, metadata={ - "help": "The model checkpoint for weights initialization." - "Don't set if you want to train a model from scratch." + "help": ( + "The model checkpoint for weights initialization. Don't set if you want to train a model from scratch." + ) }, ) model_type: Optional[str] = field( @@ -84,8 +85,10 @@ class ModelArguments: config_overrides: Optional[str] = field( default=None, metadata={ - "help": "Override some existing default config settings when a model is trained from scratch. Example: " - "n_embd=10,resid_pdrop=0.2,scale_attn_weights=false,summary_type=cls_index" + "help": ( + "Override some existing default config settings when a model is trained from scratch. Example: " + "n_embd=10,resid_pdrop=0.2,scale_attn_weights=false,summary_type=cls_index" + ) }, ) config_name: Optional[str] = field( @@ -109,8 +112,10 @@ class ModelArguments: use_auth_token: bool = field( default=False, metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." + "help": ( + "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + ) }, ) @@ -150,9 +155,11 @@ class DataTrainingArguments: block_size: Optional[int] = field( default=None, metadata={ - "help": "Optional input sequence length after tokenization. " - "The training dataset will be truncated in block of this size for training. " - "Default to the model max input length for single sentence inputs (take into account special tokens)."
+ "help": ( + "Optional input sequence length after tokenization. " + "The training dataset will be truncated in block of this size for training. " + "Default to the model max input length for single sentence inputs (take into account special tokens)." + ) }, ) preprocessing_num_workers: Optional[int] = field( @@ -166,15 +173,19 @@ class DataTrainingArguments: max_train_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + ) }, ) max_eval_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + ) }, ) keep_linebreaks: bool = field( @@ -412,7 +423,8 @@ def main(): eval_dataset = lm_datasets["validation"] else: logger.info( - f"Validation file not found: using {data_args.validation_split_percentage}% of the dataset as validation as provided in data_args" + f"Validation file not found: using {data_args.validation_split_percentage}% of the dataset as validation" + " as provided in data_args" ) train_indices, val_indices = train_test_split( list(range(len(train_dataset))), test_size=data_args.validation_split_percentage / 100 diff --git a/examples/tensorflow/language-modeling/run_mlm.py b/examples/tensorflow/language-modeling/run_mlm.py index 8b32070b2d..5c0b124d45 100755 --- a/examples/tensorflow/language-modeling/run_mlm.py +++ b/examples/tensorflow/language-modeling/run_mlm.py @@ -74,8 +74,9 @@ class ModelArguments: model_name_or_path: Optional[str] = field( default=None, metadata={ - "help": "The model checkpoint for weights initialization." - "Don't set if you want to train a model from scratch." + "help": ( + "The model checkpoint for weights initialization.Don't set if you want to train a model from scratch." + ) }, ) model_type: Optional[str] = field( @@ -85,8 +86,10 @@ class ModelArguments: config_overrides: Optional[str] = field( default=None, metadata={ - "help": "Override some existing default config settings when a model is trained from scratch. Example: " - "n_embd=10,resid_pdrop=0.2,scale_attn_weights=false,summary_type=cls_index" + "help": ( + "Override some existing default config settings when a model is trained from scratch. Example: " + "n_embd=10,resid_pdrop=0.2,scale_attn_weights=false,summary_type=cls_index" + ) }, ) config_name: Optional[str] = field( @@ -110,8 +113,10 @@ class ModelArguments: use_auth_token: bool = field( default=False, metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." + "help": ( + "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + ) }, ) @@ -151,8 +156,10 @@ class DataTrainingArguments: max_seq_length: Optional[int] = field( default=None, metadata={ - "help": "The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated." + "help": ( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated." 
+ ) }, ) preprocessing_num_workers: Optional[int] = field( @@ -169,22 +176,28 @@ class DataTrainingArguments: pad_to_max_length: bool = field( default=False, metadata={ - "help": "Whether to pad all samples to `max_seq_length`. " - "If False, will pad the samples dynamically when batching to the maximum length in the batch." + "help": ( + "Whether to pad all samples to `max_seq_length`. " + "If False, will pad the samples dynamically when batching to the maximum length in the batch." + ) }, ) max_train_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + ) }, ) max_eval_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + ) }, ) @@ -456,7 +469,8 @@ def main(): eval_dataset = tokenized_datasets["validation"] else: logger.info( - f"Validation file not found: using {data_args.validation_split_percentage}% of the dataset as validation as provided in data_args" + f"Validation file not found: using {data_args.validation_split_percentage}% of the dataset as validation" + " as provided in data_args" ) train_indices, val_indices = train_test_split( list(range(len(train_dataset))), test_size=data_args.validation_split_percentage / 100 diff --git a/examples/tensorflow/multiple-choice/run_swag.py b/examples/tensorflow/multiple-choice/run_swag.py index a05b6eacf7..c06ac5a19b 100644 --- a/examples/tensorflow/multiple-choice/run_swag.py +++ b/examples/tensorflow/multiple-choice/run_swag.py @@ -156,8 +156,10 @@ class ModelArguments: use_auth_token: bool = field( default=False, metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." + "help": ( + "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + ) }, ) @@ -183,30 +185,38 @@ class DataTrainingArguments: max_seq_length: Optional[int] = field( default=None, metadata={ - "help": "The maximum total input sequence length after tokenization. If passed, sequences longer " - "than this will be truncated, sequences shorter will be padded." + "help": ( + "The maximum total input sequence length after tokenization. If passed, sequences longer " + "than this will be truncated, sequences shorter will be padded." + ) }, ) pad_to_max_length: bool = field( default=False, metadata={ - "help": "Whether to pad all samples to the maximum sentence length. " - "If False, will pad the samples dynamically when batching to the maximum length in the batch. More " - "efficient on GPU but very bad for TPU." + "help": ( + "Whether to pad all samples to the maximum sentence length. " + "If False, will pad the samples dynamically when batching to the maximum length in the batch. More " + "efficient on GPU but very bad for TPU." + ) }, ) max_train_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." 
+ "help": ( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + ) }, ) max_eval_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + ) }, ) diff --git a/examples/tensorflow/question-answering/run_qa.py b/examples/tensorflow/question-answering/run_qa.py index 781f7a6911..eb91ec2538 100755 --- a/examples/tensorflow/question-answering/run_qa.py +++ b/examples/tensorflow/question-answering/run_qa.py @@ -78,8 +78,10 @@ class ModelArguments: use_auth_token: bool = field( default=False, metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." + "help": ( + "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + ) }, ) @@ -115,37 +117,46 @@ class DataTrainingArguments: max_seq_length: int = field( default=384, metadata={ - "help": "The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded." + "help": ( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ) }, ) pad_to_max_length: bool = field( default=False, metadata={ - "help": "Whether to pad all samples to `max_seq_length`. " - "If False, will pad the samples dynamically when batching to the maximum length in the batch (which can " - "be faster on GPU but will be slower on TPU)." + "help": ( + "Whether to pad all samples to `max_seq_length`. If False, will pad the samples dynamically when" + " batching to the maximum length in the batch (which can be faster on GPU but will be slower on TPU)." + ) }, ) max_train_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + ) }, ) max_eval_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + ) }, ) max_predict_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of prediction examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of prediction examples to this " + "value if set." + ) }, ) version_2_with_negative: bool = field( @@ -154,9 +165,11 @@ class DataTrainingArguments: null_score_diff_threshold: float = field( default=0.0, metadata={ - "help": "The threshold used to select the null answer: if the best answer has a score that is less than " - "the score of the null answer minus this threshold, the null answer is selected for this example. " - "Only useful when `version_2_with_negative=True`." 
+ "help": ( + "The threshold used to select the null answer: if the best answer has a score that is less than " + "the score of the null answer minus this threshold, the null answer is selected for this example. " + "Only useful when `version_2_with_negative=True`." + ) }, ) doc_stride: int = field( @@ -170,8 +183,10 @@ class DataTrainingArguments: max_answer_length: int = field( default=30, metadata={ - "help": "The maximum length of an answer that can be generated. This is needed because the start " - "and end predictions are not conditioned on one another." + "help": ( + "The maximum length of an answer that can be generated. This is needed because the start " + "and end predictions are not conditioned on one another." + ) }, ) @@ -330,9 +345,9 @@ def main(): # region Tokenizer check: this script requires a fast tokenizer. if not isinstance(tokenizer, PreTrainedTokenizerFast): raise ValueError( - "This example script only works for models that have a fast tokenizer. Checkout the big table of models " - "at https://huggingface.co/transformers/index.html#supported-frameworks to find the model types that meet this " - "requirement" + "This example script only works for models that have a fast tokenizer. Checkout the big table of models at" + " https://huggingface.co/transformers/index.html#supported-frameworks to find the model types that meet" + " this requirement" ) # endregion diff --git a/examples/tensorflow/summarization/run_summarization.py b/examples/tensorflow/summarization/run_summarization.py index 10a0b37223..aa750837ab 100644 --- a/examples/tensorflow/summarization/run_summarization.py +++ b/examples/tensorflow/summarization/run_summarization.py @@ -99,8 +99,10 @@ class ModelArguments: use_auth_token: bool = field( default=False, metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." + "help": ( + "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + ) }, ) @@ -131,14 +133,15 @@ class DataTrainingArguments: validation_file: Optional[str] = field( default=None, metadata={ - "help": "An optional input evaluation data file to evaluate the metrics (rouge) on " - "(a jsonlines or csv file)." + "help": ( + "An optional input evaluation data file to evaluate the metrics (rouge) on (a jsonlines or csv file)." + ) }, ) test_file: Optional[str] = field( default=None, metadata={ - "help": "An optional input test data file to evaluate the metrics (rouge) on " "(a jsonlines or csv file)." + "help": "An optional input test data file to evaluate the metrics (rouge) on (a jsonlines or csv file)." }, ) overwrite_cache: bool = field( @@ -151,60 +154,76 @@ class DataTrainingArguments: max_source_length: Optional[int] = field( default=1024, metadata={ - "help": "The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded." + "help": ( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ) }, ) max_target_length: Optional[int] = field( default=128, metadata={ - "help": "The maximum total sequence length for target text after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded." + "help": ( + "The maximum total sequence length for target text after tokenization. 
Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ) }, ) val_max_target_length: Optional[int] = field( default=None, metadata={ - "help": "The maximum total sequence length for validation target text after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`." - "This argument is also used to override the ``max_length`` param of ``model.generate``, which is used " - "during ``evaluate`` and ``predict``." + "help": ( + "The maximum total sequence length for validation target text after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`." + "This argument is also used to override the ``max_length`` param of ``model.generate``, which is used " + "during ``evaluate`` and ``predict``." + ) }, ) pad_to_max_length: bool = field( default=False, metadata={ - "help": "Whether to pad all samples to model maximum sentence length. " - "If False, will pad the samples dynamically when batching to the maximum length in the batch. More " - "efficient on GPU but very bad for TPU." + "help": ( + "Whether to pad all samples to model maximum sentence length. " + "If False, will pad the samples dynamically when batching to the maximum length in the batch. More " + "efficient on GPU but very bad for TPU." + ) }, ) max_train_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + ) }, ) max_eval_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + ) }, ) max_predict_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of prediction examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of prediction examples to this " + "value if set." + ) }, ) num_beams: Optional[int] = field( default=None, metadata={ - "help": "Number of beams to use for evaluation. This argument will be passed to ``model.generate``, " - "which is used during ``evaluate`` and ``predict``." + "help": ( + "Number of beams to use for evaluation. This argument will be passed to ``model.generate``, " + "which is used during ``evaluate`` and ``predict``." + ) }, ) ignore_pad_token_for_loss: bool = field( diff --git a/examples/tensorflow/text-classification/run_glue.py b/examples/tensorflow/text-classification/run_glue.py index 18a6336aa9..9d11e787a5 100644 --- a/examples/tensorflow/text-classification/run_glue.py +++ b/examples/tensorflow/text-classification/run_glue.py @@ -99,8 +99,10 @@ class DataTrainingArguments: max_seq_length: int = field( default=128, metadata={ - "help": "The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded." + "help": ( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." 
+ ) }, ) overwrite_cache: bool = field( @@ -109,29 +111,37 @@ class DataTrainingArguments: pad_to_max_length: bool = field( default=False, metadata={ - "help": "Whether to pad all samples to `max_seq_length`. " - "If False, will pad the samples dynamically when batching to the maximum length in the batch." + "help": ( + "Whether to pad all samples to `max_seq_length`. " + "If False, will pad the samples dynamically when batching to the maximum length in the batch." + ) }, ) max_train_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + ) }, ) max_eval_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + ) }, ) max_predict_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of prediction examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of prediction examples to this " + "value if set." + ) }, ) @@ -171,8 +181,10 @@ class ModelArguments: use_auth_token: bool = field( default=False, metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." + "help": ( + "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + ) }, ) diff --git a/examples/tensorflow/text-classification/run_text_classification.py b/examples/tensorflow/text-classification/run_text_classification.py index 3f3d64b623..b2948324f5 100644 --- a/examples/tensorflow/text-classification/run_text_classification.py +++ b/examples/tensorflow/text-classification/run_text_classification.py @@ -85,8 +85,10 @@ class DataTrainingArguments: max_seq_length: int = field( default=128, metadata={ - "help": "The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded." + "help": ( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ) }, ) overwrite_cache: bool = field( @@ -95,30 +97,38 @@ class DataTrainingArguments: pad_to_max_length: bool = field( default=False, metadata={ - "help": "Whether to pad all samples to `max_seq_length`. " - "If False, will pad the samples dynamically when batching to the maximum length in the batch." - "Data will always be padded when using TPUs." + "help": ( + "Whether to pad all samples to `max_seq_length`. " + "If False, will pad the samples dynamically when batching to the maximum length in the batch. " + "Data will always be padded when using TPUs." + ) }, ) max_train_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set."
+ ) }, ) max_val_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of validation examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of validation examples to this " + "value if set." + ) }, ) max_test_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of test examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + ) }, ) @@ -162,8 +172,10 @@ class ModelArguments: use_auth_token: bool = field( default=False, metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." + "help": ( + "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + ) }, ) @@ -330,8 +342,8 @@ def main(): else: logger.warning( "Your model seems to have been trained with labels, but they don't match the dataset: ", - f"model labels: {list(sorted(label_name_to_id.keys()))}, dataset labels: {list(sorted(label_list))}." - "\nIgnoring the model labels as a result.", + f"model labels: {list(sorted(label_name_to_id.keys()))}, dataset labels:" + f" {list(sorted(label_list))}.\nIgnoring the model labels as a result.", ) label_to_id = {v: i for i, v in enumerate(label_list)} elif not is_regression: diff --git a/examples/tensorflow/token-classification/run_ner.py b/examples/tensorflow/token-classification/run_ner.py index e580ed94b0..d9feb413d0 100644 --- a/examples/tensorflow/token-classification/run_ner.py +++ b/examples/tensorflow/token-classification/run_ner.py @@ -80,8 +80,10 @@ class ModelArguments: use_auth_token: bool = field( default=False, metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." + "help": ( + "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + ) }, ) @@ -127,37 +129,47 @@ class DataTrainingArguments: pad_to_max_length: bool = field( default=False, metadata={ - "help": "Whether to pad all samples to model maximum sentence length. " - "If False, will pad the samples dynamically when batching to the maximum length in the batch. More " - "efficient on GPU but very bad for TPU." + "help": ( + "Whether to pad all samples to model maximum sentence length. " + "If False, will pad the samples dynamically when batching to the maximum length in the batch. More " + "efficient on GPU but very bad for TPU." + ) }, ) max_train_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + ) }, ) max_eval_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." 
+ ) }, ) max_predict_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of prediction examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of prediction examples to this " + "value if set." + ) }, ) label_all_tokens: bool = field( default=False, metadata={ - "help": "Whether to put the label for one word on all tokens of generated by that word or just on the " - "one (in which case the other tokens will have a padding index)." + "help": ( + "Whether to put the label for one word on all tokens generated by that word or just on the " + "one (in which case the other tokens will have a padding index)." + ) }, ) return_entity_level_metrics: bool = field( diff --git a/examples/tensorflow/translation/run_translation.py b/examples/tensorflow/translation/run_translation.py index 6931c32fba..4ed7c621e5 100644 --- a/examples/tensorflow/translation/run_translation.py +++ b/examples/tensorflow/translation/run_translation.py @@ -93,8 +93,10 @@ class ModelArguments: use_auth_token: bool = field( default=False, metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." + "help": ( + "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + ) }, ) @@ -119,14 +121,15 @@ class DataTrainingArguments: validation_file: Optional[str] = field( default=None, metadata={ - "help": "An optional input evaluation data file to evaluate the metrics (rouge) on " - "(a jsonlines or csv file)." + "help": ( + "An optional input evaluation data file to evaluate the metrics (rouge) on (a jsonlines or csv file)." + ) }, ) test_file: Optional[str] = field( default=None, metadata={ - "help": "An optional input test data file to evaluate the metrics (rouge) on " "(a jsonlines or csv file)." + "help": "An optional input test data file to evaluate the metrics (rouge) on (a jsonlines or csv file)." }, ) overwrite_cache: bool = field( @@ -139,60 +142,76 @@ class DataTrainingArguments: max_source_length: Optional[int] = field( default=1024, metadata={ - "help": "The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded." + "help": ( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ) }, ) max_target_length: Optional[int] = field( default=128, metadata={ - "help": "The maximum total sequence length for target text after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded." + "help": ( + "The maximum total sequence length for target text after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ) }, ) val_max_target_length: Optional[int] = field( default=None, metadata={ - "help": "The maximum total sequence length for validation target text after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`." - "This argument is also used to override the ``max_length`` param of ``model.generate``, which is used " - "during ``evaluate`` and ``predict``." + "help": ( + "The maximum total sequence length for validation target text after tokenization. Sequences longer "
Sequences longer " + "than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`." + "This argument is also used to override the ``max_length`` param of ``model.generate``, which is used " + "during ``evaluate`` and ``predict``." + ) }, ) pad_to_max_length: bool = field( default=False, metadata={ - "help": "Whether to pad all samples to model maximum sentence length. " - "If False, will pad the samples dynamically when batching to the maximum length in the batch. More " - "efficient on GPU but very bad for TPU." + "help": ( + "Whether to pad all samples to model maximum sentence length. " + "If False, will pad the samples dynamically when batching to the maximum length in the batch. More " + "efficient on GPU but very bad for TPU." + ) }, ) max_train_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + ) }, ) max_eval_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + ) }, ) max_predict_samples: Optional[int] = field( default=None, metadata={ - "help": "For debugging purposes or quicker training, truncate the number of prediction examples to this " - "value if set." + "help": ( + "For debugging purposes or quicker training, truncate the number of prediction examples to this " + "value if set." + ) }, ) num_beams: Optional[int] = field( default=None, metadata={ - "help": "Number of beams to use for evaluation. This argument will be passed to ``model.generate``, " - "which is used during ``evaluate`` and ``predict``." + "help": ( + "Number of beams to use for evaluation. This argument will be passed to ``model.generate``, " + "which is used during ``evaluate`` and ``predict``." + ) }, ) ignore_pad_token_for_loss: bool = field( diff --git a/setup.py b/setup.py index eed4a74e6c..a7e11799f5 100644 --- a/setup.py +++ b/setup.py @@ -98,7 +98,7 @@ if stale_egg_info.exists(): _deps = [ "Pillow", "accelerate>=0.7.1", - "black~=22.0", + "black~=22.0,>=22.3", "codecarbon==1.2.0", "cookiecutter==1.7.3", "dataclasses", diff --git a/src/transformers/benchmark/benchmark.py b/src/transformers/benchmark/benchmark.py index 8569c6e324..7f95e4b40b 100644 --- a/src/transformers/benchmark/benchmark.py +++ b/src/transformers/benchmark/benchmark.py @@ -96,7 +96,8 @@ class PyTorchBenchmark(Benchmark): model = model_cls(config) except ImportError: raise ImportError( - f"{model_class} does not exist. If you just want to test the pretrained model, you might want to set `--only_pretrain_model` or `args.only_pretrain_model=True`." + f"{model_class} does not exist. If you just want to test the pretrained model, you might want to" + " set `--only_pretrain_model` or `args.only_pretrain_model=True`." ) else: model = MODEL_MAPPING[config.__class__](config) @@ -151,7 +152,8 @@ class PyTorchBenchmark(Benchmark): model = model_cls(config) except ImportError: raise ImportError( - f"{model_class} does not exist. If you just want to test the pretrained model, you might want to set `--only_pretrain_model` or `args.only_pretrain_model=True`." + f"{model_class} does not exist. 
+ " set `--only_pretrain_model` or `args.only_pretrain_model=True`." ) else: model = MODEL_WITH_LM_HEAD_MAPPING[config.__class__](config) @@ -230,7 +232,8 @@ class PyTorchBenchmark(Benchmark): if self.args.is_tpu: # tpu raise NotImplementedError( - "Memory Benchmarking is currently not implemented for TPU. Please disable memory benchmarking with `--no-memory` or `args.memory=False`" + "Memory Benchmarking is currently not implemented for TPU. Please disable memory benchmarking with" + " `--no-memory` or `args.memory=False`" ) elif self.args.is_gpu: if not is_py3nvml_available(): @@ -241,7 +244,8 @@ class PyTorchBenchmark(Benchmark): memory = "N/A" else: logger.info( - "Measuring total GPU usage on GPU device. Make sure to not have additional processes running on the same GPU." + "Measuring total GPU usage on GPU device. Make sure to not have additional processes running" + " on the same GPU." ) # init nvml nvml.nvmlInit() diff --git a/src/transformers/benchmark/benchmark_args.py b/src/transformers/benchmark/benchmark_args.py index dbdf9d8a36..57af2481ef 100644 --- a/src/transformers/benchmark/benchmark_args.py +++ b/src/transformers/benchmark/benchmark_args.py @@ -54,7 +54,8 @@ class PyTorchBenchmarkArguments(BenchmarkArguments): positive_arg = deprecated_arg[3:] setattr(self, positive_arg, not kwargs.pop(deprecated_arg)) logger.warning( - f"{deprecated_arg} is depreciated. Please use --no_{positive_arg} or {positive_arg}={kwargs[positive_arg]}" + f"{deprecated_arg} is deprecated. Please use --no_{positive_arg} or" + f" {positive_arg}={kwargs[positive_arg]}" ) self.torchscript = kwargs.pop("torchscript", self.torchscript) diff --git a/src/transformers/benchmark/benchmark_args_tf.py b/src/transformers/benchmark/benchmark_args_tf.py index 7ec5054cb3..8f3a9cea94 100644 --- a/src/transformers/benchmark/benchmark_args_tf.py +++ b/src/transformers/benchmark/benchmark_args_tf.py @@ -51,7 +51,8 @@ class TensorFlowBenchmarkArguments(BenchmarkArguments): positive_arg = deprecated_arg[3:] kwargs[positive_arg] = not kwargs.pop(deprecated_arg) logger.warning( - f"{deprecated_arg} is depreciated. Please use --no-{positive_arg} or {positive_arg}={kwargs[positive_arg]}" + f"{deprecated_arg} is deprecated. Please use --no-{positive_arg} or" + f" {positive_arg}={kwargs[positive_arg]}" ) self.tpu_name = kwargs.pop("tpu_name", self.tpu_name) self.device_idx = kwargs.pop("device_idx", self.device_idx) diff --git a/src/transformers/benchmark/benchmark_args_utils.py b/src/transformers/benchmark/benchmark_args_utils.py index b2f76f809f..d9233906d2 100644 --- a/src/transformers/benchmark/benchmark_args_utils.py +++ b/src/transformers/benchmark/benchmark_args_utils.py @@ -43,7 +43,10 @@ class BenchmarkArguments: models: List[str] = list_field( default=[], metadata={ - "help": "Model checkpoints to be provided to the AutoModel classes. Leave blank to benchmark the base version of all available models" + "help": ( + "Model checkpoints to be provided to the AutoModel classes. Leave blank to benchmark the base version" + " of all available models" + ) }, ) @@ -87,7 +90,11 @@ class BenchmarkArguments: multi_process: bool = field( default=True, metadata={ - "help": "Whether to use multiprocessing for memory and speed measurement. It is highly recommended to use multiprocessing for accurate CPU and GPU memory measurements. This option should only be disabled for debugging / testing and on TPU."
+ "help": ( + "Whether to use multiprocessing for memory and speed measurement. It is highly recommended to use" + " multiprocessing for accurate CPU and GPU memory measurements. This option should only be disabled" + " for debugging / testing and on TPU." + ) }, ) inference_time_csv_file: str = field( @@ -118,7 +125,10 @@ class BenchmarkArguments: only_pretrain_model: bool = field( default=False, metadata={ - "help": "Instead of loading the model as defined in `config.architectures` if exists, just load the pretrain model weights." + "help": ( + "Instead of loading the model as defined in `config.architectures` if exists, just load the pretrain" + " model weights." + ) }, ) @@ -138,9 +148,10 @@ class BenchmarkArguments: @property def model_names(self): - assert ( - len(self.models) > 0 - ), "Please make sure you provide at least one model name / model identifier, *e.g.* `--models bert-base-cased` or `args.models = ['bert-base-cased']." + assert len(self.models) > 0, ( + "Please make sure you provide at least one model name / model identifier, *e.g.* `--models" + " bert-base-cased` or `args.models = ['bert-base-cased']." + ) return self.models @property diff --git a/src/transformers/benchmark/benchmark_tf.py b/src/transformers/benchmark/benchmark_tf.py index 0eb0db64a8..b5fd4b71b5 100644 --- a/src/transformers/benchmark/benchmark_tf.py +++ b/src/transformers/benchmark/benchmark_tf.py @@ -140,7 +140,8 @@ class TensorFlowBenchmark(Benchmark): model = model_cls(config) except ImportError: raise ImportError( - f"{model_class} does not exist. If you just want to test the pretrained model, you might want to set `--only_pretrain_model` or `args.only_pretrain_model=True`." + f"{model_class} does not exist. If you just want to test the pretrained model, you might want to" + " set `--only_pretrain_model` or `args.only_pretrain_model=True`." ) else: model = TF_MODEL_MAPPING[config.__class__](config) @@ -184,7 +185,8 @@ class TensorFlowBenchmark(Benchmark): model = model_cls(config) except ImportError: raise ImportError( - f"{model_class} does not exist. If you just want to test the pretrained model, you might want to set `--only_pretrain_model` or `args.only_pretrain_model=True`." + f"{model_class} does not exist. If you just want to test the pretrained model, you might want to" + " set `--only_pretrain_model` or `args.only_pretrain_model=True`." ) else: model = TF_MODEL_WITH_LM_HEAD_MAPPING[config.__class__](config) @@ -239,15 +241,17 @@ class TensorFlowBenchmark(Benchmark): with self.args.strategy.scope(): try: if self.args.trace_memory_line_by_line: - assert ( - self.args.eager_mode - ), "`args.eager_mode` is set to `False`. Make sure to run model in eager mode to measure memory consumption line by line." + assert self.args.eager_mode, ( + "`args.eager_mode` is set to `False`. Make sure to run model in eager mode to measure memory" + " consumption line by line." + ) trace = start_memory_tracing("transformers") if self.args.is_tpu: # tpu raise NotImplementedError( - "Memory Benchmarking is currently not implemented for TPU. Please disable memory benchmarking with `args.memory=False`" + "Memory Benchmarking is currently not implemented for TPU. Please disable memory benchmarking" + " with `args.memory=False`" ) elif self.args.is_gpu: # gpu @@ -259,7 +263,8 @@ class TensorFlowBenchmark(Benchmark): memory = "N/A" else: logger.info( - "Measuring total GPU usage on GPU device. Make sure to not have additional processes running on the same GPU." + "Measuring total GPU usage on GPU device. 
+ "Measuring total GPU usage on GPU device. Make sure to not have additional processes" + " running on the same GPU." ) # init nvml nvml.nvmlInit() @@ -274,7 +279,8 @@ class TensorFlowBenchmark(Benchmark): # cpu if self.args.trace_memory_line_by_line: logger.info( - "When enabling line by line tracing, the max peak memory for CPU is inaccurate in TensorFlow." + "When enabling line by line tracing, the max peak memory for CPU is inaccurate in" + " TensorFlow." ) memory = None else: diff --git a/src/transformers/benchmark/benchmark_utils.py b/src/transformers/benchmark/benchmark_utils.py index 7e738bb601..36fe5eb116 100644 --- a/src/transformers/benchmark/benchmark_utils.py +++ b/src/transformers/benchmark/benchmark_utils.py @@ -379,7 +379,7 @@ def start_memory_tracing( devices = list(range(nvml.nvmlDeviceGetCount())) if gpus_to_trace is None else gpus_to_trace nvml.nvmlShutdown() except (OSError, nvml.NVMLError): - logger.warning("Error while initializing communication with GPU. " "We won't perform GPU memory tracing.") + logger.warning("Error while initializing communication with GPU. We won't perform GPU memory tracing.") log_gpu = False else: log_gpu = is_torch_available() or is_tf_available() @@ -626,7 +626,8 @@ class Benchmark(ABC): if self.args.memory and os.getenv("TRANSFORMERS_USE_MULTIPROCESSING") == 0: logger.warning( - "Memory consumption will not be measured accurately if `args.multi_process` is set to `False.` The flag 'TRANSFORMERS_USE_MULTIPROCESSING' should only be disabled for debugging / testing." + "Memory consumption will not be measured accurately if `args.multi_process` is set to `False`. The" + " flag 'TRANSFORMERS_USE_MULTIPROCESSING' should only be disabled for debugging / testing." ) self._print_fn = None @@ -732,7 +733,8 @@ class Benchmark(ABC): self.save_to_csv(inference_result_time, self.args.inference_time_csv_file) if self.args.is_tpu: self.print_fn( - "TPU was used for inference. Note that the time after compilation stabilized (after ~10 inferences model.forward(..) calls) was measured." + "TPU was used for inference. Note that the time after compilation stabilized (after ~10" + " inferences model.forward(..) calls) was measured." ) if self.args.memory: @@ -751,7 +753,8 @@ class Benchmark(ABC): self.save_to_csv(train_result_time, self.args.train_time_csv_file) if self.args.is_tpu: self.print_fn( - "TPU was used for training. Note that the time after compilation stabilized (after ~10 train loss=model.forward(...) + loss.backward() calls) was measured." + "TPU was used for training. Note that the time after compilation stabilized (after ~10 train" + " loss=model.forward(...) + loss.backward() calls) was measured." ) if self.args.memory: diff --git a/src/transformers/commands/add_new_model_like.py b/src/transformers/commands/add_new_model_like.py index 915df4da0f..c0a9e73c87 100644 --- a/src/transformers/commands/add_new_model_like.py +++ b/src/transformers/commands/add_new_model_like.py @@ -1255,8 +1255,8 @@ def create_new_model_like( if disabled_fx_test: print( - "The tests for symbolic tracing with torch.fx were disabled, you can add those once symbolic tracing works " - "for your new model." + "The tests for symbolic tracing with torch.fx were disabled, you can add those once symbolic tracing works" + " for your new model." ) # 4. Add model to auto classes
@@ -1532,7 +1532,8 @@ def get_user_input(): ) all_frameworks = get_user_field( - f"Should we add a version of your new model in all the frameworks implemented by {old_model_type} ({old_frameworks})?", + "Should we add a version of your new model in all the frameworks implemented by" + f" {old_model_type} ({old_frameworks})?", convert_to=convert_to_bool, default_value="yes", fallback_message="Please answer yes/no, y/n, true/false or 1/0.", ) diff --git a/src/transformers/commands/convert.py b/src/transformers/commands/convert.py index cce06aabc3..8c3e37bfcf 100644 --- a/src/transformers/commands/convert.py +++ b/src/transformers/commands/convert.py @@ -46,8 +46,7 @@ class ConvertCommand(BaseTransformersCLICommand): """ train_parser = parser.add_parser( "convert", - help="CLI tool to run convert model from original " - "author checkpoints to Transformers PyTorch checkpoints.", + help="CLI tool to convert a model from original author checkpoints to Transformers PyTorch checkpoints.", ) train_parser.add_argument("--model_type", type=str, required=True, help="Model's type.") train_parser.add_argument( diff --git a/src/transformers/commands/lfs.py b/src/transformers/commands/lfs.py index fe57943139..25537f0791 100644 --- a/src/transformers/commands/lfs.py +++ b/src/transformers/commands/lfs.py @@ -55,16 +55,19 @@ class LfsCommands(BaseTransformersCLICommand): def register_subcommand(parser: ArgumentParser): enable_parser = parser.add_parser( "lfs-enable-largefiles", - help="Deprecated: use `huggingface-cli` instead. " - "Configure your repository to enable upload of files > 5GB.", + help=( + "Deprecated: use `huggingface-cli` instead. Configure your repository to enable upload of files > 5GB." + ), ) enable_parser.add_argument("path", type=str, help="Local path to repository you want to configure.") enable_parser.set_defaults(func=lambda args: LfsEnableCommand(args)) upload_parser = parser.add_parser( LFS_MULTIPART_UPLOAD_COMMAND, - help="Deprecated: use `huggingface-cli` instead. " - "Command will get called by git-lfs, do not call it directly.", + help=( + "Deprecated: use `huggingface-cli` instead. " + "Command will get called by git-lfs, do not call it directly." + ), ) upload_parser.set_defaults(func=lambda args: LfsUploadCommand(args)) diff --git a/src/transformers/commands/train.py b/src/transformers/commands/train.py index e0071608c0..bdcbae9e01 100644 --- a/src/transformers/commands/train.py +++ b/src/transformers/commands/train.py @@ -53,8 +53,7 @@ class TrainCommand(BaseTransformersCLICommand): "--train_data", type=str, required=True, - help="path to train (and optionally evaluation) dataset as a csv with " - "tab separated labels and sentences.", + help="path to train (and optionally evaluation) dataset as a csv with tab separated labels and sentences.", ) train_parser.add_argument( "--column_label", type=int, default=0, help="Column of the dataset csv file with example labels."
@@ -74,7 +73,7 @@ class TrainCommand(BaseTransformersCLICommand): "--validation_split", type=float, default=0.1, - help="if validation dataset is not provided, fraction of train dataset " "to use as validation dataset.", + help="if validation dataset is not provided, fraction of train dataset to use as validation dataset.", ) train_parser.add_argument("--output", type=str, default="./", help="path to save the trained model.") diff --git a/src/transformers/commands/user.py b/src/transformers/commands/user.py index 58a990eef7..f3ec7f740d 100644 --- a/src/transformers/commands/user.py +++ b/src/transformers/commands/user.py @@ -39,7 +39,7 @@ class UserCommands(BaseTransformersCLICommand): # new system: git-based repo system repo_parser = parser.add_parser( "repo", - help="Deprecated: use `huggingface-cli` instead. " "Commands to interact with your huggingface.co repos.", + help="Deprecated: use `huggingface-cli` instead. Commands to interact with your huggingface.co repos.", ) repo_subparsers = repo_parser.add_subparsers( help="Deprecated: use `huggingface-cli` instead. huggingface.co repos related commands" ) @@ -224,6 +224,6 @@ class RepoCreateCommand(BaseUserCommand): exit(1) print("\nYour repo now lives at:") print(f" {ANSI.bold(url)}") - print("\nYou can clone it locally with the command below," " and commit/push as usual.") + print("\nYou can clone it locally with the command below, and commit/push as usual.") print(f"\n git clone {url}") print("") diff --git a/src/transformers/configuration_utils.py b/src/transformers/configuration_utils.py index f66b5734bd..6985d51af3 100755 --- a/src/transformers/configuration_utils.py +++ b/src/transformers/configuration_utils.py @@ -632,10 +632,10 @@ class PretrainedConfig(PushToHubMixin): ) except ValueError: raise EnvironmentError( - f"We couldn't connect to '{HUGGINGFACE_CO_RESOLVE_ENDPOINT}' to load this model, couldn't find it in the cached " - f"files and it looks like {pretrained_model_name_or_path} is not the path to a directory containing a " - f"{configuration_file} file.\nCheckout your internet connection or see how to run the library in " - "offline mode at 'https://huggingface.co/docs/transformers/installation#offline-mode'." + f"We couldn't connect to '{HUGGINGFACE_CO_RESOLVE_ENDPOINT}' to load this model, couldn't find it in" + f" the cached files and it looks like {pretrained_model_name_or_path} is not the path to a directory" + f" containing a {configuration_file} file.\nCheck out your internet connection or see how to run the" + " library in offline mode at 'https://huggingface.co/docs/transformers/installation#offline-mode'."
) except EnvironmentError: raise EnvironmentError( diff --git a/src/transformers/convert_graph_to_onnx.py b/src/transformers/convert_graph_to_onnx.py index 2647cbd869..c757fab8ff 100644 --- a/src/transformers/convert_graph_to_onnx.py +++ b/src/transformers/convert_graph_to_onnx.py @@ -120,7 +120,7 @@ def check_onnxruntime_requirements(minimum_version: Version): raise ImportError( f"We found an older version of onnxruntime ({onnxruntime.__version__}) " f"but we require onnxruntime to be >= {minimum_version} to enable all the conversions options.\n" - f"Please update onnxruntime by running `pip install --upgrade onnxruntime`" + "Please update onnxruntime by running `pip install --upgrade onnxruntime`" ) except ImportError: @@ -376,7 +376,8 @@ def convert( """ warnings.warn( - "The `transformers.convert_graph_to_onnx` package is deprecated and will be removed in version 5 of Transformers", + "The `transformers.convert_graph_to_onnx` package is deprecated and will be removed in version 5 of" + " Transformers", FutureWarning, ) print(f"ONNX opset version set to: {opset}") diff --git a/src/transformers/convert_pytorch_checkpoint_to_tf2.py b/src/transformers/convert_pytorch_checkpoint_to_tf2.py index e083a905d7..db7484f4b2 100755 --- a/src/transformers/convert_pytorch_checkpoint_to_tf2.py +++ b/src/transformers/convert_pytorch_checkpoint_to_tf2.py @@ -429,24 +429,30 @@ if __name__ == "__main__": "--model_type", default=None, type=str, - help=f"Model type selected in the list of {list(MODEL_CLASSES.keys())}. If not given, will download and " - "convert all the models from AWS.", + help=( + f"Model type selected in the list of {list(MODEL_CLASSES.keys())}. If not given, will download and " + "convert all the models from AWS." + ), ) parser.add_argument( "--pytorch_checkpoint_path", default=None, type=str, - help="Path to the PyTorch checkpoint path or shortcut name to download from AWS. " - "If not given, will download and convert all the checkpoints from AWS.", + help=( + "Path to the PyTorch checkpoint path or shortcut name to download from AWS. " + "If not given, will download and convert all the checkpoints from AWS." + ), ) parser.add_argument( "--config_file", default=None, type=str, - help="The config json file corresponding to the pre-trained model. \n" - "This specifies the model architecture. If not given and " - "--pytorch_checkpoint_path is not given or is a shortcut name " - "use the configuration associated to the shortcut name on the AWS", + help=( + "The config json file corresponding to the pre-trained model. \n" + "This specifies the model architecture. If not given and " + "--pytorch_checkpoint_path is not given or is a shortcut name " + "use the configuration associated to the shortcut name on the AWS" + ), ) parser.add_argument( "--compare_with_pt_model", action="store_true", help="Compare Tensorflow and PyTorch model predictions." diff --git a/src/transformers/convert_slow_tokenizer.py b/src/transformers/convert_slow_tokenizer.py index 6f32a4456e..1feb8dd5fb 100644 --- a/src/transformers/convert_slow_tokenizer.py +++ b/src/transformers/convert_slow_tokenizer.py @@ -1066,8 +1066,9 @@ def convert_slow_tokenizer(transformer_tokenizer) -> Tokenizer: if tokenizer_class_name not in SLOW_TO_FAST_CONVERTERS: raise ValueError( - f"An instance of tokenizer class {tokenizer_class_name} cannot be converted in a Fast tokenizer instance. " - f"No converter was found. 
Currently available slow->fast convertors: {list(SLOW_TO_FAST_CONVERTERS.keys())}" + f"An instance of tokenizer class {tokenizer_class_name} cannot be converted to a Fast tokenizer instance." + " No converter was found. Currently available slow->fast converters:" + f" {list(SLOW_TO_FAST_CONVERTERS.keys())}" ) converter_class = SLOW_TO_FAST_CONVERTERS[tokenizer_class_name] diff --git a/src/transformers/convert_slow_tokenizers_checkpoints_to_fast.py b/src/transformers/convert_slow_tokenizers_checkpoints_to_fast.py index 96458f4935..a032ee93b0 100755 --- a/src/transformers/convert_slow_tokenizers_checkpoints_to_fast.py +++ b/src/transformers/convert_slow_tokenizers_checkpoints_to_fast.py @@ -105,8 +105,10 @@ if __name__ == "__main__": "--tokenizer_name", default=None, type=str, - help=f"Optional tokenizer type selected in the list of {list(TOKENIZER_CLASSES.keys())}. If not given, will " - "download and convert all the checkpoints from AWS.", + help=( + f"Optional tokenizer type selected in the list of {list(TOKENIZER_CLASSES.keys())}. If not given, will " + "download and convert all the checkpoints from AWS." + ), ) parser.add_argument( "--checkpoint_name", diff --git a/src/transformers/data/data_collator.py b/src/transformers/data/data_collator.py index fc1dd25eb3..2d1b413497 100644 --- a/src/transformers/data/data_collator.py +++ b/src/transformers/data/data_collator.py @@ -953,7 +953,7 @@ class DataCollatorForWholeWordMask(DataCollatorForLanguageModeling): ) cand_indexes = [] - for (i, token) in enumerate(input_tokens): + for i, token in enumerate(input_tokens): if token == "[CLS]" or token == "[SEP]": continue @@ -998,7 +998,8 @@ class DataCollatorForWholeWordMask(DataCollatorForLanguageModeling): if self.tokenizer.mask_token is None: raise ValueError( - "This tokenizer does not have a mask token which is necessary for masked language modeling. Remove the --mlm flag if you want to use this tokenizer." + "This tokenizer does not have a mask token which is necessary for masked language modeling. Remove the" + " --mlm flag if you want to use this tokenizer." ) labels = inputs.clone() # We sample a few tokens in each sequence for masked-LM training (with probability args.mlm_probability defaults to 0.15 in Bert/RoBERTa) @@ -1038,7 +1039,8 @@ class DataCollatorForWholeWordMask(DataCollatorForLanguageModeling): input_shape = tf.shape(inputs) if self.tokenizer.mask_token is None: raise ValueError( - "This tokenizer does not have a mask token which is necessary for masked language modeling. Remove the --mlm flag if you want to use this tokenizer." + "This tokenizer does not have a mask token which is necessary for masked language modeling. Remove the" + " --mlm flag if you want to use this tokenizer." ) labels = tf.identity(inputs) # We sample a few tokens in each sequence for masked-LM training (with probability args.mlm_probability defaults to 0.15 in Bert/RoBERTa) @@ -1078,7 +1080,8 @@ class DataCollatorForWholeWordMask(DataCollatorForLanguageModeling): if self.tokenizer.mask_token is None: raise ValueError( - "This tokenizer does not have a mask token which is necessary for masked language modeling. Remove the --mlm flag if you want to use this tokenizer." + "This tokenizer does not have a mask token which is necessary for masked language modeling. Remove the" + " --mlm flag if you want to use this tokenizer."
) labels = np.copy(inputs) # We sample a few tokens in each sequence for masked-LM training (with probability args.mlm_probability defaults to 0.15 in Bert/RoBERTa) @@ -1159,7 +1162,8 @@ class DataCollatorForSOP(DataCollatorForLanguageModeling): if self.tokenizer.mask_token is None: raise ValueError( - "This tokenizer does not have a mask token which is necessary for masked language modeling. Remove the --mlm flag if you want to use this tokenizer." + "This tokenizer does not have a mask token which is necessary for masked language modeling. Remove the" + " --mlm flag if you want to use this tokenizer." ) labels = inputs.clone() @@ -1245,12 +1249,14 @@ class DataCollatorForPermutationLanguageModeling(DataCollatorMixin): if self.tokenizer.mask_token is None: raise ValueError( - "This tokenizer does not have a mask token which is necessary for permutation language modeling. Please add a mask token if you want to use this tokenizer." + "This tokenizer does not have a mask token which is necessary for permutation language modeling." + " Please add a mask token if you want to use this tokenizer." ) if inputs.size(1) % 2 != 0: raise ValueError( - "This collator requires that sequence lengths be even to create a leakage-free perm_mask. Please see relevant comments in source code for details." + "This collator requires that sequence lengths be even to create a leakage-free perm_mask. Please see" + " relevant comments in source code for details." ) labels = inputs.clone() @@ -1345,12 +1351,14 @@ class DataCollatorForPermutationLanguageModeling(DataCollatorMixin): if self.tokenizer.mask_token is None: raise ValueError( - "This tokenizer does not have a mask token which is necessary for permutation language modeling. Please add a mask token if you want to use this tokenizer." + "This tokenizer does not have a mask token which is necessary for permutation language modeling." + " Please add a mask token if you want to use this tokenizer." ) if tf.shape(inputs)[1] % 2 != 0: raise ValueError( - "This collator requires that sequence lengths be even to create a leakage-free perm_mask. Please see relevant comments in source code for details." + "This collator requires that sequence lengths be even to create a leakage-free perm_mask. Please see" + " relevant comments in source code for details." ) labels = tf.identity(inputs) @@ -1452,12 +1460,14 @@ class DataCollatorForPermutationLanguageModeling(DataCollatorMixin): if self.tokenizer.mask_token is None: raise ValueError( - "This tokenizer does not have a mask token which is necessary for permutation language modeling. Please add a mask token if you want to use this tokenizer." + "This tokenizer does not have a mask token which is necessary for permutation language modeling." + " Please add a mask token if you want to use this tokenizer." ) if inputs.shape[1] % 2 != 0: raise ValueError( - "This collator requires that sequence lengths be even to create a leakage-free perm_mask. Please see relevant comments in source code for details." + "This collator requires that sequence lengths be even to create a leakage-free perm_mask. Please see" + " relevant comments in source code for details." 
) labels = np.copy(inputs) diff --git a/src/transformers/data/datasets/glue.py b/src/transformers/data/datasets/glue.py index a5cdcfde4b..befa22c2e1 100644 --- a/src/transformers/data/datasets/glue.py +++ b/src/transformers/data/datasets/glue.py @@ -49,8 +49,10 @@ class GlueDataTrainingArguments: max_seq_length: int = field( default=128, metadata={ - "help": "The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded." + "help": ( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ) }, ) overwrite_cache: bool = field( diff --git a/src/transformers/data/datasets/squad.py b/src/transformers/data/datasets/squad.py index 294f89e2f6..e1c8c9cb6c 100644 --- a/src/transformers/data/datasets/squad.py +++ b/src/transformers/data/datasets/squad.py @@ -50,8 +50,10 @@ class SquadDataTrainingArguments: max_seq_length: int = field( default=128, metadata={ - "help": "The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded." + "help": ( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ) }, ) doc_stride: int = field( @@ -61,15 +63,19 @@ class SquadDataTrainingArguments: max_query_length: int = field( default=64, metadata={ - "help": "The maximum number of tokens for the question. Questions longer than this will " - "be truncated to this length." + "help": ( + "The maximum number of tokens for the question. Questions longer than this will " + "be truncated to this length." + ) }, ) max_answer_length: int = field( default=30, metadata={ - "help": "The maximum length of an answer that can be generated. This is needed because the start " - "and end predictions are not conditioned on one another." + "help": ( + "The maximum length of an answer that can be generated. This is needed because the start " + "and end predictions are not conditioned on one another." 
+ ) }, ) overwrite_cache: bool = field( @@ -87,7 +93,10 @@ class SquadDataTrainingArguments: lang_id: int = field( default=0, metadata={ - "help": "language id of input for language-specific xlm models (see tokenization_xlm.PRETRAINED_INIT_CONFIGURATION)" + "help": ( + "language id of input for language-specific xlm models (see" + " tokenization_xlm.PRETRAINED_INIT_CONFIGURATION)" + ) }, ) threads: int = field(default=1, metadata={"help": "multiple threads for converting example to features"}) @@ -153,7 +162,8 @@ class SquadDataset(Dataset): if self.dataset is None or self.examples is None: logger.warning( - f"Deleting cached file {cached_features_file} will allow dataset and examples to be cached in future run" + f"Deleting cached file {cached_features_file} will allow dataset and examples to be cached in" + " a future run" ) else: if mode == Split.dev: diff --git a/src/transformers/data/metrics/squad_metrics.py b/src/transformers/data/metrics/squad_metrics.py index f55e827f07..8a97d6d6e0 100644 --- a/src/transformers/data/metrics/squad_metrics.py +++ b/src/transformers/data/metrics/squad_metrics.py @@ -283,7 +283,7 @@ def get_final_text(pred_text, orig_text, do_lower_case, verbose_logging=False): def _strip_spaces(text): ns_chars = [] ns_to_s_map = collections.OrderedDict() - for (i, c) in enumerate(text): + for i, c in enumerate(text): if c == " ": continue ns_to_s_map[len(ns_chars)] = i @@ -317,7 +317,7 @@ def get_final_text(pred_text, orig_text, do_lower_case, verbose_logging=False): # We then project the characters in `pred_text` back to `orig_text` using # the character-to-character alignment. tok_s_to_ns_map = {} - for (i, tok_index) in tok_ns_to_s_map.items(): + for i, tok_index in tok_ns_to_s_map.items(): tok_s_to_ns_map[tok_index] = i orig_start_position = None @@ -420,7 +420,7 @@ def compute_predictions_logits( all_nbest_json = collections.OrderedDict() scores_diff_json = collections.OrderedDict() - for (example_index, example) in enumerate(all_examples): + for example_index, example in enumerate(all_examples): features = example_index_to_features[example_index] prelim_predictions = [] @@ -429,7 +429,7 @@ compute_predictions_logits( min_null_feature_index = 0 # the paragraph slice with min null score null_start_logit = 0 # the start logit at the slice with min null score null_end_logit = 0 # the end logit at the slice with min null score - for (feature_index, feature) in enumerate(features): + for feature_index, feature in enumerate(features): result = unique_id_to_result[feature.unique_id] start_indexes = _get_best_indexes(result.start_logits, n_best_size) end_indexes = _get_best_indexes(result.end_logits, n_best_size) @@ -549,7 +549,7 @@ compute_predictions_logits( probs = _compute_softmax(total_scores) nbest_json = [] - for (i, entry) in enumerate(nbest): + for i, entry in enumerate(nbest): output = collections.OrderedDict() output["text"] = entry.text output["probability"] = probs[i] @@ -629,14 +629,14 @@ compute_predictions_log_probs( all_nbest_json = collections.OrderedDict() scores_diff_json = collections.OrderedDict() - for (example_index, example) in enumerate(all_examples): + for example_index, example in enumerate(all_examples): features = example_index_to_features[example_index] prelim_predictions = [] # keep track of the minimum score of null start+end of position 0 score_null = 1000000 # large and positive - for (feature_index, feature) in enumerate(features): + for feature_index, feature in enumerate(features): result =
unique_id_to_result[feature.unique_id] cur_null_score = result.cls_logits @@ -744,7 +744,7 @@ def compute_predictions_log_probs( probs = _compute_softmax(total_scores) nbest_json = [] - for (i, entry) in enumerate(nbest): + for i, entry in enumerate(nbest): output = collections.OrderedDict() output["text"] = entry.text output["probability"] = probs[i] diff --git a/src/transformers/data/processors/glue.py b/src/transformers/data/processors/glue.py index 749f15cb0c..3d22968c9d 100644 --- a/src/transformers/data/processors/glue.py +++ b/src/transformers/data/processors/glue.py @@ -202,7 +202,7 @@ class MrpcProcessor(DataProcessor): def _create_examples(self, lines, set_type): """Creates examples for the training, dev and test sets.""" examples = [] - for (i, line) in enumerate(lines): + for i, line in enumerate(lines): if i == 0: continue guid = f"{set_type}-{i}" @@ -248,7 +248,7 @@ class MnliProcessor(DataProcessor): def _create_examples(self, lines, set_type): """Creates examples for the training, dev and test sets.""" examples = [] - for (i, line) in enumerate(lines): + for i, line in enumerate(lines): if i == 0: continue guid = f"{set_type}-{line[0]}" @@ -314,7 +314,7 @@ class ColaProcessor(DataProcessor): lines = lines[1:] text_index = 1 if test_mode else 3 examples = [] - for (i, line) in enumerate(lines): + for i, line in enumerate(lines): guid = f"{set_type}-{i}" text_a = line[text_index] label = None if test_mode else line[1] @@ -358,7 +358,7 @@ class Sst2Processor(DataProcessor): """Creates examples for the training, dev and test sets.""" examples = [] text_index = 1 if set_type == "test" else 0 - for (i, line) in enumerate(lines): + for i, line in enumerate(lines): if i == 0: continue guid = f"{set_type}-{i}" @@ -403,7 +403,7 @@ class StsbProcessor(DataProcessor): def _create_examples(self, lines, set_type): """Creates examples for the training, dev and test sets.""" examples = [] - for (i, line) in enumerate(lines): + for i, line in enumerate(lines): if i == 0: continue guid = f"{set_type}-{line[0]}" @@ -452,7 +452,7 @@ class QqpProcessor(DataProcessor): q1_index = 1 if test_mode else 3 q2_index = 2 if test_mode else 4 examples = [] - for (i, line) in enumerate(lines): + for i, line in enumerate(lines): if i == 0: continue guid = f"{set_type}-{line[0]}" @@ -501,7 +501,7 @@ class QnliProcessor(DataProcessor): def _create_examples(self, lines, set_type): """Creates examples for the training, dev and test sets.""" examples = [] - for (i, line) in enumerate(lines): + for i, line in enumerate(lines): if i == 0: continue guid = f"{set_type}-{line[0]}" @@ -547,7 +547,7 @@ class RteProcessor(DataProcessor): def _create_examples(self, lines, set_type): """Creates examples for the training, dev and test sets.""" examples = [] - for (i, line) in enumerate(lines): + for i, line in enumerate(lines): if i == 0: continue guid = f"{set_type}-{line[0]}" @@ -593,7 +593,7 @@ class WnliProcessor(DataProcessor): def _create_examples(self, lines, set_type): """Creates examples for the training, dev and test sets.""" examples = [] - for (i, line) in enumerate(lines): + for i, line in enumerate(lines): if i == 0: continue guid = f"{set_type}-{line[0]}" diff --git a/src/transformers/data/processors/squad.py b/src/transformers/data/processors/squad.py index bf8ef6aecf..64137c95ac 100644 --- a/src/transformers/data/processors/squad.py +++ b/src/transformers/data/processors/squad.py @@ -57,7 +57,7 @@ def _check_is_max_context(doc_spans, cur_span_index, position): """Check if this is the 'max context' doc 
span for the token.""" best_score = None best_span_index = None - for (span_index, doc_span) in enumerate(doc_spans): + for span_index, doc_span in enumerate(doc_spans): end = doc_span.start + doc_span.length - 1 if position < doc_span.start: continue @@ -79,7 +79,7 @@ def _new_check_is_max_context(doc_spans, cur_span_index, position): # return True best_score = None best_span_index = None - for (span_index, doc_span) in enumerate(doc_spans): + for span_index, doc_span in enumerate(doc_spans): end = doc_span["start"] + doc_span["length"] - 1 if position < doc_span["start"]: continue @@ -120,7 +120,7 @@ def squad_convert_example_to_features( tok_to_orig_index = [] orig_to_tok_index = [] all_doc_tokens = [] - for (i, token) in enumerate(example.doc_tokens): + for i, token in enumerate(example.doc_tokens): orig_to_tok_index.append(len(all_doc_tokens)) if tokenizer.__class__.__name__ in [ "RobertaTokenizer", diff --git a/src/transformers/data/processors/utils.py b/src/transformers/data/processors/utils.py index b403894d4c..936f5a51e9 100644 --- a/src/transformers/data/processors/utils.py +++ b/src/transformers/data/processors/utils.py @@ -179,7 +179,7 @@ class SingleSentenceClassificationProcessor(DataProcessor): texts = [] labels = [] ids = [] - for (i, line) in enumerate(lines): + for i, line in enumerate(lines): texts.append(line[column_text]) labels.append(line[column_label]) if column_id is not None: @@ -207,7 +207,7 @@ class SingleSentenceClassificationProcessor(DataProcessor): labels = [None] * len(texts_or_text_and_labels) examples = [] added_labels = set() - for (text_or_text_and_label, label, guid) in zip(texts_or_text_and_labels, labels, ids): + for text_or_text_and_label, label, guid in zip(texts_or_text_and_labels, labels, ids): if isinstance(text_or_text_and_label, (tuple, list)) and label is None: text, label = text_or_text_and_label else: @@ -262,7 +262,7 @@ class SingleSentenceClassificationProcessor(DataProcessor): label_map = {label: i for i, label in enumerate(self.labels)} all_input_ids = [] - for (ex_index, example) in enumerate(self.examples): + for ex_index, example in enumerate(self.examples): if ex_index % 10000 == 0: logger.info(f"Tokenizing example {ex_index}") @@ -276,7 +276,7 @@ class SingleSentenceClassificationProcessor(DataProcessor): batch_length = max(len(input_ids) for input_ids in all_input_ids) features = [] - for (ex_index, (input_ids, example)) in enumerate(zip(all_input_ids, self.examples)): + for ex_index, (input_ids, example) in enumerate(zip(all_input_ids, self.examples)): if ex_index % 10000 == 0: logger.info(f"Writing example {ex_index}/{len(self.examples)}") # The mask has 1 for real tokens and 0 for padding tokens. 
Only real diff --git a/src/transformers/data/processors/xnli.py b/src/transformers/data/processors/xnli.py index 4b27c309a1..3f1a11fcd6 100644 --- a/src/transformers/data/processors/xnli.py +++ b/src/transformers/data/processors/xnli.py @@ -40,7 +40,7 @@ class XnliProcessor(DataProcessor): lg = self.language if self.train_language is None else self.train_language lines = self._read_tsv(os.path.join(data_dir, f"XNLI-MT-1.0/multinli/multinli.train.{lg}.tsv")) examples = [] - for (i, line) in enumerate(lines): + for i, line in enumerate(lines): if i == 0: continue guid = f"train-{i}" @@ -60,7 +60,7 @@ class XnliProcessor(DataProcessor): """See base class.""" lines = self._read_tsv(os.path.join(data_dir, "XNLI-1.0/xnli.test.tsv")) examples = [] - for (i, line) in enumerate(lines): + for i, line in enumerate(lines): if i == 0: continue language = line[0] diff --git a/src/transformers/debug_utils.py b/src/transformers/debug_utils.py index da266ac571..dbceb1d849 100644 --- a/src/transformers/debug_utils.py +++ b/src/transformers/debug_utils.py @@ -285,7 +285,8 @@ class DebugUnderflowOverflow: # abort after certain batch if requested to do so if self.abort_after_batch_num is not None and self.batch_number > self.abort_after_batch_num: raise ValueError( - f"DebugUnderflowOverflow: aborting after {self.batch_number} batches due to `abort_after_batch_num={self.abort_after_batch_num}` arg" + f"DebugUnderflowOverflow: aborting after {self.batch_number} batches due to" + f" `abort_after_batch_num={self.abort_after_batch_num}` arg" ) diff --git a/src/transformers/deepspeed.py b/src/transformers/deepspeed.py index 6feabdaa80..5f56551077 100644 --- a/src/transformers/deepspeed.py +++ b/src/transformers/deepspeed.py @@ -278,8 +278,8 @@ class HfTrainerDeepSpeedConfig(HfDeepSpeedConfig): if len(self.mismatches) > 0: mismatches = "\n".join(self.mismatches) raise ValueError( - f"Please correct the following DeepSpeed config values that mismatch TrainingArguments values:\n{mismatches}\n" - "The easiest method is to set these DeepSpeed config values to 'auto'." + "Please correct the following DeepSpeed config values that mismatch TrainingArguments" + f" values:\n{mismatches}\nThe easiest method is to set these DeepSpeed config values to 'auto'." ) @@ -340,7 +340,8 @@ def deepspeed_optim_sched(trainer, hf_deepspeed_config, args, num_training_steps else: if hf_deepspeed_config.is_offload(): logger.info( - "Detected ZeRO Offload and non-DeepSpeed optimizers: This combination should work as long as the custom optimizer has both CPU and GPU implementation (except LAMB)" + "Detected ZeRO Offload and non-DeepSpeed optimizers: This combination should work as long as the" + " custom optimizer has both CPU and GPU implementation (except LAMB)" ) # ds supports Adam, OneBitAdam, and Lamb optimizers and can import other optimizers from torch. 
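Alongside the string rewraps, the data-processor and metrics hunks above drop the redundant parentheses around tuple targets in `for` loops (`for (i, line) in ...` becomes `for i, line in ...`). The two spellings unpack identically, so the change is purely cosmetic; a small standalone check (the sample rows are invented for illustration):

    # Parenthesized and bare loop targets unpack the same way.
    rows = ["label\ttext", "1\thello", "0\tworld"]

    old_style = []
    for (i, line) in enumerate(rows):  # parenthesized loop target
        old_style.append((i, line))

    new_style = []
    for i, line in enumerate(rows):  # same unpacking, no parentheses
        new_style.append((i, line))

    assert old_style == new_style == list(enumerate(rows))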
diff --git a/src/transformers/dependency_versions_table.py b/src/transformers/dependency_versions_table.py index cecba61c73..4e6541771f 100644 --- a/src/transformers/dependency_versions_table.py +++ b/src/transformers/dependency_versions_table.py @@ -4,7 +4,7 @@ deps = { "Pillow": "Pillow", "accelerate": "accelerate>=0.7.1", - "black": "black~=22.0", + "black": "black~=22.0,>=22.3", "codecarbon": "codecarbon==1.2.0", "cookiecutter": "cookiecutter==1.7.3", "dataclasses": "dataclasses", diff --git a/src/transformers/feature_extraction_sequence_utils.py b/src/transformers/feature_extraction_sequence_utils.py index cbcdeb4acd..0415686803 100644 --- a/src/transformers/feature_extraction_sequence_utils.py +++ b/src/transformers/feature_extraction_sequence_utils.py @@ -131,8 +131,9 @@ class SequenceFeatureExtractor(FeatureExtractionMixin): # The model's main input name, usually `input_values`, has be passed for padding if self.model_input_names[0] not in processed_features: raise ValueError( - "You should supply an instance of `transformers.BatchFeature` or list of `transformers.BatchFeature` to this method " - f"that includes {self.model_input_names[0]}, but you provided {list(processed_features.keys())}" + "You should supply an instance of `transformers.BatchFeature` or list of `transformers.BatchFeature`" + f" to this method that includes {self.model_input_names[0]}, but you provided" + f" {list(processed_features.keys())}" ) required_input = processed_features[self.model_input_names[0]] @@ -168,7 +169,7 @@ class SequenceFeatureExtractor(FeatureExtractionMixin): else: raise ValueError( f"type of {first_element} unknown: {type(first_element)}. " - f"Should be one of a python, numpy, pytorch or tensorflow object." + "Should be one of a python, numpy, pytorch or tensorflow object." ) for key, value in processed_features.items(): @@ -353,14 +354,14 @@ class SequenceFeatureExtractor(FeatureExtractionMixin): if max_length is None: if padding_strategy == PaddingStrategy.MAX_LENGTH: raise ValueError( - f"When setting ``padding={PaddingStrategy.MAX_LENGTH}``, make sure that" f" max_length is defined" + f"When setting ``padding={PaddingStrategy.MAX_LENGTH}``, make sure that max_length is defined" ) # Test if we have a padding value if padding_strategy != PaddingStrategy.DO_NOT_PAD and (self.padding_value is None): raise ValueError( - "Asking to pad but the feature_extractor does not have a padding value. " - "Please select a value to use as `padding_value`. For example: `feature_extractor.padding_value = 0.0`." + "Asking to pad but the feature_extractor does not have a padding value. Please select a value to use" + " as `padding_value`. For example: `feature_extractor.padding_value = 0.0`." ) return padding_strategy diff --git a/src/transformers/feature_extraction_utils.py b/src/transformers/feature_extraction_utils.py index bb719b98f6..da8007d319 100644 --- a/src/transformers/feature_extraction_utils.py +++ b/src/transformers/feature_extraction_utils.py @@ -434,10 +434,11 @@ class FeatureExtractionMixin(PushToHubMixin): ) except ValueError: raise EnvironmentError( - f"We couldn't connect to '{HUGGINGFACE_CO_RESOLVE_ENDPOINT}' to load this model, couldn't find it in the cached " - f"files and it looks like {pretrained_model_name_or_path} is not the path to a directory containing a " - f"{FEATURE_EXTRACTOR_NAME} file.\nCheckout your internet connection or see how to run the library in " - "offline mode at 'https://huggingface.co/docs/transformers/installation#offline-mode'." 
+ f"We couldn't connect to '{HUGGINGFACE_CO_RESOLVE_ENDPOINT}' to load this model, couldn't find it in" + f" the cached files and it looks like {pretrained_model_name_or_path} is not the path to a directory" + f" containing a {FEATURE_EXTRACTOR_NAME} file.\nCheckout your internet connection or see how to run" + " the library in offline mode at" + " 'https://huggingface.co/docs/transformers/installation#offline-mode'." ) except EnvironmentError: raise EnvironmentError( @@ -462,7 +463,8 @@ class FeatureExtractionMixin(PushToHubMixin): logger.info(f"loading feature extractor configuration file {feature_extractor_file}") else: logger.info( - f"loading feature extractor configuration file {feature_extractor_file} from cache at {resolved_feature_extractor_file}" + f"loading feature extractor configuration file {feature_extractor_file} from cache at" + f" {resolved_feature_extractor_file}" ) return feature_extractor_dict, kwargs diff --git a/src/transformers/generation_beam_constraints.py b/src/transformers/generation_beam_constraints.py index d50796bf82..dcdadfa4c3 100644 --- a/src/transformers/generation_beam_constraints.py +++ b/src/transformers/generation_beam_constraints.py @@ -219,7 +219,8 @@ class DisjunctiveTrie: if no_subsets and self.has_subsets(root, nested_token_ids): raise ValueError( - f"Each list in `nested_token_ids` can't be a complete subset of another list, but is {nested_token_ids}." + "Each list in `nested_token_ids` can't be a complete subset of another list, but is" + f" {nested_token_ids}." ) self.trie = root diff --git a/src/transformers/generation_beam_search.py b/src/transformers/generation_beam_search.py index aa9b3ec66c..7a9ffe7908 100644 --- a/src/transformers/generation_beam_search.py +++ b/src/transformers/generation_beam_search.py @@ -183,13 +183,14 @@ class BeamSearchScorer(BeamScorer): if not isinstance(num_beams, int) or num_beams <= 1: raise ValueError( - f"`num_beams` has to be an integer strictly greater than 1, but is {num_beams}. For `num_beams` == 1, one should make use of `greedy_search` instead." + f"`num_beams` has to be an integer strictly greater than 1, but is {num_beams}. For `num_beams` == 1," + " one should make use of `greedy_search` instead." ) if not isinstance(num_beam_groups, int) or (num_beam_groups > num_beams) or (num_beams % num_beam_groups != 0): raise ValueError( - f"`num_beam_groups` has to be an integer smaller or equal than `num_beams` and `num_beams` " - f"has to be divisible by `num_beam_groups`, but is {num_beam_groups} with `num_beams` being {num_beams}." + "`num_beam_groups` has to be an integer smaller or equal than `num_beams` and `num_beams` has to be" + f" divisible by `num_beam_groups`, but is {num_beam_groups} with `num_beams` being {num_beams}." ) if "max_length" in kwargs: @@ -272,7 +273,8 @@ class BeamSearchScorer(BeamScorer): if beam_idx < self.group_size: raise ValueError( - f"At most {self.group_size} tokens in {next_tokens[batch_idx]} can be equal to `eos_token_id: {eos_token_id}`. Make sure {next_tokens[batch_idx]} are corrected." + f"At most {self.group_size} tokens in {next_tokens[batch_idx]} can be equal to `eos_token_id:" + f" {eos_token_id}`. Make sure {next_tokens[batch_idx]} are corrected." ) # Check if we are done so that we can save a pad step if all(done) @@ -419,13 +421,14 @@ class ConstrainedBeamSearchScorer(BeamScorer): if not isinstance(num_beams, int) or num_beams <= 1: raise ValueError( - f"`num_beams` has to be an integer strictly greater than 1, but is {num_beams}. 
For `num_beams` == 1, one should make use of `greedy_search` instead." + f"`num_beams` has to be an integer strictly greater than 1, but is {num_beams}. For `num_beams` == 1," + " one should make use of `greedy_search` instead." ) if not isinstance(num_beam_groups, int) or (num_beam_groups > num_beams) or (num_beams % num_beam_groups != 0): raise ValueError( - f"`num_beam_groups` has to be an integer smaller or equal than `num_beams` and `num_beams` " - f"has to be divisible by `num_beam_groups`, but is {num_beam_groups} with `num_beams` being {num_beams}." + "`num_beam_groups` has to be an integer smaller or equal than `num_beams` and `num_beams` has to be" + f" divisible by `num_beam_groups`, but is {num_beam_groups} with `num_beams` being {num_beams}." ) if "max_length" in kwargs: @@ -571,7 +574,8 @@ class ConstrainedBeamSearchScorer(BeamScorer): if beam_idx < self.group_size: raise ValueError( - f"At most {self.group_size} tokens in {next_tokens[batch_idx]} can be equal to `eos_token_id: {eos_token_id}`. Make sure {next_tokens[batch_idx]} are corrected." + f"At most {self.group_size} tokens in {next_tokens[batch_idx]} can be equal to `eos_token_id:" + f" {eos_token_id}`. Make sure {next_tokens[batch_idx]} are corrected." ) # Check if we are done so that we can save a pad step if all(done) diff --git a/src/transformers/generation_logits_process.py b/src/transformers/generation_logits_process.py index 7aa4004913..2c5c0eeb8e 100644 --- a/src/transformers/generation_logits_process.py +++ b/src/transformers/generation_logits_process.py @@ -474,7 +474,7 @@ class NoBadWordsLogitsProcessor(LogitsProcessor): else: logger.error( f"An invalid bad word ID is defined: {token}. This ID is not contained in the " - f"vocabulary, and is therefore ignored." + "vocabulary, and is therefore ignored." ) if not banned_mask_list and self.static_bad_words_mask is None: return scores diff --git a/src/transformers/generation_tf_utils.py b/src/transformers/generation_tf_utils.py index 2a9251eeb5..04ae9cc31d 100644 --- a/src/transformers/generation_tf_utils.py +++ b/src/transformers/generation_tf_utils.py @@ -599,8 +599,9 @@ class TFGenerationMixin: # We cannot generate if the model does not have a LM head if self.get_output_embeddings() is None: raise AttributeError( - "You tried to generate sequences with a model that does not have a LM Head. " - "Please use another model class (e.g. `TFOpenAIGPTLMHeadModel`, `TFXLNetLMHeadModel`, `TFGPT2LMHeadModel`, `TFCTRLLMHeadModel`, `TFT5ForConditionalGeneration`, `TFTransfoXLLMHeadModel`)" + "You tried to generate sequences with a model that does not have a LM Head. Please use another model" + " class (e.g. `TFOpenAIGPTLMHeadModel`, `TFXLNetLMHeadModel`, `TFGPT2LMHeadModel`," + " `TFCTRLLMHeadModel`, `TFT5ForConditionalGeneration`, `TFTransfoXLLMHeadModel`)" ) max_length = max_length if max_length is not None else self.config.max_length @@ -696,15 +697,17 @@ class TFGenerationMixin: if do_sample is False: if num_beams == 1: # no_beam_search greedy generation conditions - assert ( - num_return_sequences == 1 - ), "Greedy decoding will always produce the same output for num_beams == 1 and num_return_sequences > 1. Please set num_return_sequences = 1" + assert num_return_sequences == 1, ( + "Greedy decoding will always produce the same output for num_beams == 1 and num_return_sequences >" + " 1. 
Please set num_return_sequences = 1" + ) else: # beam_search greedy generation conditions - assert ( - num_beams >= num_return_sequences - ), "Greedy beam search decoding cannot return more sequences than it has beams. Please set num_beams >= num_return_sequences" + assert num_beams >= num_return_sequences, ( + "Greedy beam search decoding cannot return more sequences than it has beams. Please set num_beams" + " >= num_return_sequences" + ) # create attention mask if necessary accepts_attention_mask = "attention_mask" in set(inspect.signature(self.call).parameters.keys()) @@ -794,9 +797,11 @@ class TFGenerationMixin: encoder_outputs = None cur_len = shape_list(input_ids)[-1] - assert ( - cur_len < max_length - ), f"The context has {cur_len} number of tokens, but `max_length` is only {max_length}. Please make sure that `max_length` is bigger than the number of tokens, by setting either `generate(max_length=...,...)` or `config.max_length = ...`" + assert cur_len < max_length, ( + f"The context has {cur_len} number of tokens, but `max_length` is only {max_length}. Please make sure that" + " `max_length` is bigger than the number of tokens, by setting either `generate(max_length=...,...)` or" + " `config.max_length = ...`" + ) return self._generate_beam_search( input_ids, diff --git a/src/transformers/generation_utils.py b/src/transformers/generation_utils.py index ca8f708f7a..c3f34f8db6 100644 --- a/src/transformers/generation_utils.py +++ b/src/transformers/generation_utils.py @@ -630,7 +630,8 @@ class GenerationMixin: def _reorder_cache(self, past, beam_idx): raise NotImplementedError( - f"Make sure that a `_reorder_cache` function is correctly implemented in {self.__class__.__module__} to enable beam search for {self.__class__}" + f"Make sure that a `_reorder_cache` function is correctly implemented in {self.__class__.__module__} to" + f" enable beam search for {self.__class__}" ) def _get_logits_warper( @@ -791,11 +792,11 @@ class GenerationMixin: if type(custom) is type(default): object_type = "stopping criteria" if isinstance(custom, StoppingCriteria) else "logits processor" raise ValueError( - f"A custom {object_type} of type {type(custom)} with values {custom} has been passed to `generate`, " - f"but it has already been created with the values {default}. {default} has been created by passing the " - "corresponding arguments to generate or by the model's config default values. " - f"If you just want to change the default values of {object_type} consider passing them as arguments " - f"to `generate` instead of using a custom {object_type}." + f"A custom {object_type} of type {type(custom)} with values {custom} has been passed to" + f" `generate`, but it has already been created with the values {default}. {default} has been" + " created by passing the corresponding arguments to generate or by the model's config default" + f" values. If you just want to change the default values of {object_type} consider passing" + f" them as arguments to `generate` instead of using a custom {object_type}." ) default_list.extend(custom_list) return default_list @@ -1212,8 +1213,9 @@ class GenerationMixin: if input_ids_seq_length >= max_length: input_ids_string = "decoder_input_ids" if self.config.is_encoder_decoder else "input_ids" logger.warning( - f"Input length of {input_ids_string} is {input_ids_seq_length}, but ``max_length`` is set to {max_length}. " - "This can lead to unexpected behavior. You should consider increasing ``config.max_length`` or ``max_length``." 
+ f"Input length of {input_ids_string} is {input_ids_seq_length}, but ``max_length`` is set to" + f" {max_length}. This can lead to unexpected behavior. You should consider increasing" + " ``config.max_length`` or ``max_length``." ) # 6. determine generation mode @@ -1620,7 +1622,8 @@ class GenerationMixin: stopping_criteria = stopping_criteria if stopping_criteria is not None else StoppingCriteriaList() if max_length is not None: warnings.warn( - "`max_length` is deprecated in this function, use `stopping_criteria=StoppingCriteriaList([MaxLengthCriteria(max_length=max_length)])` instead.", + "`max_length` is deprecated in this function, use" + " `stopping_criteria=StoppingCriteriaList([MaxLengthCriteria(max_length=max_length)])` instead.", UserWarning, ) stopping_criteria = validate_stopping_criteria(stopping_criteria, max_length) @@ -1872,7 +1875,8 @@ class GenerationMixin: stopping_criteria = stopping_criteria if stopping_criteria is not None else StoppingCriteriaList() if max_length is not None: warnings.warn( - "`max_length` is deprecated in this function, use `stopping_criteria=StoppingCriteriaList(MaxLengthCriteria(max_length=max_length))` instead.", + "`max_length` is deprecated in this function, use" + " `stopping_criteria=StoppingCriteriaList(MaxLengthCriteria(max_length=max_length))` instead.", UserWarning, ) stopping_criteria = validate_stopping_criteria(stopping_criteria, max_length) @@ -2127,7 +2131,8 @@ class GenerationMixin: stopping_criteria = stopping_criteria if stopping_criteria is not None else StoppingCriteriaList() if max_length is not None: warnings.warn( - "`max_length` is deprecated in this function, use `stopping_criteria=StoppingCriteriaList(MaxLengthCriteria(max_length=max_length))` instead.", + "`max_length` is deprecated in this function, use" + " `stopping_criteria=StoppingCriteriaList(MaxLengthCriteria(max_length=max_length))` instead.", UserWarning, ) stopping_criteria = validate_stopping_criteria(stopping_criteria, max_length) @@ -2452,7 +2457,8 @@ class GenerationMixin: stopping_criteria = stopping_criteria if stopping_criteria is not None else StoppingCriteriaList() if max_length is not None: warnings.warn( - "`max_length` is deprecated in this function, use `stopping_criteria=StoppingCriteriaList(MaxLengthCriteria(max_length=max_length))` instead.", + "`max_length` is deprecated in this function, use" + " `stopping_criteria=StoppingCriteriaList(MaxLengthCriteria(max_length=max_length))` instead.", UserWarning, ) stopping_criteria = validate_stopping_criteria(stopping_criteria, max_length) @@ -2767,7 +2773,8 @@ class GenerationMixin: stopping_criteria = stopping_criteria if stopping_criteria is not None else StoppingCriteriaList() if max_length is not None: warnings.warn( - "`max_length` is deprecated in this function, use `stopping_criteria=StoppingCriteriaList(MaxLengthCriteria(max_length=max_length))` instead.", + "`max_length` is deprecated in this function, use" + " `stopping_criteria=StoppingCriteriaList(MaxLengthCriteria(max_length=max_length))` instead.", UserWarning, ) stopping_criteria = validate_stopping_criteria(stopping_criteria, max_length) @@ -3137,7 +3144,8 @@ class GenerationMixin: stopping_criteria = stopping_criteria if stopping_criteria is not None else StoppingCriteriaList() if max_length is not None: warnings.warn( - "`max_length` is deprecated in this function, use `stopping_criteria=StoppingCriteriaList(MaxLengthCriteria(max_length=max_length))` instead.", + "`max_length` is deprecated in this function, use" + " 
`stopping_criteria=StoppingCriteriaList(MaxLengthCriteria(max_length=max_length))` instead.", UserWarning, ) stopping_criteria = validate_stopping_criteria(stopping_criteria, max_length) diff --git a/src/transformers/hf_argparser.py b/src/transformers/hf_argparser.py index c2514465b7..300de8dbab 100644 --- a/src/transformers/hf_argparser.py +++ b/src/transformers/hf_argparser.py @@ -155,8 +155,8 @@ class HfArgumentParser(ArgumentParser): except NameError: raise RuntimeError( f"Type resolution failed for f{dtype}. Try declaring the class in global scope or " - f"removing line of `from __future__ import annotations` which opts in Postponed " - f"Evaluation of Annotations (PEP 563)" + "removing the line `from __future__ import annotations` which opts in to Postponed " + "Evaluation of Annotations (PEP 563)" ) for field in dataclasses.fields(dtype): diff --git a/src/transformers/integrations.py b/src/transformers/integrations.py index 625ee6875a..9e68b142bc 100644 --- a/src/transformers/integrations.py +++ b/src/transformers/integrations.py @@ -389,7 +389,8 @@ def run_hp_search_wandb(trainer, n_trials: int, direction: str, **kwargs) -> Bes format_metrics = rewrite_logs(metrics) if metric not in format_metrics: logger.warning( - f"Provided metric {metric} not found. This might result in unexpected sweeps charts. The available metrics are {format_metrics.keys()}" + f"Provided metric {metric} not found. This might result in unexpected sweeps charts. The available" + f" metrics are {format_metrics.keys()}" ) best_score = False if best_trial["run_id"] is not None: @@ -458,7 +459,8 @@ class TensorBoardCallback(TrainerCallback): has_tensorboard = is_tensorboard_available() if not has_tensorboard: raise RuntimeError( - "TensorBoardCallback requires tensorboard to be installed. Either update your PyTorch version or install tensorboardX." + "TensorBoardCallback requires tensorboard to be installed. Either update your PyTorch version or" + " install tensorboardX." ) if has_tensorboard: try: @@ -811,7 +813,8 @@ class MLflowCallback(TrainerCallback): self._flatten_params = os.getenv("MLFLOW_FLATTEN_PARAMS", "FALSE").upper() in ENV_VARS_TRUE_VALUES self._run_id = os.getenv("MLFLOW_RUN_ID", None) logger.debug( - f"MLflow experiment_name={self._experiment_name}, run_name={args.run_name}, nested={self._nested_run}, tags={self._nested_run}" + f"MLflow experiment_name={self._experiment_name}, run_name={args.run_name}, nested={self._nested_run}," + f" tags={self._nested_run}" ) if state.is_world_process_zero: if self._ml_flow.active_run() is None or self._nested_run or self._run_id: @@ -831,9 +834,10 @@ class MLflowCallback(TrainerCallback): # internally, all values are converted to str in MLflow if len(str(value)) > self._MAX_PARAM_VAL_LENGTH: logger.warning( - f'Trainer is attempting to log a value of "{value}" for key "{name}" as a parameter. ' - f"MLflow's log_param() only accepts values no longer than 250 characters so we dropped this attribute. " - f"You can use `MLFLOW_FLATTEN_PARAMS` environment variable to flatten the parameters and avoid this message." + f'Trainer is attempting to log a value of "{value}" for key "{name}" as a parameter. MLflow\'s' + " log_param() only accepts values no longer than 250 characters so we dropped this attribute." + " You can use `MLFLOW_FLATTEN_PARAMS` environment variable to flatten the parameters and" + " avoid this message."
) del combined_dict[name] # MLflow cannot log more than 100 values in one go, so we have to split it @@ -861,7 +865,7 @@ class MLflowCallback(TrainerCallback): else: logger.warning( f'Trainer is attempting to log a value of "{v}" of type {type(v)} for key "{k}" as a metric. ' - f"MLflow's log_metric() only accepts float and int types so we dropped this attribute." + "MLflow's log_metric() only accepts float and int types so we dropped this attribute." ) self._ml_flow.log_metrics(metrics=metrics, step=state.global_step) diff --git a/src/transformers/modelcard.py b/src/transformers/modelcard.py index a4ec857b18..af075bd990 100644 --- a/src/transformers/modelcard.py +++ b/src/transformers/modelcard.py @@ -492,7 +492,10 @@ class TrainingSummary: if self.finetuned_from is None: model_card += "This model was trained from scratch on " else: - model_card += f"This model is a fine-tuned version of [{self.finetuned_from}](https://huggingface.co/{self.finetuned_from}) on " + model_card += ( + "This model is a fine-tuned version of" + f" [{self.finetuned_from}](https://huggingface.co/{self.finetuned_from}) on " + ) if self.dataset is None: model_card += "an unknown dataset." @@ -875,9 +878,10 @@ def extract_hyperparameters_from_trainer(trainer): if trainer.args.adafactor: hyperparameters["optimizer"] = "Adafactor" else: - hyperparameters[ - "optimizer" - ] = f"Adam with betas=({trainer.args.adam_beta1},{trainer.args.adam_beta2}) and epsilon={trainer.args.adam_epsilon}" + hyperparameters["optimizer"] = ( + f"Adam with betas=({trainer.args.adam_beta1},{trainer.args.adam_beta2}) and" + f" epsilon={trainer.args.adam_epsilon}" + ) hyperparameters["lr_scheduler_type"] = trainer.args.lr_scheduler_type.value if trainer.args.warmup_ratio != 0.0: diff --git a/src/transformers/modeling_flax_pytorch_utils.py b/src/transformers/modeling_flax_pytorch_utils.py index 100e032a38..a91d41b9d6 100644 --- a/src/transformers/modeling_flax_pytorch_utils.py +++ b/src/transformers/modeling_flax_pytorch_utils.py @@ -44,8 +44,9 @@ def load_pytorch_checkpoint_in_flax_state_dict(flax_model, pytorch_checkpoint_pa import torch # noqa: F401 except ImportError: logger.error( - "Loading a PyTorch model in Flax, requires both PyTorch and Flax to be installed. Please see " - "https://pytorch.org/ and https://flax.readthedocs.io/en/latest/installation.html for installation instructions." + "Loading a PyTorch model in Flax requires both PyTorch and Flax to be installed. Please see" + " https://pytorch.org/ and https://flax.readthedocs.io/en/latest/installation.html for installation" + " instructions." ) raise @@ -185,8 +186,9 @@ def load_flax_weights_in_pytorch_model(pt_model, flax_state): import torch # noqa: F401 except ImportError: logger.error( - "Loading a Flax weights in PyTorch, requires both PyTorch and Flax to be installed. Please see " - "https://pytorch.org/ and https://flax.readthedocs.io/en/latest/installation.html for installation instructions." + "Loading Flax weights in PyTorch requires both PyTorch and Flax to be installed. Please see" + " https://pytorch.org/ and https://flax.readthedocs.io/en/latest/installation.html for installation" + " instructions."
) raise @@ -264,20 +266,21 @@ def load_flax_weights_in_pytorch_model(pt_model, flax_state): if len(unexpected_keys) > 0: logger.warning( - "Some weights of the Flax model were not used when " - f"initializing the PyTorch model {pt_model.__class__.__name__}: {unexpected_keys}\n" - f"- This IS expected if you are initializing {pt_model.__class__.__name__} from a Flax model trained on another task " - "or with another architecture (e.g. initializing a BertForSequenceClassification model from a FlaxBertForPreTraining model).\n" - f"- This IS NOT expected if you are initializing {pt_model.__class__.__name__} from a Flax model that you expect " - "to be exactly identical (e.g. initializing a BertForSequenceClassification model from a FlaxBertForSequenceClassification model)." + "Some weights of the Flax model were not used when initializing the PyTorch model" + f" {pt_model.__class__.__name__}: {unexpected_keys}\n- This IS expected if you are initializing" + f" {pt_model.__class__.__name__} from a Flax model trained on another task or with another architecture" + " (e.g. initializing a BertForSequenceClassification model from a FlaxBertForPreTraining model).\n- This" + f" IS NOT expected if you are initializing {pt_model.__class__.__name__} from a Flax model that you expect" + " to be exactly identical (e.g. initializing a BertForSequenceClassification model from a" + " FlaxBertForSequenceClassification model)." ) else: logger.warning(f"All Flax model weights were used when initializing {pt_model.__class__.__name__}.\n") if len(missing_keys) > 0: logger.warning( - f"Some weights of {pt_model.__class__.__name__} were not initialized from the Flax model " - f"and are newly initialized: {missing_keys}\n" - "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference." + f"Some weights of {pt_model.__class__.__name__} were not initialized from the Flax model and are newly" + f" initialized: {missing_keys}\nYou should probably TRAIN this model on a down-stream task to be able to" + " use it for predictions and inference." ) else: logger.warning( diff --git a/src/transformers/modeling_flax_utils.py b/src/transformers/modeling_flax_utils.py index 6b9ddfe7c1..36469cee2c 100644 --- a/src/transformers/modeling_flax_utils.py +++ b/src/transformers/modeling_flax_utils.py @@ -551,14 +551,14 @@ class FlaxPreTrainedModel(PushToHubMixin, FlaxGenerationMixin): has_file_kwargs = {"revision": revision, "proxies": proxies, "use_auth_token": use_auth_token} if has_file(pretrained_model_name_or_path, WEIGHTS_NAME, **has_file_kwargs): raise EnvironmentError( - f"{pretrained_model_name_or_path} does not appear to have a file named {FLAX_WEIGHTS_NAME} " - "but there is a file for PyTorch weights. Use `from_pt=True` to load this model from " - "those weights." + f"{pretrained_model_name_or_path} does not appear to have a file named" + f" {FLAX_WEIGHTS_NAME} but there is a file for PyTorch weights. Use `from_pt=True` to load" + " this model from those weights." ) else: raise EnvironmentError( - f"{pretrained_model_name_or_path} does not appear to have a file named {FLAX_WEIGHTS_NAME} " - f"or {WEIGHTS_NAME}." + f"{pretrained_model_name_or_path} does not appear to have a file named" + f" {FLAX_WEIGHTS_NAME} or {WEIGHTS_NAME}." 
) else: raise EnvironmentError( @@ -571,11 +571,11 @@ class FlaxPreTrainedModel(PushToHubMixin, FlaxGenerationMixin): ) except ValueError: raise EnvironmentError( - f"We couldn't connect to '{HUGGINGFACE_CO_RESOLVE_ENDPOINT}' to load this model, couldn't find it in the cached " - f"files and it looks like {pretrained_model_name_or_path} is not the path to a directory " - f"containing a file named {FLAX_WEIGHTS_NAME} or {WEIGHTS_NAME}.\n" - "Checkout your internet connection or see how to run the library in offline mode at " - "'https://huggingface.co/docs/transformers/installation#offline-mode'." + f"We couldn't connect to '{HUGGINGFACE_CO_RESOLVE_ENDPOINT}' to load this model, couldn't find it" + f" in the cached files and it looks like {pretrained_model_name_or_path} is not the path to a" + f" directory containing a file named {FLAX_WEIGHTS_NAME} or {WEIGHTS_NAME}.\nCheck out your" + " internet connection or see how to run the library in offline mode at" + " 'https://huggingface.co/docs/transformers/installation#offline-mode'." ) except EnvironmentError: raise EnvironmentError( @@ -606,9 +606,9 @@ class FlaxPreTrainedModel(PushToHubMixin, FlaxGenerationMixin): with open(resolved_archive_file) as f: if f.read().startswith("version"): raise OSError( - "You seem to have cloned a repository without having git-lfs installed. Please install " - "git-lfs and run `git lfs install` followed by `git lfs pull` in the folder " - "you cloned." + "You seem to have cloned a repository without having git-lfs installed. Please" + " install git-lfs and run `git lfs install` followed by `git lfs pull` in the" + " folder you cloned." ) else: raise ValueError from e @@ -643,7 +643,7 @@ class FlaxPreTrainedModel(PushToHubMixin, FlaxGenerationMixin): if missing_keys and not _do_init: logger.warning( f"The checkpoint {pretrained_model_name_or_path} is missing required keys: {missing_keys}. " - f"Make sure to call model.init_weights to initialize the missing weights." + "Make sure to call model.init_weights to initialize the missing weights." ) cls._missing_keys = missing_keys @@ -674,27 +674,29 @@ class FlaxPreTrainedModel(PushToHubMixin, FlaxGenerationMixin): if len(unexpected_keys) > 0: logger.warning( - f"Some weights of the model checkpoint at {pretrained_model_name_or_path} were not used when " - f"initializing {model.__class__.__name__}: {unexpected_keys}\n" - f"- This IS expected if you are initializing {model.__class__.__name__} from the checkpoint of a model trained on another task " - f"or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n" - f"- This IS NOT expected if you are initializing {model.__class__.__name__} from the checkpoint of a model that you expect " - f"to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model)." + f"Some weights of the model checkpoint at {pretrained_model_name_or_path} were not used when" + f" initializing {model.__class__.__name__}: {unexpected_keys}\n- This IS expected if you are" + f" initializing {model.__class__.__name__} from the checkpoint of a model trained on another task or" + " with another architecture (e.g.
initializing a BertForSequenceClassification model from a" + " BertForPreTraining model).\n- This IS NOT expected if you are initializing" + f" {model.__class__.__name__} from the checkpoint of a model that you expect to be exactly identical" + " (initializing a BertForSequenceClassification model from a BertForSequenceClassification model)." ) else: logger.info(f"All model checkpoint weights were used when initializing {model.__class__.__name__}.\n") if len(missing_keys) > 0: logger.warning( - f"Some weights of {model.__class__.__name__} were not initialized from the model checkpoint at {pretrained_model_name_or_path} " - f"and are newly initialized: {missing_keys}\n" - f"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference." + f"Some weights of {model.__class__.__name__} were not initialized from the model checkpoint at" + f" {pretrained_model_name_or_path} and are newly initialized: {missing_keys}\nYou should probably" + " TRAIN this model on a down-stream task to be able to use it for predictions and inference." ) elif len(mismatched_keys) == 0: logger.info( - f"All the weights of {model.__class__.__name__} were initialized from the model checkpoint at {pretrained_model_name_or_path}.\n" - f"If your task is similar to the task the model of the checkpoint was trained on, " - f"you can already use {model.__class__.__name__} for predictions without further training." + f"All the weights of {model.__class__.__name__} were initialized from the model checkpoint at" + f" {pretrained_model_name_or_path}.\nIf your task is similar to the task the model of the checkpoint" + f" was trained on, you can already use {model.__class__.__name__} for predictions without further" + " training." ) if len(mismatched_keys) > 0: mismatched_warning = "\n".join( @@ -704,9 +706,10 @@ class FlaxPreTrainedModel(PushToHubMixin, FlaxGenerationMixin): ] ) logger.warning( - f"Some weights of {model.__class__.__name__} were not initialized from the model checkpoint at {pretrained_model_name_or_path} " - f"and are newly initialized because the shapes did not match:\n{mismatched_warning}\n" - f"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference." + f"Some weights of {model.__class__.__name__} were not initialized from the model checkpoint at" + f" {pretrained_model_name_or_path} and are newly initialized because the shapes did not" + f" match:\n{mismatched_warning}\nYou should probably TRAIN this model on a down-stream task to be able" + " to use it for predictions and inference." ) # dictionary of key: dtypes for the model params diff --git a/src/transformers/modeling_tf_pytorch_utils.py b/src/transformers/modeling_tf_pytorch_utils.py index d633c6f5c5..59846a8925 100644 --- a/src/transformers/modeling_tf_pytorch_utils.py +++ b/src/transformers/modeling_tf_pytorch_utils.py @@ -254,25 +254,26 @@ def load_pytorch_weights_in_tf2_model(tf_model, pt_state_dict, tf_inputs=None, a if len(unexpected_keys) > 0: logger.warning( - f"Some weights of the PyTorch model were not used when " - f"initializing the TF 2.0 model {tf_model.__class__.__name__}: {unexpected_keys}\n" - f"- This IS expected if you are initializing {tf_model.__class__.__name__} from a PyTorch model trained on another task " - f"or with another architecture (e.g. 
initializing a TFBertForSequenceClassification model from a BertForPreTraining model).\n" - f"- This IS NOT expected if you are initializing {tf_model.__class__.__name__} from a PyTorch model that you expect " - f"to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model)." + "Some weights of the PyTorch model were not used when initializing the TF 2.0 model" + f" {tf_model.__class__.__name__}: {unexpected_keys}\n- This IS expected if you are initializing" + f" {tf_model.__class__.__name__} from a PyTorch model trained on another task or with another architecture" + " (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).\n- This IS" + f" NOT expected if you are initializing {tf_model.__class__.__name__} from a PyTorch model that you expect" + " to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a" + " BertForSequenceClassification model)." ) else: logger.warning(f"All PyTorch model weights were used when initializing {tf_model.__class__.__name__}.\n") if len(missing_keys) > 0: logger.warning( - f"Some weights or buffers of the TF 2.0 model {tf_model.__class__.__name__} were not initialized from the PyTorch model " - f"and are newly initialized: {missing_keys}\n" - f"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference." + f"Some weights or buffers of the TF 2.0 model {tf_model.__class__.__name__} were not initialized from the" + f" PyTorch model and are newly initialized: {missing_keys}\nYou should probably TRAIN this model on a" + " down-stream task to be able to use it for predictions and inference." ) else: logger.warning( f"All the weights of {tf_model.__class__.__name__} were initialized from the PyTorch model.\n" - f"If your task is similar to the task the model of the checkpoint was trained on, " + "If your task is similar to the task the model of the checkpoint was trained on, " f"you can already use {tf_model.__class__.__name__} for predictions without further training." ) @@ -430,25 +431,26 @@ def load_tf2_weights_in_pytorch_model(pt_model, tf_weights, allow_missing_keys=F if len(unexpected_keys) > 0: logger.warning( - f"Some weights of the TF 2.0 model were not used when " - f"initializing the PyTorch model {pt_model.__class__.__name__}: {unexpected_keys}\n" - f"- This IS expected if you are initializing {pt_model.__class__.__name__} from a TF 2.0 model trained on another task " - f"or with another architecture (e.g. initializing a BertForSequenceClassification model from a TFBertForPreTraining model).\n" - f"- This IS NOT expected if you are initializing {pt_model.__class__.__name__} from a TF 2.0 model that you expect " - f"to be exactly identical (e.g. initializing a BertForSequenceClassification model from a TFBertForSequenceClassification model)." + "Some weights of the TF 2.0 model were not used when initializing the PyTorch model" + f" {pt_model.__class__.__name__}: {unexpected_keys}\n- This IS expected if you are initializing" + f" {pt_model.__class__.__name__} from a TF 2.0 model trained on another task or with another architecture" + " (e.g. initializing a BertForSequenceClassification model from a TFBertForPreTraining model).\n- This IS" + f" NOT expected if you are initializing {pt_model.__class__.__name__} from a TF 2.0 model that you expect" + " to be exactly identical (e.g. 
initializing a BertForSequenceClassification model from a" + " TFBertForSequenceClassification model)." ) else: logger.warning(f"All TF 2.0 model weights were used when initializing {pt_model.__class__.__name__}.\n") if len(missing_keys) > 0: logger.warning( - f"Some weights of {pt_model.__class__.__name__} were not initialized from the TF 2.0 model " - f"and are newly initialized: {missing_keys}\n" - f"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference." + f"Some weights of {pt_model.__class__.__name__} were not initialized from the TF 2.0 model and are newly" + f" initialized: {missing_keys}\nYou should probably TRAIN this model on a down-stream task to be able to" + " use it for predictions and inference." ) else: logger.warning( f"All the weights of {pt_model.__class__.__name__} were initialized from the TF 2.0 model.\n" - f"If your task is similar to the task the model of the checkpoint was trained on, " + "If your task is similar to the task the model of the checkpoint was trained on, " f"you can already use {pt_model.__class__.__name__} for predictions without further training." ) diff --git a/src/transformers/modeling_tf_utils.py b/src/transformers/modeling_tf_utils.py index dacacbb28a..aad1b3483a 100644 --- a/src/transformers/modeling_tf_utils.py +++ b/src/transformers/modeling_tf_utils.py @@ -424,14 +424,16 @@ def input_processing(func, config, input_ids, **kwargs): if "decoder_cached_states" in kwargs["kwargs_call"]: warnings.warn( - "The `decoder_cached_states` argument is deprecated and will be removed in a future version, use `past_key_values` instead.", + "The `decoder_cached_states` argument is deprecated and will be removed in a future version, use" + " `past_key_values` instead.", FutureWarning, ) output["past_key_values"] = kwargs["kwargs_call"].pop("decoder_cached_states") if "past" in kwargs["kwargs_call"] and "past_key_values" in parameter_names: warnings.warn( - "The `past` argument is deprecated and will be removed in a future version, use `past_key_values` instead.", + "The `past` argument is deprecated and will be removed in a future version, use `past_key_values`" + " instead.", FutureWarning, ) kwargs["past_key_values"] = kwargs["kwargs_call"].pop("past") @@ -443,7 +445,8 @@ def input_processing(func, config, input_ids, **kwargs): else: if len(kwargs["kwargs_call"]) > 0: raise ValueError( - f"The following keyword arguments are not supported by this model: {list(kwargs['kwargs_call'].keys())}." + "The following keyword arguments are not supported by this model:" + f" {list(kwargs['kwargs_call'].keys())}." ) kwargs.pop("kwargs_call") @@ -469,12 +472,14 @@ def input_processing(func, config, input_ids, **kwargs): output[parameter_names[i]] = input else: raise ValueError( - f"Data of type {type(input)} is not allowed only {allowed_types} is accepted for {parameter_names[i]}." + f"Data of type {type(input)} is not allowed only {allowed_types} is accepted for" + f" {parameter_names[i]}." 
) elif isinstance(input_ids, Mapping): if "inputs" in input_ids: warnings.warn( - "The `inputs` argument is deprecated and will be removed in a future version, use `input_ids` instead.", + "The `inputs` argument is deprecated and will be removed in a future version, use `input_ids`" + " instead.", FutureWarning, ) @@ -482,7 +487,8 @@ def input_processing(func, config, input_ids, **kwargs): if "decoder_cached_states" in input_ids: warnings.warn( - "The `decoder_cached_states` argument is deprecated and will be removed in a future version, use `past_key_values` instead.", + "The `decoder_cached_states` argument is deprecated and will be removed in a future version, use" + " `past_key_values` instead.", FutureWarning, ) output["past_key_values"] = input_ids.pop("decoder_cached_states") @@ -502,7 +508,8 @@ def input_processing(func, config, input_ids, **kwargs): output[parameter_names[0]] = input_ids else: raise ValueError( - f"Data of type {type(input_ids)} is not allowed only {allowed_types} is accepted for {parameter_names[0]}." + f"Data of type {type(input_ids)} is not allowed only {allowed_types} is accepted for" + f" {parameter_names[0]}." ) # Populates any unspecified argument with their default value, according to the signature. @@ -1758,11 +1765,11 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin, Pu ) except ValueError: raise EnvironmentError( - f"We couldn't connect to '{HUGGINGFACE_CO_RESOLVE_ENDPOINT}' to load this model, couldn't find it in the cached " - f"files and it looks like {pretrained_model_name_or_path} is not the path to a directory " - f"containing a file named {TF2_WEIGHTS_NAME} or {WEIGHTS_NAME}.\n" - "Checkout your internet connection or see how to run the library in offline mode at " - "'https://huggingface.co/docs/transformers/installation#offline-mode'." + f"We couldn't connect to '{HUGGINGFACE_CO_RESOLVE_ENDPOINT}' to load this model, couldn't find it" + f" in the cached files and it looks like {pretrained_model_name_or_path} is not the path to a" + f" directory containing a file named {TF2_WEIGHTS_NAME} or {WEIGHTS_NAME}.\nCheckout your internet" + " connection or see how to run the library in offline mode at" + " 'https://huggingface.co/docs/transformers/installation#offline-mode'." ) except EnvironmentError: raise EnvironmentError( @@ -1841,27 +1848,29 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin, Pu if len(unexpected_keys) > 0: logger.warning( - f"Some layers from the model checkpoint at {pretrained_model_name_or_path} were not used when " - f"initializing {model.__class__.__name__}: {unexpected_keys}\n" - f"- This IS expected if you are initializing {model.__class__.__name__} from the checkpoint of a model trained on another task " - f"or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n" - f"- This IS NOT expected if you are initializing {model.__class__.__name__} from the checkpoint of a model that you expect " - f"to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model)." + f"Some layers from the model checkpoint at {pretrained_model_name_or_path} were not used when" + f" initializing {model.__class__.__name__}: {unexpected_keys}\n- This IS expected if you are" + f" initializing {model.__class__.__name__} from the checkpoint of a model trained on another task or" + " with another architecture (e.g. 
initializing a BertForSequenceClassification model from a" + " BertForPreTraining model).\n- This IS NOT expected if you are initializing" + f" {model.__class__.__name__} from the checkpoint of a model that you expect to be exactly identical" + " (initializing a BertForSequenceClassification model from a BertForSequenceClassification model)." ) else: logger.warning(f"All model checkpoint layers were used when initializing {model.__class__.__name__}.\n") if len(missing_keys) > 0: logger.warning( - f"Some layers of {model.__class__.__name__} were not initialized from the model checkpoint at {pretrained_model_name_or_path} " - f"and are newly initialized: {missing_keys}\n" - f"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference." + f"Some layers of {model.__class__.__name__} were not initialized from the model checkpoint at" + f" {pretrained_model_name_or_path} and are newly initialized: {missing_keys}\nYou should probably" + " TRAIN this model on a down-stream task to be able to use it for predictions and inference." ) elif len(mismatched_keys) == 0: logger.warning( - f"All the layers of {model.__class__.__name__} were initialized from the model checkpoint at {pretrained_model_name_or_path}.\n" - f"If your task is similar to the task the model of the checkpoint was trained on, " - f"you can already use {model.__class__.__name__} for predictions without further training." + f"All the layers of {model.__class__.__name__} were initialized from the model checkpoint at" + f" {pretrained_model_name_or_path}.\nIf your task is similar to the task the model of the checkpoint" + f" was trained on, you can already use {model.__class__.__name__} for predictions without further" + " training." ) if len(mismatched_keys) > 0: mismatched_warning = "\n".join( @@ -1871,9 +1880,10 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin, Pu ] ) logger.warning( - f"Some weights of {model.__class__.__name__} were not initialized from the model checkpoint at {pretrained_model_name_or_path} " - f"and are newly initialized because the shapes did not match:\n{mismatched_warning}\n" - f"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference." + f"Some weights of {model.__class__.__name__} were not initialized from the model checkpoint at" + f" {pretrained_model_name_or_path} and are newly initialized because the shapes did not" + f" match:\n{mismatched_warning}\nYou should probably TRAIN this model on a down-stream task to be able" + " to use it for predictions and inference." ) if output_loading_info: diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py index 81db2ff4a2..8a964db24b 100644 --- a/src/transformers/modeling_utils.py +++ b/src/transformers/modeling_utils.py @@ -318,8 +318,8 @@ def get_checkpoint_shard_files( ) except HTTPError: raise EnvironmentError( - f"We couldn't connect to '{HUGGINGFACE_CO_RESOLVE_ENDPOINT}' to load {shard_filename}. You should try again " - "after checking your internet connection." + f"We couldn't connect to '{HUGGINGFACE_CO_RESOLVE_ENDPOINT}' to load {shard_filename}. You should try" + " again after checking your internet connection." 
) cached_filenames.append(cached_filename) @@ -560,7 +560,7 @@ class ModuleUtilsMixin: def _hook_rss_memory_pre_forward(module, *args, **kwargs): try: import psutil - except (ImportError): + except ImportError: raise ImportError("You need to install psutil (pip install psutil) to use memory tracing.") process = psutil.Process(os.getpid()) @@ -572,7 +572,7 @@ class ModuleUtilsMixin: def _hook_rss_memory_post_forward(module, *args, **kwargs): try: import psutil - except (ImportError): + except ImportError: raise ImportError("You need to install psutil (pip install psutil) to use memory tracing.") process = psutil.Process(os.getpid()) @@ -1060,7 +1060,8 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix uninitialized_encoder_weights: List[str] = [] if decoder.__class__ != encoder.__class__: logger.info( - f"{decoder.__class__} and {encoder.__class__} are not equal. In this case make sure that all encoder weights are correctly initialized." + f"{decoder.__class__} and {encoder.__class__} are not equal. In this case make sure that all encoder" + " weights are correctly initialized." ) def tie_encoder_to_decoder_recursively( @@ -1106,7 +1107,8 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix continue elif depth > 500: raise ValueError( - "Max depth of recursive function `tie_encoder_to_decoder` reached. It seems that there is a circular dependency between two or more `nn.Modules` of your model." + "Max depth of recursive function `tie_encoder_to_decoder` reached. It seems that there is" + " a circular dependency between two or more `nn.Modules` of your model." ) else: decoder_name = encoder_name = name @@ -1226,8 +1228,9 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix if not isinstance(old_embeddings, nn.Embedding): raise TypeError( - f"Old embeddings are of type {type(old_embeddings)}, which is not an instance of {nn.Embedding}. " - f"You should either use a different resize function or make sure that `old_embeddings` are an instance of {nn.Embedding}." + f"Old embeddings are of type {type(old_embeddings)}, which is not an instance of {nn.Embedding}. You" + " should either use a different resize function or make sure that `old_embeddings` are an instance of" + f" {nn.Embedding}." ) # Build new embeddings @@ -1295,8 +1298,9 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix if not isinstance(old_lm_head, nn.Linear): raise TypeError( - f"Old language model head is of type {type(old_lm_head)}, which is not an instance of {nn.Linear}. " - f"You should either use a different resize function or make sure that `old_lm_head` are an instance of {nn.Linear}." + f"Old language model head is of type {type(old_lm_head)}, which is not an instance of {nn.Linear}. You" + " should either use a different resize function or make sure that `old_lm_head` are an instance of" + f" {nn.Linear}." ) # Build new lm head @@ -1905,20 +1909,20 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix } if has_file(pretrained_model_name_or_path, TF2_WEIGHTS_NAME, **has_file_kwargs): raise EnvironmentError( - f"{pretrained_model_name_or_path} does not appear to have a file named {WEIGHTS_NAME} but " - "there is a file for TensorFlow weights. Use `from_tf=True` to load this model from those " - "weights." + f"{pretrained_model_name_or_path} does not appear to have a file named" + f" {WEIGHTS_NAME} but there is a file for TensorFlow weights. 
Use `from_tf=True` to" + " load this model from those weights." ) elif has_file(pretrained_model_name_or_path, FLAX_WEIGHTS_NAME, **has_file_kwargs): raise EnvironmentError( - f"{pretrained_model_name_or_path} does not appear to have a file named {WEIGHTS_NAME} but " - "there is a file for Flax weights. Use `from_flax=True` to load this model from those " - "weights." + f"{pretrained_model_name_or_path} does not appear to have a file named" + f" {WEIGHTS_NAME} but there is a file for Flax weights. Use `from_flax=True` to load" + " this model from those weights." ) else: raise EnvironmentError( - f"{pretrained_model_name_or_path} does not appear to have a file named {WEIGHTS_NAME}, " - f"{TF2_WEIGHTS_NAME}, {TF_WEIGHTS_NAME} or {FLAX_WEIGHTS_NAME}." + f"{pretrained_model_name_or_path} does not appear to have a file named {WEIGHTS_NAME}," + f" {TF2_WEIGHTS_NAME}, {TF_WEIGHTS_NAME} or {FLAX_WEIGHTS_NAME}." ) else: raise EnvironmentError( @@ -1931,12 +1935,11 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix ) except ValueError: raise EnvironmentError( - f"We couldn't connect to '{HUGGINGFACE_CO_RESOLVE_ENDPOINT}' to load this model, couldn't find it in the cached " - f"files and it looks like {pretrained_model_name_or_path} is not the path to a directory " - f"containing a file named {WEIGHTS_NAME}, {TF2_WEIGHTS_NAME}, {TF_WEIGHTS_NAME} or " - f"{FLAX_WEIGHTS_NAME}.\n" - "Checkout your internet connection or see how to run the library in offline mode at " - "'https://huggingface.co/docs/transformers/installation#offline-mode'." + f"We couldn't connect to '{HUGGINGFACE_CO_RESOLVE_ENDPOINT}' to load this model, couldn't find it" + f" in the cached files and it looks like {pretrained_model_name_or_path} is not the path to a" + f" directory containing a file named {WEIGHTS_NAME}, {TF2_WEIGHTS_NAME}, {TF_WEIGHTS_NAME} or" + f" {FLAX_WEIGHTS_NAME}.\nCheckout your internet connection or see how to run the library in" + " offline mode at 'https://huggingface.co/docs/transformers/installation#offline-mode'." ) except EnvironmentError: raise EnvironmentError( @@ -2035,8 +2038,9 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix model = load_tf2_checkpoint_in_pytorch_model(model, resolved_archive_file, allow_missing_keys=True) except ImportError: logger.error( - "Loading a TensorFlow model in PyTorch, requires both PyTorch and TensorFlow to be installed. Please see " - "https://pytorch.org/ and https://www.tensorflow.org/install/ for installation instructions." + "Loading a TensorFlow model in PyTorch, requires both PyTorch and TensorFlow to be installed." + " Please see https://pytorch.org/ and https://www.tensorflow.org/install/ for installation" + " instructions." ) raise elif from_flax: @@ -2046,8 +2050,9 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix model = load_flax_checkpoint_in_pytorch_model(model, resolved_archive_file) except ImportError: logger.error( - "Loading a Flax model in PyTorch, requires both PyTorch and Flax to be installed. Please see " - "https://pytorch.org/ and https://flax.readthedocs.io/en/latest/installation.html for installation instructions." + "Loading a Flax model in PyTorch, requires both PyTorch and Flax to be installed. Please see" + " https://pytorch.org/ and https://flax.readthedocs.io/en/latest/installation.html for" + " installation instructions." 
) raise elif from_pt: @@ -2252,26 +2257,28 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix if len(unexpected_keys) > 0: logger.warning( - f"Some weights of the model checkpoint at {pretrained_model_name_or_path} were not used when " - f"initializing {model.__class__.__name__}: {unexpected_keys}\n" - f"- This IS expected if you are initializing {model.__class__.__name__} from the checkpoint of a model trained on another task " - f"or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n" - f"- This IS NOT expected if you are initializing {model.__class__.__name__} from the checkpoint of a model that you expect " - f"to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model)." + f"Some weights of the model checkpoint at {pretrained_model_name_or_path} were not used when" + f" initializing {model.__class__.__name__}: {unexpected_keys}\n- This IS expected if you are" + f" initializing {model.__class__.__name__} from the checkpoint of a model trained on another task or" + " with another architecture (e.g. initializing a BertForSequenceClassification model from a" + " BertForPreTraining model).\n- This IS NOT expected if you are initializing" + f" {model.__class__.__name__} from the checkpoint of a model that you expect to be exactly identical" + " (initializing a BertForSequenceClassification model from a BertForSequenceClassification model)." ) else: logger.info(f"All model checkpoint weights were used when initializing {model.__class__.__name__}.\n") if len(missing_keys) > 0: logger.warning( - f"Some weights of {model.__class__.__name__} were not initialized from the model checkpoint at {pretrained_model_name_or_path} " - f"and are newly initialized: {missing_keys}\n" - f"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference." + f"Some weights of {model.__class__.__name__} were not initialized from the model checkpoint at" + f" {pretrained_model_name_or_path} and are newly initialized: {missing_keys}\nYou should probably" + " TRAIN this model on a down-stream task to be able to use it for predictions and inference." ) elif len(mismatched_keys) == 0: logger.info( - f"All the weights of {model.__class__.__name__} were initialized from the model checkpoint at {pretrained_model_name_or_path}.\n" - f"If your task is similar to the task the model of the checkpoint was trained on, " - f"you can already use {model.__class__.__name__} for predictions without further training." + f"All the weights of {model.__class__.__name__} were initialized from the model checkpoint at" + f" {pretrained_model_name_or_path}.\nIf your task is similar to the task the model of the checkpoint" + f" was trained on, you can already use {model.__class__.__name__} for predictions without further" + " training." ) if len(mismatched_keys) > 0: mismatched_warning = "\n".join( @@ -2281,9 +2288,10 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix ] ) logger.warning( - f"Some weights of {model.__class__.__name__} were not initialized from the model checkpoint at {pretrained_model_name_or_path} " - f"and are newly initialized because the shapes did not match:\n{mismatched_warning}\n" - f"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference." 
+ f"Some weights of {model.__class__.__name__} were not initialized from the model checkpoint at" + f" {pretrained_model_name_or_path} and are newly initialized because the shapes did not" + f" match:\n{mismatched_warning}\nYou should probably TRAIN this model on a down-stream task to be able" + " to use it for predictions and inference." ) return model, missing_keys, unexpected_keys, mismatched_keys, error_msgs diff --git a/src/transformers/models/albert/convert_albert_original_tf_checkpoint_to_pytorch.py b/src/transformers/models/albert/convert_albert_original_tf_checkpoint_to_pytorch.py index ebfc81eb28..8823a86fc8 100644 --- a/src/transformers/models/albert/convert_albert_original_tf_checkpoint_to_pytorch.py +++ b/src/transformers/models/albert/convert_albert_original_tf_checkpoint_to_pytorch.py @@ -51,8 +51,10 @@ if __name__ == "__main__": default=None, type=str, required=True, - help="The config json file corresponding to the pre-trained ALBERT model. \n" - "This specifies the model architecture.", + help=( + "The config json file corresponding to the pre-trained ALBERT model. \n" + "This specifies the model architecture." + ), ) parser.add_argument( "--pytorch_dump_path", default=None, type=str, required=True, help="Path to the output PyTorch model." diff --git a/src/transformers/models/albert/modeling_albert.py b/src/transformers/models/albert/modeling_albert.py index 514572be71..cc6871d936 100755 --- a/src/transformers/models/albert/modeling_albert.py +++ b/src/transformers/models/albert/modeling_albert.py @@ -1154,8 +1154,10 @@ class AlbertForTokenClassification(AlbertPreTrainedModel): checkpoint="vumichien/tiny-albert", output_type=TokenClassifierOutput, config_class=_CONFIG_FOR_DOC, - expected_output="['LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_0', 'LABEL_1', 'LABEL_0', 'LABEL_1', 'LABEL_1', " - "'LABEL_0', 'LABEL_1', 'LABEL_0', 'LABEL_0', 'LABEL_1', 'LABEL_1']", + expected_output=( + "['LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_0', 'LABEL_1', 'LABEL_0', 'LABEL_1', 'LABEL_1', " + "'LABEL_0', 'LABEL_1', 'LABEL_0', 'LABEL_0', 'LABEL_1', 'LABEL_1']" + ), expected_loss=0.66, ) def forward( diff --git a/src/transformers/models/albert/modeling_flax_albert.py b/src/transformers/models/albert/modeling_flax_albert.py index 264735dbd2..84b86fa563 100644 --- a/src/transformers/models/albert/modeling_flax_albert.py +++ b/src/transformers/models/albert/modeling_flax_albert.py @@ -198,8 +198,8 @@ class FlaxAlbertSelfAttention(nn.Module): def setup(self): if self.config.hidden_size % self.config.num_attention_heads != 0: raise ValueError( - "`config.hidden_size`: {self.config.hidden_size} has to be a multiple of `config.num_attention_heads`\ - : {self.config.num_attention_heads}" + "`config.hidden_size`: {self.config.hidden_size} has to be a multiple of `config.num_attention_heads` " + " : {self.config.num_attention_heads}" ) self.query = nn.Dense( diff --git a/src/transformers/models/albert/modeling_tf_albert.py b/src/transformers/models/albert/modeling_tf_albert.py index 753152f7a8..692d5fd8d8 100644 --- a/src/transformers/models/albert/modeling_tf_albert.py +++ b/src/transformers/models/albert/modeling_tf_albert.py @@ -1165,8 +1165,10 @@ class TFAlbertForTokenClassification(TFAlbertPreTrainedModel, TFTokenClassificat checkpoint="vumichien/tiny-albert", output_type=TFTokenClassifierOutput, config_class=_CONFIG_FOR_DOC, - expected_output="['LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_0', 'LABEL_1', 'LABEL_0', 'LABEL_1', 'LABEL_1', " - "'LABEL_0', 'LABEL_1', 'LABEL_0', 'LABEL_0', 'LABEL_1', 'LABEL_1']", 
+ expected_output=( + "['LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_0', 'LABEL_1', 'LABEL_0', 'LABEL_1', 'LABEL_1', " + "'LABEL_0', 'LABEL_1', 'LABEL_0', 'LABEL_0', 'LABEL_1', 'LABEL_1']" + ), expected_loss=0.66, ) def call( diff --git a/src/transformers/models/auto/configuration_auto.py b/src/transformers/models/auto/configuration_auto.py index 0f930694ca..dd2a4b491a 100644 --- a/src/transformers/models/auto/configuration_auto.py +++ b/src/transformers/models/auto/configuration_auto.py @@ -537,7 +537,8 @@ def _list_model_options(indent, config_to_class=None, use_model_types=True): config: MODEL_NAMES_MAPPING[model_type] for model_type, config in CONFIG_MAPPING_NAMES.items() } lines = [ - f"{indent}- [`{config_name}`] configuration class: {config_to_name[config_name]} ({config_to_model_name[config_name]} model)" + f"{indent}- [`{config_name}`] configuration class:" + f" {config_to_name[config_name]} ({config_to_model_name[config_name]} model)" for config_name in sorted(config_to_name.keys()) ] return "\n".join(lines) @@ -558,7 +559,8 @@ def replace_list_option_in_docstrings(config_to_class=None, use_model_types=True docstrings = "\n".join(lines) else: raise ValueError( - f"The function {fn} should have an empty 'List options' in its docstring as placeholder, current docstring is:\n{docstrings}" + f"The function {fn} should have an empty 'List options' in its docstring as placeholder, current" + f" docstring is:\n{docstrings}" ) fn.__doc__ = docstrings return fn @@ -681,9 +683,9 @@ class AutoConfig: if "auto_map" in config_dict and "AutoConfig" in config_dict["auto_map"]: if not trust_remote_code: raise ValueError( - f"Loading {pretrained_model_name_or_path} requires you to execute the configuration file in that repo " - "on your local machine. Make sure you have read the code there to avoid malicious use, then set " - "the option `trust_remote_code=True` to remove this error." + f"Loading {pretrained_model_name_or_path} requires you to execute the configuration file in that" + " repo on your local machine. Make sure you have read the code there to avoid malicious use, then" + " set the option `trust_remote_code=True` to remove this error." ) if kwargs.get("revision", None) is None: logger.warning( diff --git a/src/transformers/models/auto/modeling_tf_auto.py b/src/transformers/models/auto/modeling_tf_auto.py index e2c1f30174..456d1426dc 100644 --- a/src/transformers/models/auto/modeling_tf_auto.py +++ b/src/transformers/models/auto/modeling_tf_auto.py @@ -506,9 +506,9 @@ class TFAutoModelWithLMHead(_TFAutoModelWithLMHead): @classmethod def from_config(cls, config): warnings.warn( - "The class `TFAutoModelWithLMHead` is deprecated and will be removed in a future version. Please use " - "`TFAutoModelForCausalLM` for causal language models, `TFAutoModelForMaskedLM` for masked language models and " - "`TFAutoModelForSeq2SeqLM` for encoder-decoder models.", + "The class `TFAutoModelWithLMHead` is deprecated and will be removed in a future version. Please use" + " `TFAutoModelForCausalLM` for causal language models, `TFAutoModelForMaskedLM` for masked language models" + " and `TFAutoModelForSeq2SeqLM` for encoder-decoder models.", FutureWarning, ) return super().from_config(config) @@ -516,9 +516,9 @@ class TFAutoModelWithLMHead(_TFAutoModelWithLMHead): @classmethod def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): warnings.warn( - "The class `TFAutoModelWithLMHead` is deprecated and will be removed in a future version. 
Please use " - "`TFAutoModelForCausalLM` for causal language models, `TFAutoModelForMaskedLM` for masked language models and " - "`TFAutoModelForSeq2SeqLM` for encoder-decoder models.", + "The class `TFAutoModelWithLMHead` is deprecated and will be removed in a future version. Please use" + " `TFAutoModelForCausalLM` for causal language models, `TFAutoModelForMaskedLM` for masked language models" + " and `TFAutoModelForSeq2SeqLM` for encoder-decoder models.", FutureWarning, ) return super().from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs) diff --git a/src/transformers/models/auto/tokenization_auto.py b/src/transformers/models/auto/tokenization_auto.py index 691eb71c1d..4ec7d96ebc 100644 --- a/src/transformers/models/auto/tokenization_auto.py +++ b/src/transformers/models/auto/tokenization_auto.py @@ -521,14 +521,14 @@ class AutoTokenizer: if tokenizer_auto_map is not None: if not trust_remote_code: raise ValueError( - f"Loading {pretrained_model_name_or_path} requires you to execute the tokenizer file in that repo " - "on your local machine. Make sure you have read the code there to avoid malicious use, then set " - "the option `trust_remote_code=True` to remove this error." + f"Loading {pretrained_model_name_or_path} requires you to execute the tokenizer file in that" + " repo on your local machine. Make sure you have read the code there to avoid malicious use," + " then set the option `trust_remote_code=True` to remove this error." ) if kwargs.get("revision", None) is None: logger.warning( - "Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure " - "no malicious code has been contributed in a newer revision." + "Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure" + " no malicious code has been contributed in a newer revision." 
) if use_fast and tokenizer_auto_map[1] is not None: diff --git a/src/transformers/models/bart/modeling_bart.py b/src/transformers/models/bart/modeling_bart.py index fa9b659bee..7ebb143e22 100755 --- a/src/transformers/models/bart/modeling_bart.py +++ b/src/transformers/models/bart/modeling_bart.py @@ -229,7 +229,8 @@ class BartAttention(nn.Module): if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len): raise ValueError( - f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is {attn_weights.size()}" + f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is" + f" {attn_weights.size()}" ) if attention_mask is not None: @@ -245,7 +246,8 @@ class BartAttention(nn.Module): if layer_head_mask is not None: if layer_head_mask.size() != (self.num_heads,): raise ValueError( - f"Head mask for a single layer should be of size {(self.num_heads,)}, but is {layer_head_mask.size()}" + f"Head mask for a single layer should be of size {(self.num_heads,)}, but is" + f" {layer_head_mask.size()}" ) attn_weights = layer_head_mask.view(1, -1, 1, 1) * attn_weights.view(bsz, self.num_heads, tgt_len, src_len) attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len) @@ -266,7 +268,8 @@ class BartAttention(nn.Module): if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim): raise ValueError( - f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is {attn_output.size()}" + f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is" + f" {attn_output.size()}" ) attn_output = attn_output.view(bsz, self.num_heads, tgt_len, self.head_dim) @@ -813,7 +816,8 @@ class BartEncoder(BartPretrainedModel): if head_mask is not None: if head_mask.size()[0] != (len(self.layers)): raise ValueError( - f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}." + f"The head_mask should be specified for {len(self.layers)} layers, but it is for" + f" {head_mask.size()[0]}." ) for idx, encoder_layer in enumerate(self.layers): @@ -1050,7 +1054,8 @@ class BartDecoder(BartPretrainedModel): if attn_mask is not None: if attn_mask.size()[0] != (len(self.layers)): raise ValueError( - f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}." + f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for" + f" {head_mask.size()[0]}." 
) for idx, decoder_layer in enumerate(self.layers): diff --git a/src/transformers/models/bart/modeling_tf_bart.py b/src/transformers/models/bart/modeling_tf_bart.py index 80b94bfe80..21523e2f81 100644 --- a/src/transformers/models/bart/modeling_tf_bart.py +++ b/src/transformers/models/bart/modeling_tf_bart.py @@ -227,7 +227,10 @@ class TFBartAttention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(attn_weights), [bsz * self.num_heads, tgt_len, src_len], - message=f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is {shape_list(attn_weights)}", + message=( + f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is" + f" {shape_list(attn_weights)}" + ), ) if attention_mask is not None: @@ -237,7 +240,10 @@ class TFBartAttention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(attention_mask), [bsz, 1, tgt_len, src_len], - message=f"Attention mask should be of size {(bsz, 1, tgt_len, src_len)}, but is {shape_list(attention_mask)}", + message=( + f"Attention mask should be of size {(bsz, 1, tgt_len, src_len)}, but is" + f" {shape_list(attention_mask)}" + ), ) attention_mask = tf.cast(attention_mask, dtype=attn_weights.dtype) @@ -253,7 +259,10 @@ class TFBartAttention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(layer_head_mask), [self.num_heads], - message=f"Head mask for a single layer should be of size {(self.num_heads)}, but is {shape_list(layer_head_mask)}", + message=( + f"Head mask for a single layer should be of size {(self.num_heads)}, but is" + f" {shape_list(layer_head_mask)}" + ), ) attn_weights = tf.reshape(layer_head_mask, (1, -1, 1, 1)) * tf.reshape( @@ -270,7 +279,10 @@ class TFBartAttention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(attn_output), [bsz * self.num_heads, tgt_len, self.head_dim], - message=f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is {shape_list(attn_output)}", + message=( + f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is" + f" {shape_list(attn_output)}" + ), ) attn_output = tf.transpose( @@ -751,7 +763,10 @@ class TFBartEncoder(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(head_mask)[0], len(self.layers), - message=f"The head_mask should be specified for {len(self.layers)} layers, but it is for {shape_list(head_mask)[0]}.", + message=( + f"The head_mask should be specified for {len(self.layers)} layers, but it is for" + f" {shape_list(head_mask)[0]}." + ), ) # encoder layers @@ -946,7 +961,10 @@ class TFBartDecoder(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(attn_mask)[0], len(self.layers), - message=f"The {attn_mask_name} should be specified for {len(self.layers)} layers, but it is for {shape_list(attn_mask)[0]}.", + message=( + f"The {attn_mask_name} should be specified for {len(self.layers)} layers, but it is for" + f" {shape_list(attn_mask)[0]}." 
+ ), ) for idx, decoder_layer in enumerate(self.layers): diff --git a/src/transformers/models/barthez/tokenization_barthez.py b/src/transformers/models/barthez/tokenization_barthez.py index cbe235e650..5f12adb7a3 100644 --- a/src/transformers/models/barthez/tokenization_barthez.py +++ b/src/transformers/models/barthez/tokenization_barthez.py @@ -33,7 +33,9 @@ PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { "moussaKam/mbarthez": "https://huggingface.co/moussaKam/mbarthez/resolve/main/sentencepiece.bpe.model", "moussaKam/barthez": "https://huggingface.co/moussaKam/barthez/resolve/main/sentencepiece.bpe.model", - "moussaKam/barthez-orangesum-title": "https://huggingface.co/moussaKam/barthez-orangesum-title/resolve/main/sentencepiece.bpe.model", + "moussaKam/barthez-orangesum-title": ( + "https://huggingface.co/moussaKam/barthez-orangesum-title/resolve/main/sentencepiece.bpe.model" + ), }, } diff --git a/src/transformers/models/barthez/tokenization_barthez_fast.py b/src/transformers/models/barthez/tokenization_barthez_fast.py index b8b6813c1e..a7f36e007c 100644 --- a/src/transformers/models/barthez/tokenization_barthez_fast.py +++ b/src/transformers/models/barthez/tokenization_barthez_fast.py @@ -37,12 +37,16 @@ PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { "moussaKam/mbarthez": "https://huggingface.co/moussaKam/mbarthez/resolve/main/sentencepiece.bpe.model", "moussaKam/barthez": "https://huggingface.co/moussaKam/barthez/resolve/main/sentencepiece.bpe.model", - "moussaKam/barthez-orangesum-title": "https://huggingface.co/moussaKam/barthez-orangesum-title/resolve/main/sentencepiece.bpe.model", + "moussaKam/barthez-orangesum-title": ( + "https://huggingface.co/moussaKam/barthez-orangesum-title/resolve/main/sentencepiece.bpe.model" + ), }, "tokenizer_file": { "moussaKam/mbarthez": "https://huggingface.co/moussaKam/mbarthez/resolve/main/tokenizer.json", "moussaKam/barthez": "https://huggingface.co/moussaKam/barthez/resolve/main/tokenizer.json", - "moussaKam/barthez-orangesum-title": "https://huggingface.co/moussaKam/barthez-orangesum-title/resolve/main/tokenizer.json", + "moussaKam/barthez-orangesum-title": ( + "https://huggingface.co/moussaKam/barthez-orangesum-title/resolve/main/tokenizer.json" + ), }, } diff --git a/src/transformers/models/beit/configuration_beit.py b/src/transformers/models/beit/configuration_beit.py index 820ff5df17..c745f3227d 100644 --- a/src/transformers/models/beit/configuration_beit.py +++ b/src/transformers/models/beit/configuration_beit.py @@ -26,7 +26,9 @@ from ...utils import logging logger = logging.get_logger(__name__) BEIT_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "microsoft/beit-base-patch16-224-pt22k": "https://huggingface.co/microsoft/beit-base-patch16-224-pt22k/resolve/main/config.json", + "microsoft/beit-base-patch16-224-pt22k": ( + "https://huggingface.co/microsoft/beit-base-patch16-224-pt22k/resolve/main/config.json" + ), # See all BEiT models at https://huggingface.co/models?filter=beit } diff --git a/src/transformers/models/beit/feature_extraction_beit.py b/src/transformers/models/beit/feature_extraction_beit.py index fb74a7c59a..62b790621b 100644 --- a/src/transformers/models/beit/feature_extraction_beit.py +++ b/src/transformers/models/beit/feature_extraction_beit.py @@ -166,8 +166,9 @@ class BeitFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin): if not valid_segmentation_maps: raise ValueError( - "Segmentation maps must of type `PIL.Image.Image`, `np.ndarray` or `torch.Tensor` (single example)," - "`List[PIL.Image.Image]`, 
`List[np.ndarray]` or `List[torch.Tensor]` (batch of examples)." + "Segmentation maps must of type `PIL.Image.Image`, `np.ndarray` or `torch.Tensor` (single" + " example),`List[PIL.Image.Image]`, `List[np.ndarray]` or `List[torch.Tensor]` (batch of" + " examples)." ) is_batched = bool( diff --git a/src/transformers/models/bert/configuration_bert.py b/src/transformers/models/bert/configuration_bert.py index 893e6fb6d8..25c0f5b67e 100644 --- a/src/transformers/models/bert/configuration_bert.py +++ b/src/transformers/models/bert/configuration_bert.py @@ -33,19 +33,37 @@ BERT_PRETRAINED_CONFIG_ARCHIVE_MAP = { "bert-base-multilingual-cased": "https://huggingface.co/bert-base-multilingual-cased/resolve/main/config.json", "bert-base-chinese": "https://huggingface.co/bert-base-chinese/resolve/main/config.json", "bert-base-german-cased": "https://huggingface.co/bert-base-german-cased/resolve/main/config.json", - "bert-large-uncased-whole-word-masking": "https://huggingface.co/bert-large-uncased-whole-word-masking/resolve/main/config.json", - "bert-large-cased-whole-word-masking": "https://huggingface.co/bert-large-cased-whole-word-masking/resolve/main/config.json", - "bert-large-uncased-whole-word-masking-finetuned-squad": "https://huggingface.co/bert-large-uncased-whole-word-masking-finetuned-squad/resolve/main/config.json", - "bert-large-cased-whole-word-masking-finetuned-squad": "https://huggingface.co/bert-large-cased-whole-word-masking-finetuned-squad/resolve/main/config.json", + "bert-large-uncased-whole-word-masking": ( + "https://huggingface.co/bert-large-uncased-whole-word-masking/resolve/main/config.json" + ), + "bert-large-cased-whole-word-masking": ( + "https://huggingface.co/bert-large-cased-whole-word-masking/resolve/main/config.json" + ), + "bert-large-uncased-whole-word-masking-finetuned-squad": ( + "https://huggingface.co/bert-large-uncased-whole-word-masking-finetuned-squad/resolve/main/config.json" + ), + "bert-large-cased-whole-word-masking-finetuned-squad": ( + "https://huggingface.co/bert-large-cased-whole-word-masking-finetuned-squad/resolve/main/config.json" + ), "bert-base-cased-finetuned-mrpc": "https://huggingface.co/bert-base-cased-finetuned-mrpc/resolve/main/config.json", "bert-base-german-dbmdz-cased": "https://huggingface.co/bert-base-german-dbmdz-cased/resolve/main/config.json", "bert-base-german-dbmdz-uncased": "https://huggingface.co/bert-base-german-dbmdz-uncased/resolve/main/config.json", "cl-tohoku/bert-base-japanese": "https://huggingface.co/cl-tohoku/bert-base-japanese/resolve/main/config.json", - "cl-tohoku/bert-base-japanese-whole-word-masking": "https://huggingface.co/cl-tohoku/bert-base-japanese-whole-word-masking/resolve/main/config.json", - "cl-tohoku/bert-base-japanese-char": "https://huggingface.co/cl-tohoku/bert-base-japanese-char/resolve/main/config.json", - "cl-tohoku/bert-base-japanese-char-whole-word-masking": "https://huggingface.co/cl-tohoku/bert-base-japanese-char-whole-word-masking/resolve/main/config.json", - "TurkuNLP/bert-base-finnish-cased-v1": "https://huggingface.co/TurkuNLP/bert-base-finnish-cased-v1/resolve/main/config.json", - "TurkuNLP/bert-base-finnish-uncased-v1": "https://huggingface.co/TurkuNLP/bert-base-finnish-uncased-v1/resolve/main/config.json", + "cl-tohoku/bert-base-japanese-whole-word-masking": ( + "https://huggingface.co/cl-tohoku/bert-base-japanese-whole-word-masking/resolve/main/config.json" + ), + "cl-tohoku/bert-base-japanese-char": ( + 
"https://huggingface.co/cl-tohoku/bert-base-japanese-char/resolve/main/config.json" + ), + "cl-tohoku/bert-base-japanese-char-whole-word-masking": ( + "https://huggingface.co/cl-tohoku/bert-base-japanese-char-whole-word-masking/resolve/main/config.json" + ), + "TurkuNLP/bert-base-finnish-cased-v1": ( + "https://huggingface.co/TurkuNLP/bert-base-finnish-cased-v1/resolve/main/config.json" + ), + "TurkuNLP/bert-base-finnish-uncased-v1": ( + "https://huggingface.co/TurkuNLP/bert-base-finnish-uncased-v1/resolve/main/config.json" + ), "wietsedv/bert-base-dutch-cased": "https://huggingface.co/wietsedv/bert-base-dutch-cased/resolve/main/config.json", # See all BERT models at https://huggingface.co/models?filter=bert } diff --git a/src/transformers/models/bert/convert_bert_original_tf2_checkpoint_to_pytorch.py b/src/transformers/models/bert/convert_bert_original_tf2_checkpoint_to_pytorch.py index 8a48b616a7..40533ede43 100644 --- a/src/transformers/models/bert/convert_bert_original_tf2_checkpoint_to_pytorch.py +++ b/src/transformers/models/bert/convert_bert_original_tf2_checkpoint_to_pytorch.py @@ -79,7 +79,8 @@ def load_tf2_weights_in_bert(model, tf_checkpoint_path, config): layer_depth = list(set(layer_depth))[0] if layer_depth != 1: raise ValueError( - "The model contains more than just the embedding/encoder layers. This script does not handle MLM/NSP heads." + "The model contains more than just the embedding/encoder layers. This script does not handle MLM/NSP" + " heads." ) # convert layers @@ -201,7 +202,8 @@ def load_tf2_weights_in_bert(model, tf_checkpoint_path, config): pointer.data = torch.from_numpy(array) else: raise ValueError( - f"Shape mismatch in layer {full_name}: Model expects shape {pointer.shape} but layer contains shape: {array.shape}" + f"Shape mismatch in layer {full_name}: Model expects shape {pointer.shape} but layer contains shape:" + f" {array.shape}" ) logger.info(f"Successfully set variable {full_name} to PyTorch layer {trace}") return model diff --git a/src/transformers/models/bert/convert_bert_original_tf_checkpoint_to_pytorch.py b/src/transformers/models/bert/convert_bert_original_tf_checkpoint_to_pytorch.py index 19850bc431..09c4e3ee6c 100755 --- a/src/transformers/models/bert/convert_bert_original_tf_checkpoint_to_pytorch.py +++ b/src/transformers/models/bert/convert_bert_original_tf_checkpoint_to_pytorch.py @@ -51,8 +51,10 @@ if __name__ == "__main__": default=None, type=str, required=True, - help="The config json file corresponding to the pre-trained BERT model. \n" - "This specifies the model architecture.", + help=( + "The config json file corresponding to the pre-trained BERT model. \n" + "This specifies the model architecture." + ), ) parser.add_argument( "--pytorch_dump_path", default=None, type=str, required=True, help="Path to the output PyTorch model." 
diff --git a/src/transformers/models/bert/modeling_bert.py b/src/transformers/models/bert/modeling_bert.py index 9da6258e9e..c1ef87551b 100755 --- a/src/transformers/models/bert/modeling_bert.py +++ b/src/transformers/models/bert/modeling_bert.py
@@ -62,7 +62,7 @@ _TOKENIZER_FOR_DOC = "BertTokenizer" # TokenClassification docstring _CHECKPOINT_FOR_TOKEN_CLASSIFICATION = "dbmdz/bert-large-cased-finetuned-conll03-english" _TOKEN_CLASS_EXPECTED_OUTPUT = ( - "['O', 'I-ORG', 'I-ORG', 'I-ORG', 'O', 'O', 'O', 'O', 'O', 'I-LOC', 'O', 'I-LOC', " "'I-LOC'] " + "['O', 'I-ORG', 'I-ORG', 'I-ORG', 'O', 'O', 'O', 'O', 'O', 'I-LOC', 'O', 'I-LOC', 'I-LOC'] " ) _TOKEN_CLASS_EXPECTED_LOSS = 0.01
@@ -510,7 +510,8 @@ class BertLayer(nn.Module): if self.is_decoder and encoder_hidden_states is not None: if not hasattr(self, "crossattention"): raise ValueError( - f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`" + f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers" + " by setting `config.add_cross_attention=True`" ) # cross_attn cached key/values tuple is at positions 3,4 of past_key_value tuple
@@ -1458,7 +1459,8 @@ class BertForNextSentencePrediction(BertPreTrainedModel): if "next_sentence_label" in kwargs: warnings.warn( - "The `next_sentence_label` argument is deprecated and will be removed in a future version, use `labels` instead.", + "The `next_sentence_label` argument is deprecated and will be removed in a future version, use" + " `labels` instead.", FutureWarning, ) labels = kwargs.pop("next_sentence_label")
diff --git a/src/transformers/models/bert/modeling_flax_bert.py b/src/transformers/models/bert/modeling_flax_bert.py index 9297348cf4..902d6cca3d 100644 --- a/src/transformers/models/bert/modeling_flax_bert.py +++ b/src/transformers/models/bert/modeling_flax_bert.py
@@ -222,8 +222,8 @@ class FlaxBertSelfAttention(nn.Module): self.head_dim = self.config.hidden_size // self.config.num_attention_heads if self.config.hidden_size % self.config.num_attention_heads != 0: raise ValueError( - "`config.hidden_size`: {self.config.hidden_size} has to be a multiple of `config.num_attention_heads`\ - : {self.config.num_attention_heads}" + "`config.hidden_size`: {self.config.hidden_size} has to be a multiple of `config.num_attention_heads` " + " : {self.config.num_attention_heads}" ) self.query = nn.Dense(
@@ -571,8 +571,8 @@ class FlaxBertLayerCollection(nn.Module): if head_mask is not None: if head_mask.shape[0] != (len(self.layers)): raise ValueError( - f"The head_mask should be specified for {len(self.layers)} layers, but it is for \ - {head_mask.shape[0]}." + f"The head_mask should be specified for {len(self.layers)} layers, but it is for " + f" {head_mask.shape[0]}." ) for i, layer in enumerate(self.layers):
diff --git a/src/transformers/models/bert/modeling_tf_bert.py b/src/transformers/models/bert/modeling_tf_bert.py index 1b75d4dc66..ca839afc5e 100644 --- a/src/transformers/models/bert/modeling_tf_bert.py +++ b/src/transformers/models/bert/modeling_tf_bert.py
@@ -72,7 +72,7 @@ _TOKENIZER_FOR_DOC = "BertTokenizer" # TokenClassification docstring _CHECKPOINT_FOR_TOKEN_CLASSIFICATION = "dbmdz/bert-large-cased-finetuned-conll03-english" _TOKEN_CLASS_EXPECTED_OUTPUT = ( - "['O', 'I-ORG', 'I-ORG', 'I-ORG', 'O', 'O', 'O', 'O', 'O', 'I-LOC', 'O', 'I-LOC', " "'I-LOC'] " + "['O', 'I-ORG', 'I-ORG', 'I-ORG', 'O', 'O', 'O', 'O', 'O', 'I-LOC', 'O', 'I-LOC', 'I-LOC'] " ) _TOKEN_CLASS_EXPECTED_LOSS = 0.01
@@ -490,8 +490,8 @@ class TFBertLayer(tf.keras.layers.Layer): if self.is_decoder and encoder_hidden_states is not None: if not hasattr(self, "crossattention"): raise ValueError( - f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers " - "by setting `config.add_cross_attention=True`" + f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers" + " by setting `config.add_cross_attention=True`" ) # cross_attn cached key/values tuple is at positions 3,4 of past_key_value tuple
diff --git a/src/transformers/models/bert/tokenization_bert.py b/src/transformers/models/bert/tokenization_bert.py index 1737e509c9..233ef0ab0d 100644 --- a/src/transformers/models/bert/tokenization_bert.py +++ b/src/transformers/models/bert/tokenization_bert.py
@@ -34,20 +34,40 @@ PRETRAINED_VOCAB_FILES_MAP = { "bert-large-uncased": "https://huggingface.co/bert-large-uncased/resolve/main/vocab.txt", "bert-base-cased": "https://huggingface.co/bert-base-cased/resolve/main/vocab.txt", "bert-large-cased": "https://huggingface.co/bert-large-cased/resolve/main/vocab.txt", - "bert-base-multilingual-uncased": "https://huggingface.co/bert-base-multilingual-uncased/resolve/main/vocab.txt", + "bert-base-multilingual-uncased": ( + "https://huggingface.co/bert-base-multilingual-uncased/resolve/main/vocab.txt" + ), "bert-base-multilingual-cased": "https://huggingface.co/bert-base-multilingual-cased/resolve/main/vocab.txt", "bert-base-chinese": "https://huggingface.co/bert-base-chinese/resolve/main/vocab.txt", "bert-base-german-cased": "https://huggingface.co/bert-base-german-cased/resolve/main/vocab.txt", - "bert-large-uncased-whole-word-masking": "https://huggingface.co/bert-large-uncased-whole-word-masking/resolve/main/vocab.txt", - "bert-large-cased-whole-word-masking": "https://huggingface.co/bert-large-cased-whole-word-masking/resolve/main/vocab.txt", - "bert-large-uncased-whole-word-masking-finetuned-squad": "https://huggingface.co/bert-large-uncased-whole-word-masking-finetuned-squad/resolve/main/vocab.txt", - "bert-large-cased-whole-word-masking-finetuned-squad": "https://huggingface.co/bert-large-cased-whole-word-masking-finetuned-squad/resolve/main/vocab.txt", - "bert-base-cased-finetuned-mrpc": "https://huggingface.co/bert-base-cased-finetuned-mrpc/resolve/main/vocab.txt", + "bert-large-uncased-whole-word-masking": ( + "https://huggingface.co/bert-large-uncased-whole-word-masking/resolve/main/vocab.txt" + ), + "bert-large-cased-whole-word-masking": ( + "https://huggingface.co/bert-large-cased-whole-word-masking/resolve/main/vocab.txt" + ), + "bert-large-uncased-whole-word-masking-finetuned-squad": ( + "https://huggingface.co/bert-large-uncased-whole-word-masking-finetuned-squad/resolve/main/vocab.txt" + ), + "bert-large-cased-whole-word-masking-finetuned-squad": ( + "https://huggingface.co/bert-large-cased-whole-word-masking-finetuned-squad/resolve/main/vocab.txt" + ), + "bert-base-cased-finetuned-mrpc": ( + "https://huggingface.co/bert-base-cased-finetuned-mrpc/resolve/main/vocab.txt" + ), "bert-base-german-dbmdz-cased": "https://huggingface.co/bert-base-german-dbmdz-cased/resolve/main/vocab.txt", - "bert-base-german-dbmdz-uncased": "https://huggingface.co/bert-base-german-dbmdz-uncased/resolve/main/vocab.txt", - "TurkuNLP/bert-base-finnish-cased-v1": "https://huggingface.co/TurkuNLP/bert-base-finnish-cased-v1/resolve/main/vocab.txt", - "TurkuNLP/bert-base-finnish-uncased-v1": "https://huggingface.co/TurkuNLP/bert-base-finnish-uncased-v1/resolve/main/vocab.txt", - "wietsedv/bert-base-dutch-cased": "https://huggingface.co/wietsedv/bert-base-dutch-cased/resolve/main/vocab.txt", + "bert-base-german-dbmdz-uncased": ( + "https://huggingface.co/bert-base-german-dbmdz-uncased/resolve/main/vocab.txt" + ), + "TurkuNLP/bert-base-finnish-cased-v1": ( + "https://huggingface.co/TurkuNLP/bert-base-finnish-cased-v1/resolve/main/vocab.txt" + ), + "TurkuNLP/bert-base-finnish-uncased-v1": ( + "https://huggingface.co/TurkuNLP/bert-base-finnish-uncased-v1/resolve/main/vocab.txt" + ), + "wietsedv/bert-base-dutch-cased": ( + "https://huggingface.co/wietsedv/bert-base-dutch-cased/resolve/main/vocab.txt" + ), } }
@@ -192,8 +212,8 @@ class BertTokenizer(PreTrainedTokenizer): if not os.path.isfile(vocab_file): raise ValueError( - f"Can't find a vocabulary file at path '{vocab_file}'. To load the vocabulary from a Google pretrained " - "model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`" + f"Can't find a vocabulary file at path '{vocab_file}'. To load the vocabulary from a Google pretrained" + " model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`" ) self.vocab = load_vocab(vocab_file) self.ids_to_tokens = collections.OrderedDict([(ids, tok) for tok, ids in self.vocab.items()])
diff --git a/src/transformers/models/bert/tokenization_bert_fast.py b/src/transformers/models/bert/tokenization_bert_fast.py index b66f02c80a..b057f7e4ce 100644 --- a/src/transformers/models/bert/tokenization_bert_fast.py +++ b/src/transformers/models/bert/tokenization_bert_fast.py
@@ -34,40 +34,84 @@ PRETRAINED_VOCAB_FILES_MAP = { "bert-large-uncased": "https://huggingface.co/bert-large-uncased/resolve/main/vocab.txt", "bert-base-cased": "https://huggingface.co/bert-base-cased/resolve/main/vocab.txt", "bert-large-cased": "https://huggingface.co/bert-large-cased/resolve/main/vocab.txt", - "bert-base-multilingual-uncased": "https://huggingface.co/bert-base-multilingual-uncased/resolve/main/vocab.txt", + "bert-base-multilingual-uncased": ( + "https://huggingface.co/bert-base-multilingual-uncased/resolve/main/vocab.txt" + ), "bert-base-multilingual-cased": "https://huggingface.co/bert-base-multilingual-cased/resolve/main/vocab.txt", "bert-base-chinese": "https://huggingface.co/bert-base-chinese/resolve/main/vocab.txt", "bert-base-german-cased": "https://huggingface.co/bert-base-german-cased/resolve/main/vocab.txt", - "bert-large-uncased-whole-word-masking": "https://huggingface.co/bert-large-uncased-whole-word-masking/resolve/main/vocab.txt", - "bert-large-cased-whole-word-masking": "https://huggingface.co/bert-large-cased-whole-word-masking/resolve/main/vocab.txt", - "bert-large-uncased-whole-word-masking-finetuned-squad": "https://huggingface.co/bert-large-uncased-whole-word-masking-finetuned-squad/resolve/main/vocab.txt", - "bert-large-cased-whole-word-masking-finetuned-squad": "https://huggingface.co/bert-large-cased-whole-word-masking-finetuned-squad/resolve/main/vocab.txt", - "bert-base-cased-finetuned-mrpc": "https://huggingface.co/bert-base-cased-finetuned-mrpc/resolve/main/vocab.txt", + "bert-large-uncased-whole-word-masking": ( + "https://huggingface.co/bert-large-uncased-whole-word-masking/resolve/main/vocab.txt" + ), + "bert-large-cased-whole-word-masking": ( + "https://huggingface.co/bert-large-cased-whole-word-masking/resolve/main/vocab.txt" + ), + "bert-large-uncased-whole-word-masking-finetuned-squad": ( + "https://huggingface.co/bert-large-uncased-whole-word-masking-finetuned-squad/resolve/main/vocab.txt" + ), + "bert-large-cased-whole-word-masking-finetuned-squad": ( + "https://huggingface.co/bert-large-cased-whole-word-masking-finetuned-squad/resolve/main/vocab.txt" + ), + "bert-base-cased-finetuned-mrpc": ( + "https://huggingface.co/bert-base-cased-finetuned-mrpc/resolve/main/vocab.txt" + ), "bert-base-german-dbmdz-cased": "https://huggingface.co/bert-base-german-dbmdz-cased/resolve/main/vocab.txt", - "bert-base-german-dbmdz-uncased": "https://huggingface.co/bert-base-german-dbmdz-uncased/resolve/main/vocab.txt", - "TurkuNLP/bert-base-finnish-cased-v1": "https://huggingface.co/TurkuNLP/bert-base-finnish-cased-v1/resolve/main/vocab.txt", - "TurkuNLP/bert-base-finnish-uncased-v1": "https://huggingface.co/TurkuNLP/bert-base-finnish-uncased-v1/resolve/main/vocab.txt", - "wietsedv/bert-base-dutch-cased": "https://huggingface.co/wietsedv/bert-base-dutch-cased/resolve/main/vocab.txt", + "bert-base-german-dbmdz-uncased": ( + "https://huggingface.co/bert-base-german-dbmdz-uncased/resolve/main/vocab.txt" + ), + "TurkuNLP/bert-base-finnish-cased-v1": ( + "https://huggingface.co/TurkuNLP/bert-base-finnish-cased-v1/resolve/main/vocab.txt" + ), + "TurkuNLP/bert-base-finnish-uncased-v1": ( + "https://huggingface.co/TurkuNLP/bert-base-finnish-uncased-v1/resolve/main/vocab.txt" + ), + "wietsedv/bert-base-dutch-cased": ( + "https://huggingface.co/wietsedv/bert-base-dutch-cased/resolve/main/vocab.txt" + ), }, "tokenizer_file": { "bert-base-uncased": "https://huggingface.co/bert-base-uncased/resolve/main/tokenizer.json", "bert-large-uncased": "https://huggingface.co/bert-large-uncased/resolve/main/tokenizer.json", "bert-base-cased": "https://huggingface.co/bert-base-cased/resolve/main/tokenizer.json", "bert-large-cased": "https://huggingface.co/bert-large-cased/resolve/main/tokenizer.json", - "bert-base-multilingual-uncased": "https://huggingface.co/bert-base-multilingual-uncased/resolve/main/tokenizer.json", - "bert-base-multilingual-cased": "https://huggingface.co/bert-base-multilingual-cased/resolve/main/tokenizer.json", + "bert-base-multilingual-uncased": ( + "https://huggingface.co/bert-base-multilingual-uncased/resolve/main/tokenizer.json" + ), + "bert-base-multilingual-cased": ( + "https://huggingface.co/bert-base-multilingual-cased/resolve/main/tokenizer.json" + ), "bert-base-chinese": "https://huggingface.co/bert-base-chinese/resolve/main/tokenizer.json", "bert-base-german-cased": "https://huggingface.co/bert-base-german-cased/resolve/main/tokenizer.json", - "bert-large-uncased-whole-word-masking": "https://huggingface.co/bert-large-uncased-whole-word-masking/resolve/main/tokenizer.json", - "bert-large-cased-whole-word-masking":
"https://huggingface.co/bert-large-cased-whole-word-masking/resolve/main/tokenizer.json", - "bert-large-uncased-whole-word-masking-finetuned-squad": "https://huggingface.co/bert-large-uncased-whole-word-masking-finetuned-squad/resolve/main/tokenizer.json", - "bert-large-cased-whole-word-masking-finetuned-squad": "https://huggingface.co/bert-large-cased-whole-word-masking-finetuned-squad/resolve/main/tokenizer.json", - "bert-base-cased-finetuned-mrpc": "https://huggingface.co/bert-base-cased-finetuned-mrpc/resolve/main/tokenizer.json", - "bert-base-german-dbmdz-cased": "https://huggingface.co/bert-base-german-dbmdz-cased/resolve/main/tokenizer.json", - "bert-base-german-dbmdz-uncased": "https://huggingface.co/bert-base-german-dbmdz-uncased/resolve/main/tokenizer.json", - "TurkuNLP/bert-base-finnish-cased-v1": "https://huggingface.co/TurkuNLP/bert-base-finnish-cased-v1/resolve/main/tokenizer.json", - "TurkuNLP/bert-base-finnish-uncased-v1": "https://huggingface.co/TurkuNLP/bert-base-finnish-uncased-v1/resolve/main/tokenizer.json", - "wietsedv/bert-base-dutch-cased": "https://huggingface.co/wietsedv/bert-base-dutch-cased/resolve/main/tokenizer.json", + "bert-large-uncased-whole-word-masking": ( + "https://huggingface.co/bert-large-uncased-whole-word-masking/resolve/main/tokenizer.json" + ), + "bert-large-cased-whole-word-masking": ( + "https://huggingface.co/bert-large-cased-whole-word-masking/resolve/main/tokenizer.json" + ), + "bert-large-uncased-whole-word-masking-finetuned-squad": ( + "https://huggingface.co/bert-large-uncased-whole-word-masking-finetuned-squad/resolve/main/tokenizer.json" + ), + "bert-large-cased-whole-word-masking-finetuned-squad": ( + "https://huggingface.co/bert-large-cased-whole-word-masking-finetuned-squad/resolve/main/tokenizer.json" + ), + "bert-base-cased-finetuned-mrpc": ( + "https://huggingface.co/bert-base-cased-finetuned-mrpc/resolve/main/tokenizer.json" + ), + "bert-base-german-dbmdz-cased": ( + "https://huggingface.co/bert-base-german-dbmdz-cased/resolve/main/tokenizer.json" + ), + "bert-base-german-dbmdz-uncased": ( + "https://huggingface.co/bert-base-german-dbmdz-uncased/resolve/main/tokenizer.json" + ), + "TurkuNLP/bert-base-finnish-cased-v1": ( + "https://huggingface.co/TurkuNLP/bert-base-finnish-cased-v1/resolve/main/tokenizer.json" + ), + "TurkuNLP/bert-base-finnish-uncased-v1": ( + "https://huggingface.co/TurkuNLP/bert-base-finnish-uncased-v1/resolve/main/tokenizer.json" + ), + "wietsedv/bert-base-dutch-cased": ( + "https://huggingface.co/wietsedv/bert-base-dutch-cased/resolve/main/tokenizer.json" + ), }, } diff --git a/src/transformers/models/bert_generation/tokenization_bert_generation.py b/src/transformers/models/bert_generation/tokenization_bert_generation.py index e0e6a7ccb1..2ff9382a7b 100644 --- a/src/transformers/models/bert_generation/tokenization_bert_generation.py +++ b/src/transformers/models/bert_generation/tokenization_bert_generation.py @@ -31,7 +31,9 @@ VOCAB_FILES_NAMES = {"vocab_file": "spiece.model"} PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "bert_for_seq_generation": "https://huggingface.co/google/bert_for_seq_generation_L-24_bbc_encoder/resolve/main/spiece.model", + "bert_for_seq_generation": ( + "https://huggingface.co/google/bert_for_seq_generation_L-24_bbc_encoder/resolve/main/spiece.model" + ), } } diff --git a/src/transformers/models/bert_japanese/tokenization_bert_japanese.py b/src/transformers/models/bert_japanese/tokenization_bert_japanese.py index 588612029a..0b33e858a1 100644 --- 
a/src/transformers/models/bert_japanese/tokenization_bert_japanese.py +++ b/src/transformers/models/bert_japanese/tokenization_bert_japanese.py @@ -32,9 +32,15 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"} PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { "cl-tohoku/bert-base-japanese": "https://huggingface.co/cl-tohoku/bert-base-japanese/resolve/main/vocab.txt", - "cl-tohoku/bert-base-japanese-whole-word-masking": "https://huggingface.co/cl-tohoku/bert-base-japanese-whole-word-masking/resolve/main/vocab.txt", - "cl-tohoku/bert-base-japanese-char": "https://huggingface.co/cl-tohoku/bert-base-japanese-char/resolve/main/vocab.txt", - "cl-tohoku/bert-base-japanese-char-whole-word-masking": "https://huggingface.co/cl-tohoku/bert-base-japanese-char-whole-word-masking/resolve/main/vocab.txt", + "cl-tohoku/bert-base-japanese-whole-word-masking": ( + "https://huggingface.co/cl-tohoku/bert-base-japanese-whole-word-masking/resolve/main/vocab.txt" + ), + "cl-tohoku/bert-base-japanese-char": ( + "https://huggingface.co/cl-tohoku/bert-base-japanese-char/resolve/main/vocab.txt" + ), + "cl-tohoku/bert-base-japanese-char-whole-word-masking": ( + "https://huggingface.co/cl-tohoku/bert-base-japanese-char-whole-word-masking/resolve/main/vocab.txt" + ), } } @@ -131,8 +137,8 @@ class BertJapaneseTokenizer(BertTokenizer): if not os.path.isfile(vocab_file): raise ValueError( - f"Can't find a vocabulary file at path '{vocab_file}'. To load the vocabulary from a Google pretrained " - "model use `tokenizer = AutoTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`" + f"Can't find a vocabulary file at path '{vocab_file}'. To load the vocabulary from a Google pretrained" + " model use `tokenizer = AutoTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`" ) self.vocab = load_vocab(vocab_file) self.ids_to_tokens = collections.OrderedDict([(ids, tok) for tok, ids in self.vocab.items()]) diff --git a/src/transformers/models/bertweet/tokenization_bertweet.py b/src/transformers/models/bertweet/tokenization_bertweet.py index 2c0d191ad8..50de2db480 100644 --- a/src/transformers/models/bertweet/tokenization_bertweet.py +++ b/src/transformers/models/bertweet/tokenization_bertweet.py @@ -152,7 +152,8 @@ class BertweetTokenizer(PreTrainedTokenizer): self.demojizer = demojize except ImportError: logger.warning( - "emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0" + "emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3" + " install emoji==0.6.0" ) self.demojizer = None diff --git a/src/transformers/models/big_bird/convert_bigbird_original_tf_checkpoint_to_pytorch.py b/src/transformers/models/big_bird/convert_bigbird_original_tf_checkpoint_to_pytorch.py index 2d400bb828..614443d81a 100644 --- a/src/transformers/models/big_bird/convert_bigbird_original_tf_checkpoint_to_pytorch.py +++ b/src/transformers/models/big_bird/convert_bigbird_original_tf_checkpoint_to_pytorch.py @@ -54,8 +54,10 @@ if __name__ == "__main__": default=None, type=str, required=True, - help="The config json file corresponding to the pre-trained BERT model. \n" - "This specifies the model architecture.", + help=( + "The config json file corresponding to the pre-trained BERT model. \n" + "This specifies the model architecture." + ), ) parser.add_argument( "--pytorch_dump_path", default=None, type=str, required=True, help="Path to the output PyTorch model." 
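The hunks on either side of this point all rely on the same two facts about Python string literals: adjacent literals are concatenated at compile time, so the added parentheses exist only to let the formatter wrap long messages, and the separating space must sit at exactly one of the two join edges. A minimal standalone sketch of the rule and of the two failure modes that appear in this patch (plain Python, not part of the diff):

    # Adjacent string literals concatenate at compile time; the surrounding
    # parentheses only group the fragments so they can be wrapped.
    message = (
        "Sequence length must be a multiple of the block size,"
        " with the join space carried by the second fragment."
    )
    assert "size, with" in message  # exactly one space at the seam

    # Failure mode 1: no space at either edge glues the words together.
    glued = "`attend_to_chunk_width` < `attend_to_chunk_stride`" "would cause..."
    assert "`would" in glued

    # Failure mode 2: a fragment that contains {placeholders} but lacks the
    # f prefix emits the braces literally instead of interpolating them.
    hidden_size = 768
    assert "{hidden_size}" in "hidden size: {hidden_size}"
    assert "768" in f"hidden size: {hidden_size}"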
diff --git a/src/transformers/models/big_bird/modeling_big_bird.py b/src/transformers/models/big_bird/modeling_big_bird.py index a2ea03c17a..3c41c457bd 100755 --- a/src/transformers/models/big_bird/modeling_big_bird.py +++ b/src/transformers/models/big_bird/modeling_big_bird.py @@ -971,8 +971,8 @@ class BigBirdBlockSparseAttention(nn.Module): if params.shape[:2] != indices.shape[:2]: raise ValueError( - f"Make sure that the first two dimensions of params and indices are identical, \ - but they are params: {params.shape[:2]} vs. indices: {params.shape[:2]}" + "Make sure that the first two dimensions of params and indices are identical, but" + f" they are params: {params.shape[:2]} vs. indices: {indices.shape[:2]}" ) num_indices_to_gather = indices.shape[-2] * indices.shape[-1] num_indices_to_pick_from = params.shape[2] @@ -1517,8 +1517,8 @@ class BigBirdLayer(nn.Module): if self.is_decoder and encoder_hidden_states is not None: if not hasattr(self, "crossattention"): raise ValueError( - f"If `encoder_hidden_states` are passed, {self} has to be instantiated with \ - cross-attention layers by setting `config.add_cross_attention=True`" + f"If `encoder_hidden_states` are passed, {self} has to be instantiated with " + " cross-attention layers by setting `config.add_cross_attention=True`" ) # cross_attn cached key/values tuple is at positions 3,4 of past_key_value tuple @@ -1957,7 +1957,8 @@ class BigBirdModel(BigBirdPreTrainedModel): if self.attention_type != "original_full" and config.add_cross_attention: logger.warning( - "When using `BigBirdForCausalLM` as decoder, then `attention_type` must be `original_full`. Setting `attention_type=original_full`" + "When using `BigBirdForCausalLM` as decoder, then `attention_type` must be `original_full`. Setting" + " `attention_type=original_full`" ) self.set_attention_type("original_full") @@ -2187,7 +2188,8 @@ class BigBirdModel(BigBirdPreTrainedModel): batch_size, seq_length = attention_mask.size() if seq_length % block_size != 0: raise ValueError( - f"Sequence length must be multiple of block size, but sequence length is {seq_length}, while block size is {block_size}." + f"Sequence length must be multiple of block size, but sequence length is {seq_length}, while block" + f" size is {block_size}."
) def create_band_mask_from_inputs(from_blocked_mask, to_blocked_mask): @@ -2916,8 +2918,10 @@ class BigBirdForTokenClassification(BigBirdPreTrainedModel): checkpoint="vumichien/token-classification-bigbird-roberta-base-random", output_type=TokenClassifierOutput, config_class=_CONFIG_FOR_DOC, - expected_output="['LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', " - "'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1']", + expected_output=( + "['LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', " + "'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1']" + ), expected_loss=0.54, ) def forward( diff --git a/src/transformers/models/big_bird/modeling_flax_big_bird.py b/src/transformers/models/big_bird/modeling_flax_big_bird.py index 202e703114..7d5f64a7e3 100644 --- a/src/transformers/models/big_bird/modeling_flax_big_bird.py +++ b/src/transformers/models/big_bird/modeling_flax_big_bird.py @@ -244,8 +244,8 @@ class FlaxBigBirdSelfAttention(nn.Module): self.head_dim = self.config.hidden_size // self.config.num_attention_heads if self.config.hidden_size % self.config.num_attention_heads != 0: raise ValueError( - "`config.hidden_size`: {self.config.hidden_size} has to be a multiple of `config.num_attention_heads`\ - : {self.config.num_attention_heads}" + f"`config.hidden_size`: {self.config.hidden_size} has to be a multiple of `config.num_attention_heads`:" + f" {self.config.num_attention_heads}" ) self.query = nn.Dense( @@ -480,7 +480,8 @@ class FlaxBigBirdBlockSparseAttention(nn.Module): batch_size, seq_length = attention_mask.shape if seq_length % block_size != 0: raise ValueError( - f"Sequence length must be multiple of block size, but sequence length is {seq_length}, while block size is {block_size}." + f"Sequence length must be multiple of block size, but sequence length is {seq_length}, while block" + f" size is {block_size}." ) def create_band_mask_from_inputs(from_blocked_mask, to_blocked_mask): @@ -1216,7 +1217,8 @@ class FlaxBigBirdAttention(nn.Module): self.self = FlaxBigBirdBlockSparseAttention(self.config, block_sparse_seed=self.layer_id, dtype=self.dtype) else: raise ValueError( - f"Your `config.attention_type` is {self.config.attention_type} but it can either be `original_full` or `block_sparse`" + f"Your `config.attention_type` is {self.config.attention_type} but it can either be `original_full` or" + " `block_sparse`" ) self.output = FlaxBigBirdSelfOutput(self.config, dtype=self.dtype) @@ -1395,8 +1397,8 @@ class FlaxBigBirdLayerCollection(nn.Module): if head_mask is not None: if head_mask.shape[0] != (len(self.layers)): raise ValueError( - f"The head_mask should be specified for {len(self.layers)} layers, but it is for \ - {head_mask.shape[0]}." + f"The head_mask should be specified for {len(self.layers)} layers, but it is for " + f" {head_mask.shape[0]}."
) for i, layer in enumerate(self.layers): diff --git a/src/transformers/models/big_bird/tokenization_big_bird.py b/src/transformers/models/big_bird/tokenization_big_bird.py index 19f507f92b..f39aa29d0c 100644 --- a/src/transformers/models/big_bird/tokenization_big_bird.py +++ b/src/transformers/models/big_bird/tokenization_big_bird.py @@ -32,8 +32,12 @@ VOCAB_FILES_NAMES = {"vocab_file": "spiece.model"} PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { "google/bigbird-roberta-base": "https://huggingface.co/google/bigbird-roberta-base/resolve/main/spiece.model", - "google/bigbird-roberta-large": "https://huggingface.co/google/bigbird-roberta-large/resolve/main/spiece.model", - "google/bigbird-base-trivia-itc": "https://huggingface.co/google/bigbird-base-trivia-itc/resolve/main/spiece.model", + "google/bigbird-roberta-large": ( + "https://huggingface.co/google/bigbird-roberta-large/resolve/main/spiece.model" + ), + "google/bigbird-base-trivia-itc": ( + "https://huggingface.co/google/bigbird-base-trivia-itc/resolve/main/spiece.model" + ), } } diff --git a/src/transformers/models/big_bird/tokenization_big_bird_fast.py b/src/transformers/models/big_bird/tokenization_big_bird_fast.py index c645fb0059..6ff063e772 100644 --- a/src/transformers/models/big_bird/tokenization_big_bird_fast.py +++ b/src/transformers/models/big_bird/tokenization_big_bird_fast.py @@ -35,13 +35,23 @@ VOCAB_FILES_NAMES = {"vocab_file": "spiece.model", "tokenizer_file": "tokenizer. PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { "google/bigbird-roberta-base": "https://huggingface.co/google/bigbird-roberta-base/resolve/main/spiece.model", - "google/bigbird-roberta-large": "https://huggingface.co/google/bigbird-roberta-large/resolve/main/spiece.model", - "google/bigbird-base-trivia-itc": "https://huggingface.co/google/bigbird-base-trivia-itc/resolve/main/spiece.model", + "google/bigbird-roberta-large": ( + "https://huggingface.co/google/bigbird-roberta-large/resolve/main/spiece.model" + ), + "google/bigbird-base-trivia-itc": ( + "https://huggingface.co/google/bigbird-base-trivia-itc/resolve/main/spiece.model" + ), }, "tokenizer_file": { - "google/bigbird-roberta-base": "https://huggingface.co/google/bigbird-roberta-base/resolve/main/tokenizer.json", - "google/bigbird-roberta-large": "https://huggingface.co/google/bigbird-roberta-large/resolve/main/tokenizer.json", - "google/bigbird-base-trivia-itc": "https://huggingface.co/google/bigbird-base-trivia-itc/resolve/main/tokenizer.json", + "google/bigbird-roberta-base": ( + "https://huggingface.co/google/bigbird-roberta-base/resolve/main/tokenizer.json" + ), + "google/bigbird-roberta-large": ( + "https://huggingface.co/google/bigbird-roberta-large/resolve/main/tokenizer.json" + ), + "google/bigbird-base-trivia-itc": ( + "https://huggingface.co/google/bigbird-base-trivia-itc/resolve/main/tokenizer.json" + ), }, } diff --git a/src/transformers/models/bigbird_pegasus/configuration_bigbird_pegasus.py b/src/transformers/models/bigbird_pegasus/configuration_bigbird_pegasus.py index 6aa013a4aa..dd9fb89d55 100644 --- a/src/transformers/models/bigbird_pegasus/configuration_bigbird_pegasus.py +++ b/src/transformers/models/bigbird_pegasus/configuration_bigbird_pegasus.py @@ -27,9 +27,15 @@ from ...utils import TensorType, is_torch_available, logging logger = logging.get_logger(__name__) BIGBIRD_PEGASUS_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "google/bigbird-pegasus-large-arxiv": "https://huggingface.co/google/bigbird-pegasus-large-arxiv/resolve/main/config.json", - 
"google/bigbird-pegasus-large-pubmed": "https://huggingface.co/google/bigbird-pegasus-large-pubmed/resolve/main/config.json", - "google/bigbird-pegasus-large-bigpatent": "https://huggingface.co/google/bigbird-pegasus-large-bigpatent/resolve/main/config.json", + "google/bigbird-pegasus-large-arxiv": ( + "https://huggingface.co/google/bigbird-pegasus-large-arxiv/resolve/main/config.json" + ), + "google/bigbird-pegasus-large-pubmed": ( + "https://huggingface.co/google/bigbird-pegasus-large-pubmed/resolve/main/config.json" + ), + "google/bigbird-pegasus-large-bigpatent": ( + "https://huggingface.co/google/bigbird-pegasus-large-bigpatent/resolve/main/config.json" + ), # See all BigBirdPegasus models at https://huggingface.co/models?filter=bigbird_pegasus } diff --git a/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py b/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py index 4a4bb96375..c7a84695a7 100755 --- a/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +++ b/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py @@ -797,8 +797,8 @@ class BigBirdPegasusBlockSparseAttention(nn.Module): if params.shape[:2] != indices.shape[:2]: raise ValueError( - f"Make sure that the first two dimensions of params and indices are identical, \ - but they are params: {params.shape[:2]} vs. indices: {params.shape[:2]}" + "Make sure that the first two dimensions of params and indices are identical, but" + f" they are params: {params.shape[:2]} vs. indices: {params.shape[:2]}" ) num_indices_to_gather = indices.shape[-2] * indices.shape[-1] num_indices_to_pick_from = params.shape[2] @@ -1305,7 +1305,8 @@ class BigBirdPegasusDecoderAttention(nn.Module): if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len): raise ValueError( - f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is {attn_weights.size()}" + f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is" + f" {attn_weights.size()}" ) if attention_mask is not None: @@ -1321,7 +1322,8 @@ class BigBirdPegasusDecoderAttention(nn.Module): if layer_head_mask is not None: if layer_head_mask.size() != (self.num_heads,): raise ValueError( - f"Head mask for a single layer should be of size {(self.num_heads,)}, but is {layer_head_mask.size()}" + f"Head mask for a single layer should be of size {(self.num_heads,)}, but is" + f" {layer_head_mask.size()}" ) attn_weights = layer_head_mask.view(1, -1, 1, 1) * attn_weights.view(bsz, self.num_heads, tgt_len, src_len) attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len) @@ -1342,7 +1344,8 @@ class BigBirdPegasusDecoderAttention(nn.Module): if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim): raise ValueError( - f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is {attn_output.size()}" + f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is" + f" {attn_output.size()}" ) attn_output = attn_output.view(bsz, self.num_heads, tgt_len, self.head_dim) @@ -1919,7 +1922,8 @@ class BigBirdPegasusEncoder(BigBirdPegasusPreTrainedModel): if head_mask is not None: if head_mask.size()[0] != len(self.layers): raise ValueError( - f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}." + f"The head_mask should be specified for {len(self.layers)} layers, but it is for" + f" {head_mask.size()[0]}." 
) for idx, encoder_layer in enumerate(self.layers): @@ -2003,7 +2007,8 @@ class BigBirdPegasusEncoder(BigBirdPegasusPreTrainedModel): batch_size, seq_length = attention_mask.size() if seq_length % block_size != 0: raise ValueError( - f"Sequence length must be multiple of block size, but sequence length is {seq_length}, while block size is {block_size}." + f"Sequence length must be multiple of block size, but sequence length is {seq_length}, while block" + f" size is {block_size}." ) def create_band_mask_from_inputs(from_blocked_mask, to_blocked_mask): @@ -2249,7 +2254,8 @@ class BigBirdPegasusDecoder(BigBirdPegasusPreTrainedModel): if attn_mask is not None: if attn_mask.size()[0] != len(self.layers): raise ValueError( - f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}." + f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for" + f" {head_mask.size()[0]}." ) for idx, decoder_layer in enumerate(self.layers): # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) diff --git a/src/transformers/models/blenderbot/modeling_blenderbot.py b/src/transformers/models/blenderbot/modeling_blenderbot.py index c1f9c35ee7..612685dbb4 100755 --- a/src/transformers/models/blenderbot/modeling_blenderbot.py +++ b/src/transformers/models/blenderbot/modeling_blenderbot.py @@ -218,7 +218,8 @@ class BlenderbotAttention(nn.Module): if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len): raise ValueError( - f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is {attn_weights.size()}" + f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is" + f" {attn_weights.size()}" ) if attention_mask is not None: @@ -234,7 +235,8 @@ class BlenderbotAttention(nn.Module): if layer_head_mask is not None: if layer_head_mask.size() != (self.num_heads,): raise ValueError( - f"Head mask for a single layer should be of size {(self.num_heads,)}, but is {layer_head_mask.size()}" + f"Head mask for a single layer should be of size {(self.num_heads,)}, but is" + f" {layer_head_mask.size()}" ) attn_weights = layer_head_mask.view(1, -1, 1, 1) * attn_weights.view(bsz, self.num_heads, tgt_len, src_len) attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len) @@ -255,7 +257,8 @@ class BlenderbotAttention(nn.Module): if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim): raise ValueError( - f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is {attn_output.size()}" + f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is" + f" {attn_output.size()}" ) attn_output = attn_output.view(bsz, self.num_heads, tgt_len, self.head_dim) @@ -749,7 +752,8 @@ class BlenderbotEncoder(BlenderbotPreTrainedModel): if head_mask is not None: if head_mask.size()[0] != len(self.layers): raise ValueError( - f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}." + f"The head_mask should be specified for {len(self.layers)} layers, but it is for" + f" {head_mask.size()[0]}." ) for idx, encoder_layer in enumerate(self.layers): if output_hidden_states: @@ -989,7 +993,8 @@ class BlenderbotDecoder(BlenderbotPreTrainedModel): if attn_mask is not None: if attn_mask.size()[0] != len(self.layers): raise ValueError( - f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}." 
+ f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for" + f" {head_mask.size()[0]}." ) for idx, decoder_layer in enumerate(self.layers): # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) @@ -1096,7 +1101,9 @@ class BlenderbotModel(BlenderbotPreTrainedModel): def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.PathLike]], *model_args, **kwargs): if pretrained_model_name_or_path == "facebook/blenderbot-90M": warnings.warn( - "The checkpoint `facebook/blenderbot-90M` is deprecated. In the future, please use the identical checkpoint `facebook/small_blenderbot-90M` with `BlenderbotSmallModel.from_pretrained('facebook/small_blenderbot-90M')` instead.", + "The checkpoint `facebook/blenderbot-90M` is deprecated. In the future, please use the identical" + " checkpoint `facebook/small_blenderbot-90M` with" + " `BlenderbotSmallModel.from_pretrained('facebook/small_blenderbot-90M')` instead.", FutureWarning, ) return BlenderbotSmallModel.from_pretrained(pretrained_model_name_or_path) @@ -1237,7 +1244,9 @@ class BlenderbotForConditionalGeneration(BlenderbotPreTrainedModel): def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.PathLike]], *model_args, **kwargs): if pretrained_model_name_or_path == "facebook/blenderbot-90M": warnings.warn( - "The checkpoint `facebook/blenderbot-90M` is deprecated. In the future, please use the identical checkpoint `facebook/small_blenderbot-90M` with `BlenderbotSmallForConditionalGeneration.from_pretrained('facebook/small_blenderbot-90M')` instead.", + "The checkpoint `facebook/blenderbot-90M` is deprecated. In the future, please use the identical" + " checkpoint `facebook/small_blenderbot-90M` with" + " `BlenderbotSmallForConditionalGeneration.from_pretrained('facebook/small_blenderbot-90M')` instead.", FutureWarning, ) return BlenderbotSmallForConditionalGeneration.from_pretrained(pretrained_model_name_or_path) diff --git a/src/transformers/models/blenderbot/modeling_tf_blenderbot.py b/src/transformers/models/blenderbot/modeling_tf_blenderbot.py index b4bceee3e2..24ed4baa96 100644 --- a/src/transformers/models/blenderbot/modeling_tf_blenderbot.py +++ b/src/transformers/models/blenderbot/modeling_tf_blenderbot.py @@ -228,7 +228,10 @@ class TFBlenderbotAttention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(attn_weights), [bsz * self.num_heads, tgt_len, src_len], - message=f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is {shape_list(attn_weights)}", + message=( + f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is" + f" {shape_list(attn_weights)}" + ), ) if attention_mask is not None: @@ -238,7 +241,10 @@ class TFBlenderbotAttention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(attention_mask), [bsz, 1, tgt_len, src_len], - message=f"Attention mask should be of size {(bsz, 1, tgt_len, src_len)}, but is {shape_list(attention_mask)}", + message=( + f"Attention mask should be of size {(bsz, 1, tgt_len, src_len)}, but is" + f" {shape_list(attention_mask)}" + ), ) attention_mask = tf.cast(attention_mask, dtype=attn_weights.dtype) @@ -254,7 +260,10 @@ class TFBlenderbotAttention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(layer_head_mask), [self.num_heads], - message=f"Head mask for a single layer should be of size {(self.num_heads)}, but is {shape_list(layer_head_mask)}", + message=( + f"Head mask for a single layer should be of size 
{(self.num_heads)}, but is" + f" {shape_list(layer_head_mask)}" + ), ) attn_weights = tf.reshape(layer_head_mask, (1, -1, 1, 1)) * tf.reshape( @@ -271,7 +280,10 @@ class TFBlenderbotAttention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(attn_output), [bsz * self.num_heads, tgt_len, self.head_dim], - message=f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is {shape_list(attn_output)}", + message=( + f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is" + f" {shape_list(attn_output)}" + ), ) attn_output = tf.transpose( @@ -738,7 +750,10 @@ class TFBlenderbotEncoder(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(head_mask)[0], len(self.layers), - message=f"The head_mask should be specified for {len(self.layers)} layers, but it is for {shape_list(head_mask)[0]}.", + message=( + f"The head_mask should be specified for {len(self.layers)} layers, but it is for" + f" {shape_list(head_mask)[0]}." + ), ) # encoder layers @@ -940,7 +955,10 @@ class TFBlenderbotDecoder(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(attn_mask)[0], len(self.layers), - message=f"The {attn_mask_name} should be specified for {len(self.layers)} layers, but it is for {shape_list(attn_mask)[0]}.", + message=( + f"The {attn_mask_name} should be specified for {len(self.layers)} layers, but it is for" + f" {shape_list(attn_mask)[0]}." + ), ) for idx, decoder_layer in enumerate(self.layers): # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) @@ -1124,7 +1142,10 @@ class TFBlenderbotModel(TFBlenderbotPreTrainedModel): from ..blenderbot_small import TFBlenderbotSmallModel warnings.warn( - "The checkpoint `facebook/blenderbot-90M` is deprecated. In the future, please use the identical checkpoint `facebook/small_blenderbot-90M` with `TFBlenderbotSmallForConditionalGeneration.from_pretrained('facebook/small_blenderbot-90M')` instead.", + "The checkpoint `facebook/blenderbot-90M` is deprecated. In the future, please use the identical" + " checkpoint `facebook/small_blenderbot-90M` with" + " `TFBlenderbotSmallForConditionalGeneration.from_pretrained('facebook/small_blenderbot-90M')`" + " instead.", FutureWarning, ) return TFBlenderbotSmallModel.from_pretrained(pretrained_model_name_or_path) @@ -1244,7 +1265,10 @@ class TFBlenderbotForConditionalGeneration(TFBlenderbotPreTrainedModel, TFCausal from ..blenderbot_small import TFBlenderbotSmallForConditionalGeneration warnings.warn( - "The checkpoint `facebook/blenderbot-90M` is deprecated. In the future, please use the identical checkpoint `facebook/small_blenderbot-90M` with `TFBlenderbotSmallForConditionalGeneration.from_pretrained('facebook/small_blenderbot-90M')` instead.", + "The checkpoint `facebook/blenderbot-90M` is deprecated. 
In the future, please use the identical" + " checkpoint `facebook/small_blenderbot-90M` with" + " `TFBlenderbotSmallForConditionalGeneration.from_pretrained('facebook/small_blenderbot-90M')`" + " instead.", FutureWarning, ) return TFBlenderbotSmallForConditionalGeneration.from_pretrained(pretrained_model_name_or_path) diff --git a/src/transformers/models/blenderbot_small/modeling_blenderbot_small.py b/src/transformers/models/blenderbot_small/modeling_blenderbot_small.py index efe72ef533..9b32fccc1f 100755 --- a/src/transformers/models/blenderbot_small/modeling_blenderbot_small.py +++ b/src/transformers/models/blenderbot_small/modeling_blenderbot_small.py @@ -216,7 +216,8 @@ class BlenderbotSmallAttention(nn.Module): if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len): raise ValueError( - f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is {attn_weights.size()}" + f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is" + f" {attn_weights.size()}" ) if attention_mask is not None: @@ -232,7 +233,8 @@ class BlenderbotSmallAttention(nn.Module): if layer_head_mask is not None: if layer_head_mask.size() != (self.num_heads,): raise ValueError( - f"Head mask for a single layer should be of size {(self.num_heads,)}, but is {layer_head_mask.size()}" + f"Head mask for a single layer should be of size {(self.num_heads,)}, but is" + f" {layer_head_mask.size()}" ) attn_weights = layer_head_mask.view(1, -1, 1, 1) * attn_weights.view(bsz, self.num_heads, tgt_len, src_len) attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len) @@ -253,7 +255,8 @@ class BlenderbotSmallAttention(nn.Module): if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim): raise ValueError( - f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is {attn_output.size()}" + f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is" + f" {attn_output.size()}" ) attn_output = attn_output.view(bsz, self.num_heads, tgt_len, self.head_dim) @@ -748,7 +751,8 @@ class BlenderbotSmallEncoder(BlenderbotSmallPreTrainedModel): if head_mask is not None: if head_mask.size()[0] != len(self.layers): raise ValueError( - f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}." + f"The head_mask should be specified for {len(self.layers)} layers, but it is for" + f" {head_mask.size()[0]}." ) for idx, encoder_layer in enumerate(self.layers): if output_hidden_states: @@ -986,7 +990,8 @@ class BlenderbotSmallDecoder(BlenderbotSmallPreTrainedModel): if attn_mask is not None: if attn_mask.size()[0] != len(self.layers): raise ValueError( - f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}." + f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for" + f" {head_mask.size()[0]}." 
) for idx, decoder_layer in enumerate(self.layers): # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) diff --git a/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py b/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py index 95078af4b9..157af64468 100644 --- a/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py +++ b/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py @@ -228,7 +228,10 @@ class TFBlenderbotSmallAttention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(attn_weights), [bsz * self.num_heads, tgt_len, src_len], - message=f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is {shape_list(attn_weights)}", + message=( + f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is" + f" {shape_list(attn_weights)}" + ), ) if attention_mask is not None: @@ -238,7 +241,10 @@ class TFBlenderbotSmallAttention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(attention_mask), [bsz, 1, tgt_len, src_len], - message=f"Attention mask should be of size {(bsz, 1, tgt_len, src_len)}, but is {shape_list(attention_mask)}", + message=( + f"Attention mask should be of size {(bsz, 1, tgt_len, src_len)}, but is" + f" {shape_list(attention_mask)}" + ), ) attention_mask = tf.cast(attention_mask, dtype=attn_weights.dtype) @@ -254,7 +260,10 @@ class TFBlenderbotSmallAttention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(layer_head_mask), [self.num_heads], - message=f"Head mask for a single layer should be of size {(self.num_heads)}, but is {shape_list(layer_head_mask)}", + message=( + f"Head mask for a single layer should be of size {(self.num_heads)}, but is" + f" {shape_list(layer_head_mask)}" + ), ) attn_weights = tf.reshape(layer_head_mask, (1, -1, 1, 1)) * tf.reshape( @@ -271,7 +280,10 @@ class TFBlenderbotSmallAttention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(attn_output), [bsz * self.num_heads, tgt_len, self.head_dim], - message=f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is {shape_list(attn_output)}", + message=( + f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is" + f" {shape_list(attn_output)}" + ), ) attn_output = tf.transpose( @@ -744,7 +756,10 @@ class TFBlenderbotSmallEncoder(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(head_mask)[0], len(self.layers), - message=f"The head_mask should be specified for {len(self.layers)} layers, but it is for {shape_list(head_mask)[0]}.", + message=( + f"The head_mask should be specified for {len(self.layers)} layers, but it is for" + f" {shape_list(head_mask)[0]}." + ), ) # encoder layers @@ -942,7 +957,10 @@ class TFBlenderbotSmallDecoder(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(attn_mask)[0], len(self.layers), - message=f"The {attn_mask_name} should be specified for {len(self.layers)} layers, but it is for {shape_list(attn_mask)[0]}.", + message=( + f"The {attn_mask_name} should be specified for {len(self.layers)} layers, but it is for" + f" {shape_list(attn_mask)[0]}." 
+ ), ) for idx, decoder_layer in enumerate(self.layers): diff --git a/src/transformers/models/blenderbot_small/tokenization_blenderbot_small.py b/src/transformers/models/blenderbot_small/tokenization_blenderbot_small.py index f5263a5af9..1df2203470 100644 --- a/src/transformers/models/blenderbot_small/tokenization_blenderbot_small.py +++ b/src/transformers/models/blenderbot_small/tokenization_blenderbot_small.py @@ -41,7 +41,9 @@ PRETRAINED_VOCAB_FILES_MAP = { "facebook/blenderbot_small-90M": "https://huggingface.co/facebook/blenderbot_small-90M/resolve/main/merges.txt" }, "tokenizer_config_file": { - "facebook/blenderbot_small-90M": "https://huggingface.co/facebook/blenderbot_small-90M/resolve/main/tokenizer_config.json" + "facebook/blenderbot_small-90M": ( + "https://huggingface.co/facebook/blenderbot_small-90M/resolve/main/tokenizer_config.json" + ) }, } diff --git a/src/transformers/models/blenderbot_small/tokenization_blenderbot_small_fast.py b/src/transformers/models/blenderbot_small/tokenization_blenderbot_small_fast.py index 63c8c39563..8dfae5894f 100644 --- a/src/transformers/models/blenderbot_small/tokenization_blenderbot_small_fast.py +++ b/src/transformers/models/blenderbot_small/tokenization_blenderbot_small_fast.py @@ -38,7 +38,9 @@ PRETRAINED_VOCAB_FILES_MAP = { "facebook/blenderbot_small-90M": "https://huggingface.co/facebook/blenderbot_small-90M/resolve/main/merges.txt" }, "tokenizer_config_file": { - "facebook/blenderbot_small-90M": "https://huggingface.co/facebook/blenderbot_small-90M/resolve/main/tokenizer_config.json" + "facebook/blenderbot_small-90M": ( + "https://huggingface.co/facebook/blenderbot_small-90M/resolve/main/tokenizer_config.json" + ) }, } diff --git a/src/transformers/models/byt5/convert_byt5_original_tf_checkpoint_to_pytorch.py b/src/transformers/models/byt5/convert_byt5_original_tf_checkpoint_to_pytorch.py index a002030168..7d9a20f3b0 100755 --- a/src/transformers/models/byt5/convert_byt5_original_tf_checkpoint_to_pytorch.py +++ b/src/transformers/models/byt5/convert_byt5_original_tf_checkpoint_to_pytorch.py @@ -49,8 +49,9 @@ if __name__ == "__main__": default=None, type=str, required=True, - help="The config json file corresponding to the pre-trained T5 model. \n" - "This specifies the model architecture.", + help=( + "The config json file corresponding to the pre-trained T5 model. \nThis specifies the model architecture." + ), ) parser.add_argument( "--pytorch_dump_path", default=None, type=str, required=True, help="Path to the output PyTorch model." diff --git a/src/transformers/models/byt5/tokenization_byt5.py b/src/transformers/models/byt5/tokenization_byt5.py index 77eb34f929..0071d7a9af 100644 --- a/src/transformers/models/byt5/tokenization_byt5.py +++ b/src/transformers/models/byt5/tokenization_byt5.py @@ -77,8 +77,9 @@ class ByT5Tokenizer(PreTrainedTokenizer): extra_tokens = len(set(filter(lambda x: bool("extra_id" in str(x)), additional_special_tokens))) if extra_tokens != extra_ids: raise ValueError( - f"Both extra_ids ({extra_ids}) and additional_special_tokens ({additional_special_tokens}) are provided to ByT5Tokenizer. " - "In this case the additional_special_tokens must include the extra_ids tokens" + f"Both extra_ids ({extra_ids}) and additional_special_tokens ({additional_special_tokens}) are" + " provided to ByT5Tokenizer. 
In this case the additional_special_tokens must include the" + " extra_ids tokens" ) pad_token = AddedToken(pad_token, lstrip=False, rstrip=False) if isinstance(pad_token, str) else pad_token @@ -146,7 +147,8 @@ class ByT5Tokenizer(PreTrainedTokenizer): """Do not add eos again if user already added it.""" if len(token_ids) > 0 and token_ids[-1] == self.eos_token_id: warnings.warn( - f"This sequence already has {self.eos_token}. In future versions this behavior may lead to duplicated eos tokens being added." + f"This sequence already has {self.eos_token}. In future versions this behavior may lead to duplicated" + " eos tokens being added." ) return token_ids else: diff --git a/src/transformers/models/camembert/configuration_camembert.py b/src/transformers/models/camembert/configuration_camembert.py index 982afceb70..6f87223732 100644 --- a/src/transformers/models/camembert/configuration_camembert.py +++ b/src/transformers/models/camembert/configuration_camembert.py @@ -27,8 +27,12 @@ logger = logging.get_logger(__name__) CAMEMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP = { "camembert-base": "https://huggingface.co/camembert-base/resolve/main/config.json", - "umberto-commoncrawl-cased-v1": "https://huggingface.co/Musixmatch/umberto-commoncrawl-cased-v1/resolve/main/config.json", - "umberto-wikipedia-uncased-v1": "https://huggingface.co/Musixmatch/umberto-wikipedia-uncased-v1/resolve/main/config.json", + "umberto-commoncrawl-cased-v1": ( + "https://huggingface.co/Musixmatch/umberto-commoncrawl-cased-v1/resolve/main/config.json" + ), + "umberto-wikipedia-uncased-v1": ( + "https://huggingface.co/Musixmatch/umberto-wikipedia-uncased-v1/resolve/main/config.json" + ), } diff --git a/src/transformers/models/canine/modeling_canine.py b/src/transformers/models/canine/modeling_canine.py index b93a7b3d46..bb7b1492c7 100644 --- a/src/transformers/models/canine/modeling_canine.py +++ b/src/transformers/models/canine/modeling_canine.py @@ -546,12 +546,11 @@ class CanineAttention(nn.Module): self.local = local if attend_from_chunk_width < attend_from_chunk_stride: raise ValueError( - "`attend_from_chunk_width` < `attend_from_chunk_stride` " - "would cause sequence positions to get skipped." + "`attend_from_chunk_width` < `attend_from_chunk_stride` would cause sequence positions to get skipped." ) if attend_to_chunk_width < attend_to_chunk_stride: raise ValueError( - "`attend_to_chunk_width` < `attend_to_chunk_stride`" "would cause sequence positions to get skipped." + "`attend_to_chunk_width` < `attend_to_chunk_stride` would cause sequence positions to get skipped." ) self.always_attend_to_first_position = always_attend_to_first_position self.first_position_attends_to_all = first_position_attends_to_all diff --git a/src/transformers/models/clip/modeling_clip.py b/src/transformers/models/clip/modeling_clip.py index 44c340847e..25137e268d 100755 --- a/src/transformers/models/clip/modeling_clip.py +++ b/src/transformers/models/clip/modeling_clip.py @@ -181,7 +181,8 @@ class CLIPAttention(nn.Module): self.head_dim = self.embed_dim // self.num_heads if self.head_dim * self.num_heads != self.embed_dim: raise ValueError( - f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`: {self.num_heads})." + f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`:" + f" {self.num_heads})."
) self.scale = self.head_dim**-0.5 self.dropout = config.attention_dropout @@ -220,14 +221,16 @@ class CLIPAttention(nn.Module): if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len): raise ValueError( - f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is {attn_weights.size()}" + f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is" + f" {attn_weights.size()}" ) # apply the causal_attention_mask first if causal_attention_mask is not None: if causal_attention_mask.size() != (bsz, 1, tgt_len, src_len): raise ValueError( - f"Attention mask should be of size {(bsz, 1, tgt_len, src_len)}, but is {causal_attention_mask.size()}" + f"Attention mask should be of size {(bsz, 1, tgt_len, src_len)}, but is" + f" {causal_attention_mask.size()}" ) attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len, src_len) + causal_attention_mask attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len) @@ -258,7 +261,8 @@ class CLIPAttention(nn.Module): if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim): raise ValueError( - f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is {attn_output.size()}" + f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is" + f" {attn_output.size()}" ) attn_output = attn_output.view(bsz, self.num_heads, tgt_len, self.head_dim) @@ -848,12 +852,14 @@ class CLIPModel(CLIPPreTrainedModel): if not isinstance(config.text_config, CLIPTextConfig): raise ValueError( - f"config.text_config is expected to be of type CLIPTextConfig but is of type {type(config.text_config)}." + "config.text_config is expected to be of type CLIPTextConfig but is of type" + f" {type(config.text_config)}." ) if not isinstance(config.vision_config, CLIPVisionConfig): raise ValueError( - f"config.vision_config is expected to be of type CLIPVisionConfig but is of type {type(config.vision_config)}." + "config.vision_config is expected to be of type CLIPVisionConfig but is of type" + f" {type(config.vision_config)}." ) text_config = config.text_config diff --git a/src/transformers/models/clip/modeling_flax_clip.py b/src/transformers/models/clip/modeling_flax_clip.py index 792c7b5325..aa8ef87d5b 100644 --- a/src/transformers/models/clip/modeling_flax_clip.py +++ b/src/transformers/models/clip/modeling_flax_clip.py @@ -262,7 +262,8 @@ class FlaxCLIPAttention(nn.Module): self.head_dim = self.embed_dim // self.num_heads if self.head_dim * self.num_heads != self.embed_dim: raise ValueError( - f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`: {self.num_heads})." + f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`:" + f" {self.num_heads})." ) self.scale = self.head_dim**-0.5 self.dropout = self.config.attention_dropout diff --git a/src/transformers/models/clip/modeling_tf_clip.py b/src/transformers/models/clip/modeling_tf_clip.py index ad26a7bfc3..6ba83f04b8 100644 --- a/src/transformers/models/clip/modeling_tf_clip.py +++ b/src/transformers/models/clip/modeling_tf_clip.py @@ -266,7 +266,8 @@ class TFCLIPAttention(tf.keras.layers.Layer): self.attention_head_size = self.embed_dim // self.num_attention_heads if self.attention_head_size * self.num_attention_heads != self.embed_dim: raise ValueError( - f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`: {self.num_attention_heads})." 
+ f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`:" + f" {self.num_attention_heads})." ) factor = config.initializer_factor @@ -708,12 +709,14 @@ class TFCLIPMainLayer(tf.keras.layers.Layer): if not isinstance(config.text_config, CLIPTextConfig): raise ValueError( - f"config.text_config is expected to be of type CLIPTextConfig but is of type {type(config.text_config)}." + "config.text_config is expected to be of type CLIPTextConfig but is of type" + f" {type(config.text_config)}." ) if not isinstance(config.vision_config, CLIPVisionConfig): raise ValueError( - f"config.vision_config is expected to be of type CLIPVisionConfig but is of type {type(config.vision_config)}." + "config.vision_config is expected to be of type CLIPVisionConfig but is of type" + f" {type(config.vision_config)}." ) self.config = config diff --git a/src/transformers/models/clip/tokenization_clip_fast.py b/src/transformers/models/clip/tokenization_clip_fast.py index f6ff684c6b..5fe6d3d445 100644 --- a/src/transformers/models/clip/tokenization_clip_fast.py +++ b/src/transformers/models/clip/tokenization_clip_fast.py @@ -36,7 +36,9 @@ PRETRAINED_VOCAB_FILES_MAP = { "openai/clip-vit-base-patch32": "https://huggingface.co/openai/clip-vit-base-patch32/resolve/main/merges.txt", }, "tokenizer_file": { - "openai/clip-vit-base-patch32": "https://huggingface.co/openai/clip-vit-base-patch32/resolve/main/tokenizer.json", + "openai/clip-vit-base-patch32": ( + "https://huggingface.co/openai/clip-vit-base-patch32/resolve/main/tokenizer.json" + ), }, } @@ -97,12 +99,12 @@ class CLIPTokenizerFast(PreTrainedTokenizerFast): if not isinstance(self.backend_tokenizer.pre_tokenizer, pre_tokenizers.Sequence): raise ValueError( - "The `backend_tokenizer` provided does not match the expected format. The CLIP tokenizer has been " - "heavily modified from transformers version 4.17.0. You need to convert the tokenizer you are using to be compatible with this version." - "The easiest way to do so is " - '`CLIPTokenizerFast.from_pretrained("path_to_local_folder_or_hub_repo, from_slow=True)`.' - " If you want to use your existing tokenizer, you will have to revert to a version prior to " - "4.17.0 of transformers." + "The `backend_tokenizer` provided does not match the expected format. The CLIP tokenizer has been" + " heavily modified from transformers version 4.17.0. You need to convert the tokenizer you are using" + " to be compatible with this version.The easiest way to do so is" + ' `CLIPTokenizerFast.from_pretrained("path_to_local_folder_or_hub_repo, from_slow=True)`. If you want' + " to use your existing tokenizer, you will have to revert to a version prior to 4.17.0 of" + " transformers." 
) self._wrap_decode_method_backend_tokenizer() diff --git a/src/transformers/models/convbert/configuration_convbert.py b/src/transformers/models/convbert/configuration_convbert.py index 5efa6018b6..2b5bc42502 100644 --- a/src/transformers/models/convbert/configuration_convbert.py +++ b/src/transformers/models/convbert/configuration_convbert.py @@ -26,7 +26,9 @@ logger = logging.get_logger(__name__) CONVBERT_PRETRAINED_CONFIG_ARCHIVE_MAP = { "YituTech/conv-bert-base": "https://huggingface.co/YituTech/conv-bert-base/resolve/main/config.json", - "YituTech/conv-bert-medium-small": "https://huggingface.co/YituTech/conv-bert-medium-small/resolve/main/config.json", + "YituTech/conv-bert-medium-small": ( + "https://huggingface.co/YituTech/conv-bert-medium-small/resolve/main/config.json" + ), "YituTech/conv-bert-small": "https://huggingface.co/YituTech/conv-bert-small/resolve/main/config.json", # See all ConvBERT models at https://huggingface.co/models?filter=convbert } diff --git a/src/transformers/models/convbert/convert_convbert_original_tf1_checkpoint_to_pytorch_and_tf2.py b/src/transformers/models/convbert/convert_convbert_original_tf1_checkpoint_to_pytorch_and_tf2.py index cdea57cc24..3d4ff77987 100644 --- a/src/transformers/models/convbert/convert_convbert_original_tf1_checkpoint_to_pytorch_and_tf2.py +++ b/src/transformers/models/convbert/convert_convbert_original_tf1_checkpoint_to_pytorch_and_tf2.py @@ -45,8 +45,10 @@ if __name__ == "__main__": default=None, type=str, required=True, - help="The config json file corresponding to the pre-trained ConvBERT model. \n" - "This specifies the model architecture.", + help=( + "The config json file corresponding to the pre-trained ConvBERT model. \n" + "This specifies the model architecture." + ), ) parser.add_argument( "--pytorch_dump_path", default=None, type=str, required=True, help="Path to the output PyTorch model." 
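Since every hunk here is meant to re-wrap a message without changing it, the quickest review check is to let Python concatenate the old and new fragments and compare the results. An illustrative check, not part of the patch (the literals are copied from the `next_sentence_label` hunk earlier in this diff):

    # Old form: one long literal.
    old = "The `next_sentence_label` argument is deprecated and will be removed in a future version, use `labels` instead."
    # New form: the same message split into wrappable fragments.
    new = (
        "The `next_sentence_label` argument is deprecated and will be removed in a future version, use"
        " `labels` instead."
    )
    assert old == new  # byte-for-byte identical, so runtime output is unchanged

Any hunk for which this equality fails is changing the message itself, not just its layout.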
diff --git a/src/transformers/models/convbert/modeling_convbert.py b/src/transformers/models/convbert/modeling_convbert.py index 3a3b44b986..4fbe71fb55 100755 --- a/src/transformers/models/convbert/modeling_convbert.py +++ b/src/transformers/models/convbert/modeling_convbert.py @@ -581,7 +581,8 @@ class ConvBertLayer(nn.Module): if self.is_decoder and encoder_hidden_states is not None: if not hasattr(self, "crossattention"): raise AttributeError( - f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`" + f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers" + " by setting `config.add_cross_attention=True`" ) cross_attention_outputs = self.crossattention( attention_output, diff --git a/src/transformers/models/convbert/tokenization_convbert.py b/src/transformers/models/convbert/tokenization_convbert.py index a49e32ec00..8bf1b2826e 100644 --- a/src/transformers/models/convbert/tokenization_convbert.py +++ b/src/transformers/models/convbert/tokenization_convbert.py @@ -24,7 +24,9 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"} PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { "YituTech/conv-bert-base": "https://huggingface.co/YituTech/conv-bert-base/resolve/main/vocab.txt", - "YituTech/conv-bert-medium-small": "https://huggingface.co/YituTech/conv-bert-medium-small/resolve/main/vocab.txt", + "YituTech/conv-bert-medium-small": ( + "https://huggingface.co/YituTech/conv-bert-medium-small/resolve/main/vocab.txt" + ), "YituTech/conv-bert-small": "https://huggingface.co/YituTech/conv-bert-small/resolve/main/vocab.txt", } } diff --git a/src/transformers/models/convbert/tokenization_convbert_fast.py b/src/transformers/models/convbert/tokenization_convbert_fast.py index 525e369c4b..383382e130 100644 --- a/src/transformers/models/convbert/tokenization_convbert_fast.py +++ b/src/transformers/models/convbert/tokenization_convbert_fast.py @@ -25,7 +25,9 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"} PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { "YituTech/conv-bert-base": "https://huggingface.co/YituTech/conv-bert-base/resolve/main/vocab.txt", - "YituTech/conv-bert-medium-small": "https://huggingface.co/YituTech/conv-bert-medium-small/resolve/main/vocab.txt", + "YituTech/conv-bert-medium-small": ( + "https://huggingface.co/YituTech/conv-bert-medium-small/resolve/main/vocab.txt" + ), "YituTech/conv-bert-small": "https://huggingface.co/YituTech/conv-bert-small/resolve/main/vocab.txt", } } diff --git a/src/transformers/models/ctrl/modeling_ctrl.py b/src/transformers/models/ctrl/modeling_ctrl.py index 291e12002f..cec2d0d345 100644 --- a/src/transformers/models/ctrl/modeling_ctrl.py +++ b/src/transformers/models/ctrl/modeling_ctrl.py @@ -784,7 +784,7 @@ class CTRLForSequenceClassification(CTRLPreTrainedModel): sequence_lengths = -1 logger.warning( f"{self.__class__.__name__} will not detect padding tokens in `inputs_embeds`. 
Results may be " - f"unexpected if using padding tokens in conjunction with `inputs_embeds.`" + "unexpected if using padding tokens in conjunction with `inputs_embeds`." ) pooled_logits = logits[range(batch_size), sequence_lengths] diff --git a/src/transformers/models/ctrl/modeling_tf_ctrl.py b/src/transformers/models/ctrl/modeling_tf_ctrl.py index 7fadc65cff..cdbed79135 100644 --- a/src/transformers/models/ctrl/modeling_tf_ctrl.py +++ b/src/transformers/models/ctrl/modeling_tf_ctrl.py @@ -807,7 +807,7 @@ class TFCTRLForSequenceClassification(TFCTRLPreTrainedModel, TFSequenceClassific sequence_lengths = -1 logger.warning( f"{self.__class__.__name__} will not detect padding tokens in `inputs_embeds`. Results may be " - f"unexpected if using padding tokens in conjunction with `inputs_embeds.`" + "unexpected if using padding tokens in conjunction with `inputs_embeds`." ) loss = None diff --git a/src/transformers/models/data2vec/configuration_data2vec_audio.py b/src/transformers/models/data2vec/configuration_data2vec_audio.py index 71d455702e..b221c656f4 100644 --- a/src/transformers/models/data2vec/configuration_data2vec_audio.py +++ b/src/transformers/models/data2vec/configuration_data2vec_audio.py @@ -245,10 +245,10 @@ class Data2VecAudioConfig(PretrainedConfig): or (len(self.conv_dim) != self.num_feat_extract_layers) ): raise ValueError( - "Configuration for convolutional layers is incorrect. " - "It is required that `len(config.conv_dim)` == `len(config.conv_stride)` == `len(config.conv_kernel)`, " - f"but is `len(config.conv_dim) = {len(self.conv_dim)}`, `len(config.conv_stride) " - f"= {len(self.conv_stride)}`, `len(config.conv_kernel) = {len(self.conv_kernel)}`." + "Configuration for convolutional layers is incorrect. It is required that `len(config.conv_dim)` ==" + " `len(config.conv_stride)` == `len(config.conv_kernel)`, but is `len(config.conv_dim) =" + f" {len(self.conv_dim)}`, `len(config.conv_stride) = {len(self.conv_stride)}`," + f" `len(config.conv_kernel) = {len(self.conv_kernel)}`." ) # fine-tuning config parameters for SpecAugment: https://arxiv.org/abs/1904.08779 diff --git a/src/transformers/models/data2vec/configuration_data2vec_vision.py b/src/transformers/models/data2vec/configuration_data2vec_vision.py index 5508f4d9e7..a7dd85b817 100644 --- a/src/transformers/models/data2vec/configuration_data2vec_vision.py +++ b/src/transformers/models/data2vec/configuration_data2vec_vision.py @@ -26,7 +26,9 @@ from ...utils import logging logger = logging.get_logger(__name__) DATA2VEC_VISION_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "facebook/data2vec-vision-base-ft": "https://huggingface.co/facebook/data2vec-vision-base-ft/resolve/main/config.json", + "facebook/data2vec-vision-base-ft": ( + "https://huggingface.co/facebook/data2vec-vision-base-ft/resolve/main/config.json" + ), } diff --git a/src/transformers/models/data2vec/convert_data2vec_audio_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/data2vec/convert_data2vec_audio_original_pytorch_checkpoint_to_pytorch.py index e8a703de91..01c2d8cab2 100644 --- a/src/transformers/models/data2vec/convert_data2vec_audio_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/data2vec/convert_data2vec_audio_original_pytorch_checkpoint_to_pytorch.py @@ -66,7 +66,8 @@ def set_recursively(hf_pointer, key, value, full_name, weight_type): if hf_shape != value.shape: raise ValueError( - f"Shape of hf {key + '.' 
+ weight_type if weight_type is not None else ''} is {hf_shape}, but should be {value.shape} for {full_name}" + f"Shape of hf {key + '.' + weight_type if weight_type is not None else ''} is {hf_shape}, but should be" + f" {value.shape} for {full_name}" ) if weight_type == "weight": diff --git a/src/transformers/models/data2vec/convert_data2vec_text_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/data2vec/convert_data2vec_text_original_pytorch_checkpoint_to_pytorch.py index 8659e36d9f..9a38b3ae0b 100644 --- a/src/transformers/models/data2vec/convert_data2vec_text_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/data2vec/convert_data2vec_text_original_pytorch_checkpoint_to_pytorch.py @@ -98,13 +98,22 @@ def convert_data2vec_checkpoint_to_pytorch( self_attn: BertSelfAttention = layer.attention.self assert data2vec_layer.self_attn.k_proj.weight.data.shape == torch.Size( (config.hidden_size, config.hidden_size) - ), f"Shape for data2vec_layer.self_attn.k_proj.weight.data should be {torch.Size((config.hidden_size, config.hidden_size))}" + ), ( + "Shape for data2vec_layer.self_attn.k_proj.weight.data should be" + f" {torch.Size((config.hidden_size, config.hidden_size))}" + ) assert data2vec_layer.self_attn.q_proj.weight.data.shape == torch.Size( (config.hidden_size, config.hidden_size) - ), f"Shape for data2vec_layer.self_attn.q_proj.weight.data should be {torch.Size((config.hidden_size, config.hidden_size))}" + ), ( + "Shape for data2vec_layer.self_attn.q_proj.weight.data should be" + f" {torch.Size((config.hidden_size, config.hidden_size))}" + ) assert data2vec_layer.self_attn.v_proj.weight.data.shape == torch.Size( (config.hidden_size, config.hidden_size) - ), f"Shape for data2vec_layer.self_attn.v_proj.weight.data should be {torch.Size((config.hidden_size, config.hidden_size))}" + ), ( + "Shape for data2vec_layer.self_attn.v_proj.weight.data should be" + f" {torch.Size((config.hidden_size, config.hidden_size))}" + ) self_attn.query.weight.data = data2vec_layer.self_attn.q_proj.weight self_attn.query.bias.data = data2vec_layer.self_attn.q_proj.bias diff --git a/src/transformers/models/data2vec/modeling_data2vec_audio.py b/src/transformers/models/data2vec/modeling_data2vec_audio.py index 3f255248c1..a4b6e6e65c 100755 --- a/src/transformers/models/data2vec/modeling_data2vec_audio.py +++ b/src/transformers/models/data2vec/modeling_data2vec_audio.py @@ -498,7 +498,8 @@ class Data2VecAudioAttention(nn.Module): if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len): raise ValueError( - f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is {attn_weights.size()}" + f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is" + f" {attn_weights.size()}" ) if attention_mask is not None: @@ -514,7 +515,8 @@ class Data2VecAudioAttention(nn.Module): if layer_head_mask is not None: if layer_head_mask.size() != (self.num_heads,): raise ValueError( - f"Head mask for a single layer should be of size {(self.num_heads,)}, but is {layer_head_mask.size()}" + f"Head mask for a single layer should be of size {(self.num_heads,)}, but is" + f" {layer_head_mask.size()}" ) attn_weights = layer_head_mask.view(1, -1, 1, 1) * attn_weights.view(bsz, self.num_heads, tgt_len, src_len) attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len) @@ -535,7 +537,8 @@ class Data2VecAudioAttention(nn.Module): if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim): raise ValueError( - 
f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is {attn_output.size()}" + f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is" + f" {attn_output.size()}" ) attn_output = attn_output.view(bsz, self.num_heads, tgt_len, self.head_dim) @@ -1294,7 +1297,8 @@ class Data2VecAudioForAudioFrameClassification(Data2VecAudioPreTrainedModel): if hasattr(config, "add_adapter") and config.add_adapter: raise ValueError( - "Audio frame classification does not support the use of Data2VecAudio adapters (config.add_adapter=True)" + "Audio frame classification does not support the use of Data2VecAudio adapters" + " (config.add_adapter=True)" ) self.data2vec_audio = Data2VecAudioModel(config) num_layers = config.num_hidden_layers + 1 # transformer layers + input embeddings diff --git a/src/transformers/models/data2vec/modeling_data2vec_text.py b/src/transformers/models/data2vec/modeling_data2vec_text.py index 345d075d31..9c85d34617 100644 --- a/src/transformers/models/data2vec/modeling_data2vec_text.py +++ b/src/transformers/models/data2vec/modeling_data2vec_text.py @@ -426,7 +426,8 @@ class Data2VecTextLayer(nn.Module): if self.is_decoder and encoder_hidden_states is not None: if not hasattr(self, "crossattention"): raise ValueError( - f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`" + f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers" + " by setting `config.add_cross_attention=True`" ) # cross_attn cached key/values tuple is at positions 3,4 of past_key_value tuple diff --git a/src/transformers/models/data2vec/modeling_tf_data2vec_vision.py b/src/transformers/models/data2vec/modeling_tf_data2vec_vision.py index 4c3446dc06..618e66a10d 100644 --- a/src/transformers/models/data2vec/modeling_tf_data2vec_vision.py +++ b/src/transformers/models/data2vec/modeling_tf_data2vec_vision.py @@ -227,7 +227,8 @@ class TFPatchEmbeddings(tf.keras.layers.Layer): if getattr(height, "numpy", None) and getattr(width, "numpy", None): if height != self.image_size[0] or width != self.image_size[1]: raise ValueError( - f"Input image size ({height}*{width}) doesn't match model ({self.image_size[0]}*{self.image_size[1]})." + f"Input image size ({height}*{width}) doesn't match model" + f" ({self.image_size[0]}*{self.image_size[1]})." ) # When running on CPU, `tf.keras.layers.Conv2D` doesn't support `NCHW` format. 
diff --git a/src/transformers/models/deberta/tokenization_deberta.py b/src/transformers/models/deberta/tokenization_deberta.py index 13bb8b4817..0ff9359fb0 100644 --- a/src/transformers/models/deberta/tokenization_deberta.py +++ b/src/transformers/models/deberta/tokenization_deberta.py @@ -32,7 +32,9 @@ PRETRAINED_VOCAB_FILES_MAP = { "microsoft/deberta-xlarge": "https://huggingface.co/microsoft/deberta-xlarge/resolve/main/vocab.json", "microsoft/deberta-base-mnli": "https://huggingface.co/microsoft/deberta-base-mnli/resolve/main/vocab.json", "microsoft/deberta-large-mnli": "https://huggingface.co/microsoft/deberta-large-mnli/resolve/main/vocab.json", - "microsoft/deberta-xlarge-mnli": "https://huggingface.co/microsoft/deberta-xlarge-mnli/resolve/main/vocab.json", + "microsoft/deberta-xlarge-mnli": ( + "https://huggingface.co/microsoft/deberta-xlarge-mnli/resolve/main/vocab.json" + ), }, "merges_file": { "microsoft/deberta-base": "https://huggingface.co/microsoft/deberta-base/resolve/main/merges.txt", @@ -40,7 +42,9 @@ PRETRAINED_VOCAB_FILES_MAP = { "microsoft/deberta-xlarge": "https://huggingface.co/microsoft/deberta-xlarge/resolve/main/merges.txt", "microsoft/deberta-base-mnli": "https://huggingface.co/microsoft/deberta-base-mnli/resolve/main/merges.txt", "microsoft/deberta-large-mnli": "https://huggingface.co/microsoft/deberta-large-mnli/resolve/main/merges.txt", - "microsoft/deberta-xlarge-mnli": "https://huggingface.co/microsoft/deberta-xlarge-mnli/resolve/main/merges.txt", + "microsoft/deberta-xlarge-mnli": ( + "https://huggingface.co/microsoft/deberta-xlarge-mnli/resolve/main/merges.txt" + ), }, } diff --git a/src/transformers/models/deberta/tokenization_deberta_fast.py b/src/transformers/models/deberta/tokenization_deberta_fast.py index 62deff8b14..5b3852a6ed 100644 --- a/src/transformers/models/deberta/tokenization_deberta_fast.py +++ b/src/transformers/models/deberta/tokenization_deberta_fast.py @@ -33,7 +33,9 @@ PRETRAINED_VOCAB_FILES_MAP = { "microsoft/deberta-xlarge": "https://huggingface.co/microsoft/deberta-xlarge/resolve/main/vocab.json", "microsoft/deberta-base-mnli": "https://huggingface.co/microsoft/deberta-base-mnli/resolve/main/vocab.json", "microsoft/deberta-large-mnli": "https://huggingface.co/microsoft/deberta-large-mnli/resolve/main/vocab.json", - "microsoft/deberta-xlarge-mnli": "https://huggingface.co/microsoft/deberta-xlarge-mnli/resolve/main/vocab.json", + "microsoft/deberta-xlarge-mnli": ( + "https://huggingface.co/microsoft/deberta-xlarge-mnli/resolve/main/vocab.json" + ), }, "merges_file": { "microsoft/deberta-base": "https://huggingface.co/microsoft/deberta-base/resolve/main/merges.txt", @@ -41,7 +43,9 @@ PRETRAINED_VOCAB_FILES_MAP = { "microsoft/deberta-xlarge": "https://huggingface.co/microsoft/deberta-xlarge/resolve/main/merges.txt", "microsoft/deberta-base-mnli": "https://huggingface.co/microsoft/deberta-base-mnli/resolve/main/merges.txt", "microsoft/deberta-large-mnli": "https://huggingface.co/microsoft/deberta-large-mnli/resolve/main/merges.txt", - "microsoft/deberta-xlarge-mnli": "https://huggingface.co/microsoft/deberta-xlarge-mnli/resolve/main/merges.txt", + "microsoft/deberta-xlarge-mnli": ( + "https://huggingface.co/microsoft/deberta-xlarge-mnli/resolve/main/merges.txt" + ), }, } diff --git a/src/transformers/models/deberta_v2/configuration_deberta_v2.py b/src/transformers/models/deberta_v2/configuration_deberta_v2.py index 0f6f268c38..7b81f146b9 100644 --- a/src/transformers/models/deberta_v2/configuration_deberta_v2.py +++ 
b/src/transformers/models/deberta_v2/configuration_deberta_v2.py @@ -23,8 +23,12 @@ logger = logging.get_logger(__name__) DEBERTA_V2_PRETRAINED_CONFIG_ARCHIVE_MAP = { "microsoft/deberta-v2-xlarge": "https://huggingface.co/microsoft/deberta-v2-xlarge/resolve/main/config.json", "microsoft/deberta-v2-xxlarge": "https://huggingface.co/microsoft/deberta-v2-xxlarge/resolve/main/config.json", - "microsoft/deberta-v2-xlarge-mnli": "https://huggingface.co/microsoft/deberta-v2-xlarge-mnli/resolve/main/config.json", - "microsoft/deberta-v2-xxlarge-mnli": "https://huggingface.co/microsoft/deberta-v2-xxlarge-mnli/resolve/main/config.json", + "microsoft/deberta-v2-xlarge-mnli": ( + "https://huggingface.co/microsoft/deberta-v2-xlarge-mnli/resolve/main/config.json" + ), + "microsoft/deberta-v2-xxlarge-mnli": ( + "https://huggingface.co/microsoft/deberta-v2-xxlarge-mnli/resolve/main/config.json" + ), } diff --git a/src/transformers/models/deberta_v2/tokenization_deberta_v2.py b/src/transformers/models/deberta_v2/tokenization_deberta_v2.py index 577532e1be..123afacf82 100644 --- a/src/transformers/models/deberta_v2/tokenization_deberta_v2.py +++ b/src/transformers/models/deberta_v2/tokenization_deberta_v2.py @@ -28,8 +28,12 @@ PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { "microsoft/deberta-v2-xlarge": "https://huggingface.co/microsoft/deberta-v2-xlarge/resolve/main/spm.model", "microsoft/deberta-v2-xxlarge": "https://huggingface.co/microsoft/deberta-v2-xxlarge/resolve/main/spm.model", - "microsoft/deberta-v2-xlarge-mnli": "https://huggingface.co/microsoft/deberta-v2-xlarge-mnli/resolve/main/spm.model", - "microsoft/deberta-v2-xxlarge-mnli": "https://huggingface.co/microsoft/deberta-v2-xxlarge-mnli/resolve/main/spm.model", + "microsoft/deberta-v2-xlarge-mnli": ( + "https://huggingface.co/microsoft/deberta-v2-xlarge-mnli/resolve/main/spm.model" + ), + "microsoft/deberta-v2-xxlarge-mnli": ( + "https://huggingface.co/microsoft/deberta-v2-xxlarge-mnli/resolve/main/spm.model" + ), } } @@ -137,8 +141,8 @@ class DebertaV2Tokenizer(PreTrainedTokenizer): if not os.path.isfile(vocab_file): raise ValueError( - f"Can't find a vocabulary file at path '{vocab_file}'. To load the vocabulary from a Google pretrained " - "model use `tokenizer = AutoTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`" + f"Can't find a vocabulary file at path '{vocab_file}'. 
To load the vocabulary from a Google pretrained" + " model use `tokenizer = AutoTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`" ) self.do_lower_case = do_lower_case self.split_by_punct = split_by_punct diff --git a/src/transformers/models/deberta_v2/tokenization_deberta_v2_fast.py b/src/transformers/models/deberta_v2/tokenization_deberta_v2_fast.py index 8aa92180d6..32ccd84862 100644 --- a/src/transformers/models/deberta_v2/tokenization_deberta_v2_fast.py +++ b/src/transformers/models/deberta_v2/tokenization_deberta_v2_fast.py @@ -36,8 +36,12 @@ PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { "microsoft/deberta-v2-xlarge": "https://huggingface.co/microsoft/deberta-v2-xlarge/resolve/main/spm.model", "microsoft/deberta-v2-xxlarge": "https://huggingface.co/microsoft/deberta-v2-xxlarge/resolve/main/spm.model", - "microsoft/deberta-v2-xlarge-mnli": "https://huggingface.co/microsoft/deberta-v2-xlarge-mnli/resolve/main/spm.model", - "microsoft/deberta-v2-xxlarge-mnli": "https://huggingface.co/microsoft/deberta-v2-xxlarge-mnli/resolve/main/spm.model", + "microsoft/deberta-v2-xlarge-mnli": ( + "https://huggingface.co/microsoft/deberta-v2-xlarge-mnli/resolve/main/spm.model" + ), + "microsoft/deberta-v2-xxlarge-mnli": ( + "https://huggingface.co/microsoft/deberta-v2-xxlarge-mnli/resolve/main/spm.model" + ), } } diff --git a/src/transformers/models/decision_transformer/configuration_decision_transformer.py b/src/transformers/models/decision_transformer/configuration_decision_transformer.py index 389cb0d302..01c74c247b 100644 --- a/src/transformers/models/decision_transformer/configuration_decision_transformer.py +++ b/src/transformers/models/decision_transformer/configuration_decision_transformer.py @@ -21,7 +21,9 @@ from ...utils import logging logger = logging.get_logger(__name__) DECISION_TRANSFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "edbeeching/decision-transformer-gym-hopper-medium": "https://huggingface.co/edbeeching/decision-transformer-gym-hopper-medium/resolve/main/config.json", + "edbeeching/decision-transformer-gym-hopper-medium": ( + "https://huggingface.co/edbeeching/decision-transformer-gym-hopper-medium/resolve/main/config.json" + ), # See all DecisionTransformer models at https://huggingface.co/models?filter=decision_transformer } diff --git a/src/transformers/models/decision_transformer/modeling_decision_transformer.py b/src/transformers/models/decision_transformer/modeling_decision_transformer.py index dcad3786d8..4f202800b7 100755 --- a/src/transformers/models/decision_transformer/modeling_decision_transformer.py +++ b/src/transformers/models/decision_transformer/modeling_decision_transformer.py @@ -135,7 +135,8 @@ class DecisionTransformerGPT2Attention(nn.Module): self.split_size = self.embed_dim if self.head_dim * self.num_heads != self.embed_dim: raise ValueError( - f"`embed_dim` must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`: {self.num_heads})." + f"`embed_dim` must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`:" + f" {self.num_heads})." ) self.scale_attn_weights = config.scale_attn_weights @@ -290,8 +291,8 @@ class DecisionTransformerGPT2Attention(nn.Module): if encoder_hidden_states is not None: if not hasattr(self, "q_attn"): raise ValueError( - "If class is used as cross attention, the weights `q_attn` have to be defined. " - "Please make sure to instantiate class with `DecisionTransformerGPT2Attention(..., is_cross_attention=True)`." 
+ "If class is used as cross attention, the weights `q_attn` have to be defined. Please make sure to" + " instantiate class with `DecisionTransformerGPT2Attention(..., is_cross_attention=True)`." ) query = self.q_attn(hidden_states) diff --git a/src/transformers/models/deit/configuration_deit.py b/src/transformers/models/deit/configuration_deit.py index 022df1727f..df74664ace 100644 --- a/src/transformers/models/deit/configuration_deit.py +++ b/src/transformers/models/deit/configuration_deit.py @@ -27,7 +27,9 @@ from ...utils import logging logger = logging.get_logger(__name__) DEIT_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "facebook/deit-base-distilled-patch16-224": "https://huggingface.co/facebook/deit-base-patch16-224/resolve/main/config.json", + "facebook/deit-base-distilled-patch16-224": ( + "https://huggingface.co/facebook/deit-base-patch16-224/resolve/main/config.json" + ), # See all DeiT models at https://huggingface.co/models?filter=deit } diff --git a/src/transformers/models/deit/modeling_deit.py b/src/transformers/models/deit/modeling_deit.py index d6fc9d8551..ac429c0a61 100644 --- a/src/transformers/models/deit/modeling_deit.py +++ b/src/transformers/models/deit/modeling_deit.py @@ -570,7 +570,8 @@ class DeiTPooler(nn.Module): @add_start_docstrings( - "DeiT Model with a decoder on top for masked image modeling, as proposed in `SimMIM `__.", + "DeiT Model with a decoder on top for masked image modeling, as proposed in `SimMIM" + " `__.", DEIT_START_DOCSTRING, ) class DeiTForMaskedImageModeling(DeiTPreTrainedModel): diff --git a/src/transformers/models/detr/feature_extraction_detr.py b/src/transformers/models/detr/feature_extraction_detr.py index 15b37fbae7..91e406c71f 100644 --- a/src/transformers/models/detr/feature_extraction_detr.py +++ b/src/transformers/models/detr/feature_extraction_detr.py @@ -538,7 +538,8 @@ class DetrFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin): valid_masks_path = True if not valid_masks_path: raise ValueError( - "The path to the directory containing the mask PNG files should be provided as a `pathlib.Path` object." + "The path to the directory containing the mask PNG files should be provided as a" + " `pathlib.Path` object." ) if not is_batched: diff --git a/src/transformers/models/detr/modeling_detr.py b/src/transformers/models/detr/modeling_detr.py index 64f8190d62..d261104ac7 100644 --- a/src/transformers/models/detr/modeling_detr.py +++ b/src/transformers/models/detr/modeling_detr.py @@ -489,7 +489,8 @@ class DetrAttention(nn.Module): self.head_dim = embed_dim // num_heads if self.head_dim * num_heads != self.embed_dim: raise ValueError( - f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`: {num_heads})." + f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`:" + f" {num_heads})." 
) self.scaling = self.head_dim**-0.5 @@ -553,7 +554,8 @@ class DetrAttention(nn.Module): if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len): raise ValueError( - f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is {attn_weights.size()}" + f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is" + f" {attn_weights.size()}" ) if attention_mask is not None: @@ -582,7 +584,8 @@ class DetrAttention(nn.Module): if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim): raise ValueError( - f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is {attn_output.size()}" + f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is" + f" {attn_output.size()}" ) attn_output = attn_output.view(bsz, self.num_heads, tgt_len, self.head_dim) @@ -1714,7 +1717,8 @@ class DetrMaskHeadSmallConv(nn.Module): if dim % 8 != 0: raise ValueError( - "The hidden_size + number of attention heads must be divisible by 8 as the number of groups in GroupNorm is set to 8" + "The hidden_size + number of attention heads must be divisible by 8 as the number of groups in" + " GroupNorm is set to 8" ) inter_dims = [dim, context_dim // 2, context_dim // 4, context_dim // 8, context_dim // 16, context_dim // 64] diff --git a/src/transformers/models/distilbert/configuration_distilbert.py b/src/transformers/models/distilbert/configuration_distilbert.py index 59752bbe7e..c746ad0d64 100644 --- a/src/transformers/models/distilbert/configuration_distilbert.py +++ b/src/transformers/models/distilbert/configuration_distilbert.py @@ -25,12 +25,20 @@ logger = logging.get_logger(__name__) DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP = { "distilbert-base-uncased": "https://huggingface.co/distilbert-base-uncased/resolve/main/config.json", - "distilbert-base-uncased-distilled-squad": "https://huggingface.co/distilbert-base-uncased-distilled-squad/resolve/main/config.json", + "distilbert-base-uncased-distilled-squad": ( + "https://huggingface.co/distilbert-base-uncased-distilled-squad/resolve/main/config.json" + ), "distilbert-base-cased": "https://huggingface.co/distilbert-base-cased/resolve/main/config.json", - "distilbert-base-cased-distilled-squad": "https://huggingface.co/distilbert-base-cased-distilled-squad/resolve/main/config.json", + "distilbert-base-cased-distilled-squad": ( + "https://huggingface.co/distilbert-base-cased-distilled-squad/resolve/main/config.json" + ), "distilbert-base-german-cased": "https://huggingface.co/distilbert-base-german-cased/resolve/main/config.json", - "distilbert-base-multilingual-cased": "https://huggingface.co/distilbert-base-multilingual-cased/resolve/main/config.json", - "distilbert-base-uncased-finetuned-sst-2-english": "https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english/resolve/main/config.json", + "distilbert-base-multilingual-cased": ( + "https://huggingface.co/distilbert-base-multilingual-cased/resolve/main/config.json" + ), + "distilbert-base-uncased-finetuned-sst-2-english": ( + "https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english/resolve/main/config.json" + ), } diff --git a/src/transformers/models/distilbert/tokenization_distilbert.py b/src/transformers/models/distilbert/tokenization_distilbert.py index 694c0ad25a..9408ca0b0f 100644 --- a/src/transformers/models/distilbert/tokenization_distilbert.py +++ b/src/transformers/models/distilbert/tokenization_distilbert.py @@ -25,11 +25,17 @@ 
VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"} PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { "distilbert-base-uncased": "https://huggingface.co/distilbert-base-uncased/resolve/main/vocab.txt", - "distilbert-base-uncased-distilled-squad": "https://huggingface.co/distilbert-base-uncased-distilled-squad/resolve/main/vocab.txt", + "distilbert-base-uncased-distilled-squad": ( + "https://huggingface.co/distilbert-base-uncased-distilled-squad/resolve/main/vocab.txt" + ), "distilbert-base-cased": "https://huggingface.co/distilbert-base-cased/resolve/main/vocab.txt", - "distilbert-base-cased-distilled-squad": "https://huggingface.co/distilbert-base-cased-distilled-squad/resolve/main/vocab.txt", + "distilbert-base-cased-distilled-squad": ( + "https://huggingface.co/distilbert-base-cased-distilled-squad/resolve/main/vocab.txt" + ), "distilbert-base-german-cased": "https://huggingface.co/distilbert-base-german-cased/resolve/main/vocab.txt", - "distilbert-base-multilingual-cased": "https://huggingface.co/distilbert-base-multilingual-cased/resolve/main/vocab.txt", + "distilbert-base-multilingual-cased": ( + "https://huggingface.co/distilbert-base-multilingual-cased/resolve/main/vocab.txt" + ), } } diff --git a/src/transformers/models/distilbert/tokenization_distilbert_fast.py b/src/transformers/models/distilbert/tokenization_distilbert_fast.py index 6a4ddfb819..fdd69dc3e0 100644 --- a/src/transformers/models/distilbert/tokenization_distilbert_fast.py +++ b/src/transformers/models/distilbert/tokenization_distilbert_fast.py @@ -26,19 +26,33 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt", "tokenizer_file": "tokenizer.jso PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { "distilbert-base-uncased": "https://huggingface.co/distilbert-base-uncased/resolve/main/vocab.txt", - "distilbert-base-uncased-distilled-squad": "https://huggingface.co/distilbert-base-uncased-distilled-squad/resolve/main/vocab.txt", + "distilbert-base-uncased-distilled-squad": ( + "https://huggingface.co/distilbert-base-uncased-distilled-squad/resolve/main/vocab.txt" + ), "distilbert-base-cased": "https://huggingface.co/distilbert-base-cased/resolve/main/vocab.txt", - "distilbert-base-cased-distilled-squad": "https://huggingface.co/distilbert-base-cased-distilled-squad/resolve/main/vocab.txt", + "distilbert-base-cased-distilled-squad": ( + "https://huggingface.co/distilbert-base-cased-distilled-squad/resolve/main/vocab.txt" + ), "distilbert-base-german-cased": "https://huggingface.co/distilbert-base-german-cased/resolve/main/vocab.txt", - "distilbert-base-multilingual-cased": "https://huggingface.co/distilbert-base-multilingual-cased/resolve/main/vocab.txt", + "distilbert-base-multilingual-cased": ( + "https://huggingface.co/distilbert-base-multilingual-cased/resolve/main/vocab.txt" + ), }, "tokenizer_file": { "distilbert-base-uncased": "https://huggingface.co/distilbert-base-uncased/resolve/main/tokenizer.json", - "distilbert-base-uncased-distilled-squad": "https://huggingface.co/distilbert-base-uncased-distilled-squad/resolve/main/tokenizer.json", + "distilbert-base-uncased-distilled-squad": ( + "https://huggingface.co/distilbert-base-uncased-distilled-squad/resolve/main/tokenizer.json" + ), "distilbert-base-cased": "https://huggingface.co/distilbert-base-cased/resolve/main/tokenizer.json", - "distilbert-base-cased-distilled-squad": "https://huggingface.co/distilbert-base-cased-distilled-squad/resolve/main/tokenizer.json", - "distilbert-base-german-cased": 
"https://huggingface.co/distilbert-base-german-cased/resolve/main/tokenizer.json", - "distilbert-base-multilingual-cased": "https://huggingface.co/distilbert-base-multilingual-cased/resolve/main/tokenizer.json", + "distilbert-base-cased-distilled-squad": ( + "https://huggingface.co/distilbert-base-cased-distilled-squad/resolve/main/tokenizer.json" + ), + "distilbert-base-german-cased": ( + "https://huggingface.co/distilbert-base-german-cased/resolve/main/tokenizer.json" + ), + "distilbert-base-multilingual-cased": ( + "https://huggingface.co/distilbert-base-multilingual-cased/resolve/main/tokenizer.json" + ), }, } diff --git a/src/transformers/models/dpr/configuration_dpr.py b/src/transformers/models/dpr/configuration_dpr.py index 0828f0a92c..799f9aae4e 100644 --- a/src/transformers/models/dpr/configuration_dpr.py +++ b/src/transformers/models/dpr/configuration_dpr.py @@ -21,12 +21,24 @@ from ...utils import logging logger = logging.get_logger(__name__) DPR_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "facebook/dpr-ctx_encoder-single-nq-base": "https://huggingface.co/facebook/dpr-ctx_encoder-single-nq-base/resolve/main/config.json", - "facebook/dpr-question_encoder-single-nq-base": "https://huggingface.co/facebook/dpr-question_encoder-single-nq-base/resolve/main/config.json", - "facebook/dpr-reader-single-nq-base": "https://huggingface.co/facebook/dpr-reader-single-nq-base/resolve/main/config.json", - "facebook/dpr-ctx_encoder-multiset-base": "https://huggingface.co/facebook/dpr-ctx_encoder-multiset-base/resolve/main/config.json", - "facebook/dpr-question_encoder-multiset-base": "https://huggingface.co/facebook/dpr-question_encoder-multiset-base/resolve/main/config.json", - "facebook/dpr-reader-multiset-base": "https://huggingface.co/facebook/dpr-reader-multiset-base/resolve/main/config.json", + "facebook/dpr-ctx_encoder-single-nq-base": ( + "https://huggingface.co/facebook/dpr-ctx_encoder-single-nq-base/resolve/main/config.json" + ), + "facebook/dpr-question_encoder-single-nq-base": ( + "https://huggingface.co/facebook/dpr-question_encoder-single-nq-base/resolve/main/config.json" + ), + "facebook/dpr-reader-single-nq-base": ( + "https://huggingface.co/facebook/dpr-reader-single-nq-base/resolve/main/config.json" + ), + "facebook/dpr-ctx_encoder-multiset-base": ( + "https://huggingface.co/facebook/dpr-ctx_encoder-multiset-base/resolve/main/config.json" + ), + "facebook/dpr-question_encoder-multiset-base": ( + "https://huggingface.co/facebook/dpr-question_encoder-multiset-base/resolve/main/config.json" + ), + "facebook/dpr-reader-multiset-base": ( + "https://huggingface.co/facebook/dpr-reader-multiset-base/resolve/main/config.json" + ), } diff --git a/src/transformers/models/dpr/convert_dpr_original_checkpoint_to_pytorch.py b/src/transformers/models/dpr/convert_dpr_original_checkpoint_to_pytorch.py index c6484581b7..6ea8562024 100644 --- a/src/transformers/models/dpr/convert_dpr_original_checkpoint_to_pytorch.py +++ b/src/transformers/models/dpr/convert_dpr_original_checkpoint_to_pytorch.py @@ -124,7 +124,11 @@ if __name__ == "__main__": parser.add_argument( "--src", type=str, - help="Path to the dpr checkpoint file. They can be downloaded from the official DPR repo https://github.com/facebookresearch/DPR. Note that in the official repo, both encoders are stored in the 'retriever' checkpoints.", + help=( + "Path to the dpr checkpoint file. They can be downloaded from the official DPR repo" + " https://github.com/facebookresearch/DPR. 
Note that in the official repo, both encoders are stored in the" + " 'retriever' checkpoints." + ), ) parser.add_argument("--dest", type=str, default=None, help="Path to the output PyTorch model directory.") args = parser.parse_args() diff --git a/src/transformers/models/dpr/tokenization_dpr.py b/src/transformers/models/dpr/tokenization_dpr.py index 8edaf2d3d1..208b9c377e 100644 --- a/src/transformers/models/dpr/tokenization_dpr.py +++ b/src/transformers/models/dpr/tokenization_dpr.py @@ -29,32 +29,56 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt", "tokenizer_file": "tokenizer.jso CONTEXT_ENCODER_PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "facebook/dpr-ctx_encoder-single-nq-base": "https://huggingface.co/facebook/dpr-ctx_encoder-single-nq-base/resolve/main/vocab.txt", - "facebook/dpr-ctx_encoder-multiset-base": "https://huggingface.co/facebook/dpr-ctx_encoder-multiset-base/resolve/main/vocab.txt", + "facebook/dpr-ctx_encoder-single-nq-base": ( + "https://huggingface.co/facebook/dpr-ctx_encoder-single-nq-base/resolve/main/vocab.txt" + ), + "facebook/dpr-ctx_encoder-multiset-base": ( + "https://huggingface.co/facebook/dpr-ctx_encoder-multiset-base/resolve/main/vocab.txt" + ), }, "tokenizer_file": { - "facebook/dpr-ctx_encoder-single-nq-base": "https://huggingface.co/facebook/dpr-ctx_encoder-single-nq-base/resolve/main/tokenizer.json", - "facebook/dpr-ctx_encoder-multiset-base": "https://huggingface.co/facebook/dpr-ctx_encoder-multiset-base/resolve/main/tokenizer.json", + "facebook/dpr-ctx_encoder-single-nq-base": ( + "https://huggingface.co/facebook/dpr-ctx_encoder-single-nq-base/resolve/main/tokenizer.json" + ), + "facebook/dpr-ctx_encoder-multiset-base": ( + "https://huggingface.co/facebook/dpr-ctx_encoder-multiset-base/resolve/main/tokenizer.json" + ), }, } QUESTION_ENCODER_PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "facebook/dpr-question_encoder-single-nq-base": "https://huggingface.co/facebook/dpr-question_encoder-single-nq-base/resolve/main/vocab.txt", - "facebook/dpr-question_encoder-multiset-base": "https://huggingface.co/facebook/dpr-question_encoder-multiset-base/resolve/main/vocab.txt", + "facebook/dpr-question_encoder-single-nq-base": ( + "https://huggingface.co/facebook/dpr-question_encoder-single-nq-base/resolve/main/vocab.txt" + ), + "facebook/dpr-question_encoder-multiset-base": ( + "https://huggingface.co/facebook/dpr-question_encoder-multiset-base/resolve/main/vocab.txt" + ), }, "tokenizer_file": { - "facebook/dpr-question_encoder-single-nq-base": "https://huggingface.co/facebook/dpr-question_encoder-single-nq-base/resolve/main/tokenizer.json", - "facebook/dpr-question_encoder-multiset-base": "https://huggingface.co/facebook/dpr-question_encoder-multiset-base/resolve/main/tokenizer.json", + "facebook/dpr-question_encoder-single-nq-base": ( + "https://huggingface.co/facebook/dpr-question_encoder-single-nq-base/resolve/main/tokenizer.json" + ), + "facebook/dpr-question_encoder-multiset-base": ( + "https://huggingface.co/facebook/dpr-question_encoder-multiset-base/resolve/main/tokenizer.json" + ), }, } READER_PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "facebook/dpr-reader-single-nq-base": "https://huggingface.co/facebook/dpr-reader-single-nq-base/resolve/main/vocab.txt", - "facebook/dpr-reader-multiset-base": "https://huggingface.co/facebook/dpr-reader-multiset-base/resolve/main/vocab.txt", + "facebook/dpr-reader-single-nq-base": ( + "https://huggingface.co/facebook/dpr-reader-single-nq-base/resolve/main/vocab.txt" + ), + 
"facebook/dpr-reader-multiset-base": ( + "https://huggingface.co/facebook/dpr-reader-multiset-base/resolve/main/vocab.txt" + ), }, "tokenizer_file": { - "facebook/dpr-reader-single-nq-base": "https://huggingface.co/facebook/dpr-reader-single-nq-base/resolve/main/tokenizer.json", - "facebook/dpr-reader-multiset-base": "https://huggingface.co/facebook/dpr-reader-multiset-base/resolve/main/tokenizer.json", + "facebook/dpr-reader-single-nq-base": ( + "https://huggingface.co/facebook/dpr-reader-single-nq-base/resolve/main/tokenizer.json" + ), + "facebook/dpr-reader-multiset-base": ( + "https://huggingface.co/facebook/dpr-reader-multiset-base/resolve/main/tokenizer.json" + ), }, } @@ -342,8 +366,8 @@ class CustomDPRReaderTokenizerMixin: `span_score` order and keeping max `top_spans` spans. Spans longer that `max_answer_length` are ignored. """ scores = [] - for (start_index, start_score) in enumerate(start_logits): - for (answer_length, end_score) in enumerate(end_logits[start_index : start_index + max_answer_length]): + for start_index, start_score in enumerate(start_logits): + for answer_length, end_score in enumerate(end_logits[start_index : start_index + max_answer_length]): scores.append(((start_index, start_index + answer_length), start_score + end_score)) scores = sorted(scores, key=lambda x: x[1], reverse=True) chosen_span_intervals = [] diff --git a/src/transformers/models/dpr/tokenization_dpr_fast.py b/src/transformers/models/dpr/tokenization_dpr_fast.py index ea021dcb6a..486eb9f387 100644 --- a/src/transformers/models/dpr/tokenization_dpr_fast.py +++ b/src/transformers/models/dpr/tokenization_dpr_fast.py @@ -30,32 +30,56 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt", "tokenizer_file": "tokenizer.jso CONTEXT_ENCODER_PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "facebook/dpr-ctx_encoder-single-nq-base": "https://huggingface.co/facebook/dpr-ctx_encoder-single-nq-base/resolve/main/vocab.txt", - "facebook/dpr-ctx_encoder-multiset-base": "https://huggingface.co/facebook/dpr-ctx_encoder-multiset-base/resolve/main/vocab.txt", + "facebook/dpr-ctx_encoder-single-nq-base": ( + "https://huggingface.co/facebook/dpr-ctx_encoder-single-nq-base/resolve/main/vocab.txt" + ), + "facebook/dpr-ctx_encoder-multiset-base": ( + "https://huggingface.co/facebook/dpr-ctx_encoder-multiset-base/resolve/main/vocab.txt" + ), }, "tokenizer_file": { - "facebook/dpr-ctx_encoder-single-nq-base": "https://huggingface.co/facebook/dpr-ctx_encoder-single-nq-base/resolve/main/tokenizer.json", - "facebook/dpr-ctx_encoder-multiset-base": "https://huggingface.co/facebook/dpr-ctx_encoder-multiset-base/resolve/main/tokenizer.json", + "facebook/dpr-ctx_encoder-single-nq-base": ( + "https://huggingface.co/facebook/dpr-ctx_encoder-single-nq-base/resolve/main/tokenizer.json" + ), + "facebook/dpr-ctx_encoder-multiset-base": ( + "https://huggingface.co/facebook/dpr-ctx_encoder-multiset-base/resolve/main/tokenizer.json" + ), }, } QUESTION_ENCODER_PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "facebook/dpr-question_encoder-single-nq-base": "https://huggingface.co/facebook/dpr-question_encoder-single-nq-base/resolve/main/vocab.txt", - "facebook/dpr-question_encoder-multiset-base": "https://huggingface.co/facebook/dpr-question_encoder-multiset-base/resolve/main/vocab.txt", + "facebook/dpr-question_encoder-single-nq-base": ( + "https://huggingface.co/facebook/dpr-question_encoder-single-nq-base/resolve/main/vocab.txt" + ), + "facebook/dpr-question_encoder-multiset-base": ( + 
"https://huggingface.co/facebook/dpr-question_encoder-multiset-base/resolve/main/vocab.txt" + ), }, "tokenizer_file": { - "facebook/dpr-question_encoder-single-nq-base": "https://huggingface.co/facebook/dpr-question_encoder-single-nq-base/resolve/main/tokenizer.json", - "facebook/dpr-question_encoder-multiset-base": "https://huggingface.co/facebook/dpr-question_encoder-multiset-base/resolve/main/tokenizer.json", + "facebook/dpr-question_encoder-single-nq-base": ( + "https://huggingface.co/facebook/dpr-question_encoder-single-nq-base/resolve/main/tokenizer.json" + ), + "facebook/dpr-question_encoder-multiset-base": ( + "https://huggingface.co/facebook/dpr-question_encoder-multiset-base/resolve/main/tokenizer.json" + ), }, } READER_PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "facebook/dpr-reader-single-nq-base": "https://huggingface.co/facebook/dpr-reader-single-nq-base/resolve/main/vocab.txt", - "facebook/dpr-reader-multiset-base": "https://huggingface.co/facebook/dpr-reader-multiset-base/resolve/main/vocab.txt", + "facebook/dpr-reader-single-nq-base": ( + "https://huggingface.co/facebook/dpr-reader-single-nq-base/resolve/main/vocab.txt" + ), + "facebook/dpr-reader-multiset-base": ( + "https://huggingface.co/facebook/dpr-reader-multiset-base/resolve/main/vocab.txt" + ), }, "tokenizer_file": { - "facebook/dpr-reader-single-nq-base": "https://huggingface.co/facebook/dpr-reader-single-nq-base/resolve/main/tokenizer.json", - "facebook/dpr-reader-multiset-base": "https://huggingface.co/facebook/dpr-reader-multiset-base/resolve/main/tokenizer.json", + "facebook/dpr-reader-single-nq-base": ( + "https://huggingface.co/facebook/dpr-reader-single-nq-base/resolve/main/tokenizer.json" + ), + "facebook/dpr-reader-multiset-base": ( + "https://huggingface.co/facebook/dpr-reader-multiset-base/resolve/main/tokenizer.json" + ), }, } @@ -342,8 +366,8 @@ class CustomDPRReaderTokenizerMixin: `span_score` order and keeping max `top_spans` spans. Spans longer that `max_answer_length` are ignored. 
""" scores = [] - for (start_index, start_score) in enumerate(start_logits): - for (answer_length, end_score) in enumerate(end_logits[start_index : start_index + max_answer_length]): + for start_index, start_score in enumerate(start_logits): + for answer_length, end_score in enumerate(end_logits[start_index : start_index + max_answer_length]): scores.append(((start_index, start_index + answer_length), start_score + end_score)) scores = sorted(scores, key=lambda x: x[1], reverse=True) chosen_span_intervals = [] diff --git a/src/transformers/models/electra/configuration_electra.py b/src/transformers/models/electra/configuration_electra.py index 765498ef83..3ea54aa7ca 100644 --- a/src/transformers/models/electra/configuration_electra.py +++ b/src/transformers/models/electra/configuration_electra.py @@ -29,9 +29,15 @@ ELECTRA_PRETRAINED_CONFIG_ARCHIVE_MAP = { "google/electra-small-generator": "https://huggingface.co/google/electra-small-generator/resolve/main/config.json", "google/electra-base-generator": "https://huggingface.co/google/electra-base-generator/resolve/main/config.json", "google/electra-large-generator": "https://huggingface.co/google/electra-large-generator/resolve/main/config.json", - "google/electra-small-discriminator": "https://huggingface.co/google/electra-small-discriminator/resolve/main/config.json", - "google/electra-base-discriminator": "https://huggingface.co/google/electra-base-discriminator/resolve/main/config.json", - "google/electra-large-discriminator": "https://huggingface.co/google/electra-large-discriminator/resolve/main/config.json", + "google/electra-small-discriminator": ( + "https://huggingface.co/google/electra-small-discriminator/resolve/main/config.json" + ), + "google/electra-base-discriminator": ( + "https://huggingface.co/google/electra-base-discriminator/resolve/main/config.json" + ), + "google/electra-large-discriminator": ( + "https://huggingface.co/google/electra-large-discriminator/resolve/main/config.json" + ), } diff --git a/src/transformers/models/electra/convert_electra_original_tf_checkpoint_to_pytorch.py b/src/transformers/models/electra/convert_electra_original_tf_checkpoint_to_pytorch.py index 0e8a5c5917..d5d6376d7b 100644 --- a/src/transformers/models/electra/convert_electra_original_tf_checkpoint_to_pytorch.py +++ b/src/transformers/models/electra/convert_electra_original_tf_checkpoint_to_pytorch.py @@ -59,8 +59,7 @@ if __name__ == "__main__": default=None, type=str, required=True, - help="The config json file corresponding to the pre-trained model. \n" - "This specifies the model architecture.", + help="The config json file corresponding to the pre-trained model. \nThis specifies the model architecture.", ) parser.add_argument( "--pytorch_dump_path", default=None, type=str, required=True, help="Path to the output PyTorch model." @@ -70,8 +69,10 @@ if __name__ == "__main__": default=None, type=str, required=True, - help="Whether to export the generator or the discriminator. Should be a string, either 'discriminator' or " - "'generator'.", + help=( + "Whether to export the generator or the discriminator. Should be a string, either 'discriminator' or " + "'generator'." 
+ ), ) args = parser.parse_args() convert_tf_checkpoint_to_pytorch( diff --git a/src/transformers/models/electra/modeling_electra.py b/src/transformers/models/electra/modeling_electra.py index fa91647c39..f397af9062 100644 --- a/src/transformers/models/electra/modeling_electra.py +++ b/src/transformers/models/electra/modeling_electra.py @@ -487,7 +487,8 @@ class ElectraLayer(nn.Module): if self.is_decoder and encoder_hidden_states is not None: if not hasattr(self, "crossattention"): raise ValueError( - f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`" + f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers" + " by setting `config.add_cross_attention=True`" ) # cross_attn cached key/values tuple is at positions 3,4 of past_key_value tuple diff --git a/src/transformers/models/electra/modeling_flax_electra.py b/src/transformers/models/electra/modeling_flax_electra.py index 951eb1bc53..3e3a7103f0 100644 --- a/src/transformers/models/electra/modeling_flax_electra.py +++ b/src/transformers/models/electra/modeling_flax_electra.py @@ -193,8 +193,8 @@ class FlaxElectraSelfAttention(nn.Module): self.head_dim = self.config.hidden_size // self.config.num_attention_heads if self.config.hidden_size % self.config.num_attention_heads != 0: raise ValueError( - "`config.hidden_size`: {self.config.hidden_size} has to be a multiple of `config.num_attention_heads`\ : {self.config.num_attention_heads}" + f"`config.hidden_size`: {self.config.hidden_size} has to be a multiple of" + f" `config.num_attention_heads`: {self.config.num_attention_heads}" ) self.query = nn.Dense( @@ -548,8 +548,8 @@ class FlaxElectraLayerCollection(nn.Module): if head_mask is not None: if head_mask.shape[0] != (len(self.layers)): raise ValueError( - f"The head_mask should be specified for {len(self.layers)} layers, but it is for \ - {head_mask.shape[0]}." + f"The head_mask should be specified for {len(self.layers)} layers, but it is for" + f" {head_mask.shape[0]}." 
) for i, layer in enumerate(self.layers): diff --git a/src/transformers/models/electra/modeling_tf_electra.py b/src/transformers/models/electra/modeling_tf_electra.py index 6483988a30..57f17c8a97 100644 --- a/src/transformers/models/electra/modeling_tf_electra.py +++ b/src/transformers/models/electra/modeling_tf_electra.py @@ -344,8 +344,8 @@ class TFElectraLayer(tf.keras.layers.Layer): if self.is_decoder and encoder_hidden_states is not None: if not hasattr(self, "crossattention"): raise ValueError( - f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers " - "by setting `config.add_cross_attention=True`" + f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers" + " by setting `config.add_cross_attention=True`" ) # cross_attn cached key/values tuple is at positions 3,4 of past_key_value tuple diff --git a/src/transformers/models/electra/tokenization_electra.py b/src/transformers/models/electra/tokenization_electra.py index 9fd5568cde..2feeaaa2a7 100644 --- a/src/transformers/models/electra/tokenization_electra.py +++ b/src/transformers/models/electra/tokenization_electra.py @@ -20,12 +20,22 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"} PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "google/electra-small-generator": "https://huggingface.co/google/electra-small-generator/resolve/main/vocab.txt", + "google/electra-small-generator": ( + "https://huggingface.co/google/electra-small-generator/resolve/main/vocab.txt" + ), "google/electra-base-generator": "https://huggingface.co/google/electra-base-generator/resolve/main/vocab.txt", - "google/electra-large-generator": "https://huggingface.co/google/electra-large-generator/resolve/main/vocab.txt", - "google/electra-small-discriminator": "https://huggingface.co/google/electra-small-discriminator/resolve/main/vocab.txt", - "google/electra-base-discriminator": "https://huggingface.co/google/electra-base-discriminator/resolve/main/vocab.txt", - "google/electra-large-discriminator": "https://huggingface.co/google/electra-large-discriminator/resolve/main/vocab.txt", + "google/electra-large-generator": ( + "https://huggingface.co/google/electra-large-generator/resolve/main/vocab.txt" + ), + "google/electra-small-discriminator": ( + "https://huggingface.co/google/electra-small-discriminator/resolve/main/vocab.txt" + ), + "google/electra-base-discriminator": ( + "https://huggingface.co/google/electra-base-discriminator/resolve/main/vocab.txt" + ), + "google/electra-large-discriminator": ( + "https://huggingface.co/google/electra-large-discriminator/resolve/main/vocab.txt" + ), } } diff --git a/src/transformers/models/electra/tokenization_electra_fast.py b/src/transformers/models/electra/tokenization_electra_fast.py index 48a28cc98b..c37163672c 100644 --- a/src/transformers/models/electra/tokenization_electra_fast.py +++ b/src/transformers/models/electra/tokenization_electra_fast.py @@ -21,20 +21,42 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt", "tokenizer_file": "tokenizer.jso PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "google/electra-small-generator": "https://huggingface.co/google/electra-small-generator/resolve/main/vocab.txt", + "google/electra-small-generator": ( + "https://huggingface.co/google/electra-small-generator/resolve/main/vocab.txt" + ), "google/electra-base-generator": "https://huggingface.co/google/electra-base-generator/resolve/main/vocab.txt", - "google/electra-large-generator": 
"https://huggingface.co/google/electra-large-generator/resolve/main/vocab.txt", - "google/electra-small-discriminator": "https://huggingface.co/google/electra-small-discriminator/resolve/main/vocab.txt", - "google/electra-base-discriminator": "https://huggingface.co/google/electra-base-discriminator/resolve/main/vocab.txt", - "google/electra-large-discriminator": "https://huggingface.co/google/electra-large-discriminator/resolve/main/vocab.txt", + "google/electra-large-generator": ( + "https://huggingface.co/google/electra-large-generator/resolve/main/vocab.txt" + ), + "google/electra-small-discriminator": ( + "https://huggingface.co/google/electra-small-discriminator/resolve/main/vocab.txt" + ), + "google/electra-base-discriminator": ( + "https://huggingface.co/google/electra-base-discriminator/resolve/main/vocab.txt" + ), + "google/electra-large-discriminator": ( + "https://huggingface.co/google/electra-large-discriminator/resolve/main/vocab.txt" + ), }, "tokenizer_file": { - "google/electra-small-generator": "https://huggingface.co/google/electra-small-generator/resolve/main/tokenizer.json", - "google/electra-base-generator": "https://huggingface.co/google/electra-base-generator/resolve/main/tokenizer.json", - "google/electra-large-generator": "https://huggingface.co/google/electra-large-generator/resolve/main/tokenizer.json", - "google/electra-small-discriminator": "https://huggingface.co/google/electra-small-discriminator/resolve/main/tokenizer.json", - "google/electra-base-discriminator": "https://huggingface.co/google/electra-base-discriminator/resolve/main/tokenizer.json", - "google/electra-large-discriminator": "https://huggingface.co/google/electra-large-discriminator/resolve/main/tokenizer.json", + "google/electra-small-generator": ( + "https://huggingface.co/google/electra-small-generator/resolve/main/tokenizer.json" + ), + "google/electra-base-generator": ( + "https://huggingface.co/google/electra-base-generator/resolve/main/tokenizer.json" + ), + "google/electra-large-generator": ( + "https://huggingface.co/google/electra-large-generator/resolve/main/tokenizer.json" + ), + "google/electra-small-discriminator": ( + "https://huggingface.co/google/electra-small-discriminator/resolve/main/tokenizer.json" + ), + "google/electra-base-discriminator": ( + "https://huggingface.co/google/electra-base-discriminator/resolve/main/tokenizer.json" + ), + "google/electra-large-discriminator": ( + "https://huggingface.co/google/electra-large-discriminator/resolve/main/tokenizer.json" + ), }, } diff --git a/src/transformers/models/encoder_decoder/modeling_encoder_decoder.py b/src/transformers/models/encoder_decoder/modeling_encoder_decoder.py index a5e1b8311f..a7ff6a7e3a 100644 --- a/src/transformers/models/encoder_decoder/modeling_encoder_decoder.py +++ b/src/transformers/models/encoder_decoder/modeling_encoder_decoder.py @@ -35,10 +35,10 @@ logger = logging.get_logger(__name__) _CONFIG_FOR_DOC = "EncoderDecoderConfig" DEPRECATION_WARNING = ( - "Version v4.12.0 introduces a better way to train encoder-decoder models by computing the loss inside the " - "encoder-decoder framework rather than in the decoder itself. You may observe training discrepancies if fine-tuning " - "a model trained with versions anterior to 4.12.0. The decoder_input_ids are now created based on the labels, no " - "need to pass them yourself anymore." + "Version v4.12.0 introduces a better way to train encoder-decoder models by computing the loss inside the" + " encoder-decoder framework rather than in the decoder itself. 
You may observe training discrepancies if" + " fine-tuning a model trained with versions prior to 4.12.0. The decoder_input_ids are now created based on the" + " labels, no need to pass them yourself anymore." ) ENCODER_DECODER_START_DOCSTRING = r""" @@ -189,10 +189,10 @@ class EncoderDecoderModel(PreTrainedModel): if config.decoder.cross_attention_hidden_size is not None: if config.decoder.cross_attention_hidden_size != config.encoder.hidden_size: raise ValueError( - "If `cross_attention_hidden_size` is specified in the decoder's configuration, " - "it has to be equal to the encoder's `hidden_size`. " - f"Got {config.decoder.cross_attention_hidden_size} for `config.decoder.cross_attention_hidden_size` " - f"and {config.encoder.hidden_size} for `config.encoder.hidden_size`." + "If `cross_attention_hidden_size` is specified in the decoder's configuration, it has to be equal" + f" to the encoder's `hidden_size`. Got {config.decoder.cross_attention_hidden_size} for" + f" `config.decoder.cross_attention_hidden_size` and {config.encoder.hidden_size} for" + " `config.encoder.hidden_size`." ) # initialize with config @@ -213,11 +213,13 @@ class EncoderDecoderModel(PreTrainedModel): if self.encoder.config.to_dict() != self.config.encoder.to_dict(): logger.warning( - f"Config of the encoder: {self.encoder.__class__} is overwritten by shared encoder config: {self.config.encoder}" + f"Config of the encoder: {self.encoder.__class__} is overwritten by shared encoder config:" + f" {self.config.encoder}" ) if self.decoder.config.to_dict() != self.config.decoder.to_dict(): logger.warning( - f"Config of the decoder: {self.decoder.__class__} is overwritten by shared decoder config: {self.config.decoder}" + f"Config of the decoder: {self.decoder.__class__} is overwritten by shared decoder config:" + f" {self.config.decoder}" ) # make sure that the individual model's config refers to the shared config @@ -401,10 +403,9 @@ class EncoderDecoderModel(PreTrainedModel): if decoder_config.is_decoder is False or decoder_config.add_cross_attention is False: logger.info( - f"Initializing {decoder_pretrained_model_name_or_path} as a decoder model. " - f"Cross attention layers are added to {decoder_pretrained_model_name_or_path} " - f"and randomly initialized if {decoder_pretrained_model_name_or_path}'s architecture allows for " - "cross attention layers." + f"Initializing {decoder_pretrained_model_name_or_path} as a decoder model. Cross attention" + f" layers are added to {decoder_pretrained_model_name_or_path} and randomly initialized if" + f" {decoder_pretrained_model_name_or_path}'s architecture allows for cross attention layers." ) decoder_config.is_decoder = True decoder_config.add_cross_attention = True @@ -572,8 +573,9 @@ class EncoderDecoderModel(PreTrainedModel): def resize_token_embeddings(self, *args, **kwargs): raise NotImplementedError( - "Resizing the embedding layers via the EncoderDecoderModel directly is not supported. " - "Please use the respective methods of the wrapped objects (model.encoder.resize_token_embeddings(...) or model.decoder.resize_token_embeddings(...))" + "Resizing the embedding layers via the EncoderDecoderModel directly is not supported. Please use the" + " respective methods of the wrapped objects (model.encoder.resize_token_embeddings(...) 
or" + " model.decoder.resize_token_embeddings(...))" ) def _reorder_cache(self, past, beam_idx): diff --git a/src/transformers/models/encoder_decoder/modeling_flax_encoder_decoder.py b/src/transformers/models/encoder_decoder/modeling_flax_encoder_decoder.py index 267b8f40a5..36df84f305 100644 --- a/src/transformers/models/encoder_decoder/modeling_flax_encoder_decoder.py +++ b/src/transformers/models/encoder_decoder/modeling_flax_encoder_decoder.py @@ -330,10 +330,10 @@ class FlaxEncoderDecoderModel(FlaxPreTrainedModel): if config.decoder.cross_attention_hidden_size is not None: if config.decoder.cross_attention_hidden_size != config.encoder.hidden_size: raise ValueError( - "If `cross_attention_hidden_size` is specified in the decoder's configuration, " - "it has to be equal to the encoder's `hidden_size`. " - f"Got {config.decoder.cross_attention_hidden_size} for `config.decoder.cross_attention_hidden_size` " - f"and {config.encoder.hidden_size} for `config.encoder.hidden_size`." + "If `cross_attention_hidden_size` is specified in the decoder's configuration, it has to be equal" + f" to the encoder's `hidden_size`. Got {config.decoder.cross_attention_hidden_size} for" + f" `config.decoder.cross_attention_hidden_size` and {config.encoder.hidden_size} for" + " `config.encoder.hidden_size`." ) module = self.module_class(config=config, dtype=dtype, **kwargs) @@ -354,7 +354,8 @@ class FlaxEncoderDecoderModel(FlaxPreTrainedModel): decoder_batch_size, decoder_sequence_length = decoder_input_ids.shape if not decoder_batch_size == batch_size: raise ValueError( - f"The inputs of encoder and decoder should have the same batch size, but got {batch_size} for encoder and {decoder_batch_size} for decoder." + f"The inputs of encoder and decoder should have the same batch size, but got {batch_size} for encoder" + f" and {decoder_batch_size} for decoder." ) decoder_position_ids = jnp.broadcast_to( jnp.arange(decoder_sequence_length)[None, :], (decoder_batch_size, decoder_sequence_length) @@ -689,7 +690,8 @@ class FlaxEncoderDecoderModel(FlaxPreTrainedModel): # prepare decoder inputs if decoder_input_ids is None: raise ValueError( - "`decoder_input_ids` cannot be `None`. For sequence to sequence training, `decoder_position_ids` must be specified as an input argument." + "`decoder_input_ids` cannot be `None`. For sequence to sequence training, `decoder_position_ids` must" + " be specified as an input argument." ) if decoder_attention_mask is None: decoder_attention_mask = jnp.ones_like(decoder_input_ids) @@ -869,10 +871,9 @@ class FlaxEncoderDecoderModel(FlaxPreTrainedModel): ) if decoder_config.is_decoder is False or decoder_config.add_cross_attention is False: logger.info( - f"Initializing {decoder_pretrained_model_name_or_path} as a decoder model. " - f"Cross attention layers are added to {decoder_pretrained_model_name_or_path} " - f"and randomly initialized if {decoder_pretrained_model_name_or_path}'s architecture allows for " - "cross attention layers." + f"Initializing {decoder_pretrained_model_name_or_path} as a decoder model. Cross attention" + f" layers are added to {decoder_pretrained_model_name_or_path} and randomly initialized if" + f" {decoder_pretrained_model_name_or_path}'s architecture allows for cross attention layers." 
) decoder_config.is_decoder = True decoder_config.add_cross_attention = True diff --git a/src/transformers/models/encoder_decoder/modeling_tf_encoder_decoder.py b/src/transformers/models/encoder_decoder/modeling_tf_encoder_decoder.py index 96c93d31ca..5c74e8433e 100644 --- a/src/transformers/models/encoder_decoder/modeling_tf_encoder_decoder.py +++ b/src/transformers/models/encoder_decoder/modeling_tf_encoder_decoder.py @@ -43,10 +43,10 @@ logger = logging.get_logger(__name__) _CONFIG_FOR_DOC = "EncoderDecoderConfig" DEPRECATION_WARNING = ( - "Version v4.17.0 introduces a better way to train encoder-decoder models by computing the loss inside the " - "encoder-decoder framework rather than in the decoder itself. You may observe training discrepancies if fine-tuning " - "a model trained with versions anterior to 4.17.0. The decoder_input_ids are now created based on the labels, no " - "need to pass them yourself anymore." + "Version v4.17.0 introduces a better way to train encoder-decoder models by computing the loss inside the" + " encoder-decoder framework rather than in the decoder itself. You may observe training discrepancies if" + " fine-tuning a model trained with versions anterior to 4.17.0. The decoder_input_ids are now created based on the" + " labels, no need to pass them yourself anymore." ) ENCODER_DECODER_START_DOCSTRING = r""" @@ -211,10 +211,10 @@ class TFEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLoss): if config.decoder.cross_attention_hidden_size is not None: if config.decoder.cross_attention_hidden_size != config.encoder.hidden_size: raise ValueError( - "If `cross_attention_hidden_size` is specified in the decoder's configuration, " - "it has to be equal to the encoder's `hidden_size`. " - f"Got {config.decoder.cross_attention_hidden_size} for `config.decoder.cross_attention_hidden_size` " - f"and {config.encoder.hidden_size} for `config.encoder.hidden_size`." + "If `cross_attention_hidden_size` is specified in the decoder's configuration, it has to be equal" + f" to the encoder's `hidden_size`. Got {config.decoder.cross_attention_hidden_size} for" + f" `config.decoder.cross_attention_hidden_size` and {config.encoder.hidden_size} for" + " `config.encoder.hidden_size`." ) # initialize with config @@ -231,11 +231,13 @@ class TFEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLoss): if self.encoder.config.to_dict() != self.config.encoder.to_dict(): logger.warning( - f"Config of the encoder: {self.encoder.__class__} is overwritten by shared encoder config: {self.config.encoder}" + f"Config of the encoder: {self.encoder.__class__} is overwritten by shared encoder config:" + f" {self.config.encoder}" ) if self.decoder.config.to_dict() != self.config.decoder.to_dict(): logger.warning( - f"Config of the decoder: {self.decoder.__class__} is overwritten by shared decoder config: {self.config.decoder}" + f"Config of the decoder: {self.decoder.__class__} is overwritten by shared decoder config:" + f" {self.config.decoder}" ) # make sure that the individual model's config refers to the shared config @@ -319,10 +321,10 @@ class TFEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLoss): from_pt = kwargs.pop("from_pt", False) if from_pt: raise ValueError( - "Initializing `TFEncoderDecoderModel` from a pytorch checkpoint is not supported currently. " - "Use a tensorflow checkpoint instead. If only the pytorch checkpoints are available, " - "create the encoder and decoder models separately, and use them to initialize `TFEncoderDecoderModel`. 
" - "Check `TFEncoderDecoderModel.from_encoder_decoder_pretrained()` for more details." + "Initializing `TFEncoderDecoderModel` from a pytorch checkpoint is not supported currently. Use a" + " tensorflow checkpoint instead. If only the pytorch checkpoints are available, create the encoder and" + " decoder models separately, and use them to initialize `TFEncoderDecoderModel`. Check" + " `TFEncoderDecoderModel.from_encoder_decoder_pretrained()` for more details." ) return super().from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs) @@ -450,10 +452,9 @@ class TFEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLoss): decoder_config = AutoConfig.from_pretrained(decoder_pretrained_model_name_or_path) if decoder_config.is_decoder is False or decoder_config.add_cross_attention is False: logger.info( - f"Initializing {decoder_pretrained_model_name_or_path} as a decoder model. " - f"Cross attention layers are added to {decoder_pretrained_model_name_or_path} " - f"and randomly initialized if {decoder_pretrained_model_name_or_path}'s architecture allows for " - "cross attention layers." + f"Initializing {decoder_pretrained_model_name_or_path} as a decoder model. Cross attention" + f" layers are added to {decoder_pretrained_model_name_or_path} and randomly initialized if" + f" {decoder_pretrained_model_name_or_path}'s architecture allows for cross attention layers." ) decoder_config.is_decoder = True decoder_config.add_cross_attention = True @@ -702,8 +703,9 @@ class TFEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLoss): def resize_token_embeddings(self, *args, **kwargs): raise NotImplementedError( - "Resizing the embedding layers via the TFEncoderDecoderModel directly is not supported." - "Please use the respective methods of the wrapped objects (model.encoder.resize_token_embeddings(...) or model.decoder.resize_token_embeddings(...))" + "Resizing the embedding layers via the TFEncoderDecoderModel directly is not supported.Please use the" + " respective methods of the wrapped objects (model.encoder.resize_token_embeddings(...) 
or" + " model.decoder.resize_token_embeddings(...))" ) def _reorder_cache(self, past, beam_idx): diff --git a/src/transformers/models/flaubert/tokenization_flaubert.py b/src/transformers/models/flaubert/tokenization_flaubert.py index 828525d756..4fbb3783d8 100644 --- a/src/transformers/models/flaubert/tokenization_flaubert.py +++ b/src/transformers/models/flaubert/tokenization_flaubert.py @@ -32,16 +32,28 @@ VOCAB_FILES_NAMES = { PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "flaubert/flaubert_small_cased": "https://huggingface.co/flaubert/flaubert_small_cased/resolve/main/vocab.json", - "flaubert/flaubert_base_uncased": "https://huggingface.co/flaubert/flaubert_base_uncased/resolve/main/vocab.json", + "flaubert/flaubert_small_cased": ( + "https://huggingface.co/flaubert/flaubert_small_cased/resolve/main/vocab.json" + ), + "flaubert/flaubert_base_uncased": ( + "https://huggingface.co/flaubert/flaubert_base_uncased/resolve/main/vocab.json" + ), "flaubert/flaubert_base_cased": "https://huggingface.co/flaubert/flaubert_base_cased/resolve/main/vocab.json", - "flaubert/flaubert_large_cased": "https://huggingface.co/flaubert/flaubert_large_cased/resolve/main/vocab.json", + "flaubert/flaubert_large_cased": ( + "https://huggingface.co/flaubert/flaubert_large_cased/resolve/main/vocab.json" + ), }, "merges_file": { - "flaubert/flaubert_small_cased": "https://huggingface.co/flaubert/flaubert_small_cased/resolve/main/merges.txt", - "flaubert/flaubert_base_uncased": "https://huggingface.co/flaubert/flaubert_base_uncased/resolve/main/merges.txt", + "flaubert/flaubert_small_cased": ( + "https://huggingface.co/flaubert/flaubert_small_cased/resolve/main/merges.txt" + ), + "flaubert/flaubert_base_uncased": ( + "https://huggingface.co/flaubert/flaubert_base_uncased/resolve/main/merges.txt" + ), "flaubert/flaubert_base_cased": "https://huggingface.co/flaubert/flaubert_base_cased/resolve/main/merges.txt", - "flaubert/flaubert_large_cased": "https://huggingface.co/flaubert/flaubert_large_cased/resolve/main/merges.txt", + "flaubert/flaubert_large_cased": ( + "https://huggingface.co/flaubert/flaubert_large_cased/resolve/main/merges.txt" + ), }, } @@ -130,7 +142,8 @@ class FlaubertTokenizer(XLMTokenizer): lang = "fr" if lang and self.lang2id and lang not in self.lang2id: logger.error( - "Supplied language code not found in lang2id mapping. Please check that your language is supported by the loaded pretrained model." + "Supplied language code not found in lang2id mapping. Please check that your language is supported by" + " the loaded pretrained model." ) if bypass_tokenizer: diff --git a/src/transformers/models/flava/modeling_flava.py b/src/transformers/models/flava/modeling_flava.py index aa40a9174e..c0841a0e27 100644 --- a/src/transformers/models/flava/modeling_flava.py +++ b/src/transformers/models/flava/modeling_flava.py @@ -369,7 +369,8 @@ class PatchEmbeddings(nn.Module): if not interpolate_pos_encoding: if height != self.image_size[0] or width != self.image_size[1]: raise ValueError( - f"Input image size ({height}*{width}) doesn't match model ({self.image_size[0]}*{self.image_size[1]})." + f"Input image size ({height}*{width}) doesn't match model" + f" ({self.image_size[0]}*{self.image_size[1]})." 
) x = self.projection(pixel_values).flatten(2).transpose(1, 2) return x @@ -1207,12 +1208,14 @@ class FlavaModel(FlavaPreTrainedModel): if not isinstance(config.text_config, FlavaTextConfig): raise ValueError( - f"config.text_config is expected to be of type FlavaTextConfig but is of type {type(config.text_config)}." + "config.text_config is expected to be of type FlavaTextConfig but is of type" + f" {type(config.text_config)}." ) if not isinstance(config.image_config, FlavaImageConfig): raise ValueError( - f"config.image_config is expected to be of type FlavaImageConfig but is of type {type(config.image_config)}." + "config.image_config is expected to be of type FlavaImageConfig but is of type" + f" {type(config.image_config)}." ) if not isinstance(config.multimodal_config, FlavaMultimodalConfig): @@ -1832,8 +1835,9 @@ class FlavaForPreTraining(FlavaPreTrainedModel): if input_ids_masked is None and input_ids is not None: logger.warning( - "`input_ids_masked` isn't passed which means MLM loss won't be calculated correctly" - "Setting it to `input_ids` so that model can work. Please pass it if this is unintentional. This is usually OKAY if you are doing inference on unmasked text..." + "`input_ids_masked` isn't passed which means MLM loss won't be calculated correctlySetting it to" + " `input_ids` so that model can work. Please pass it if this is unintentional. This is usually OKAY if" + " you are doing inference on unmasked text..." ) input_ids_masked = input_ids diff --git a/src/transformers/models/fnet/convert_fnet_original_flax_checkpoint_to_pytorch.py b/src/transformers/models/fnet/convert_fnet_original_flax_checkpoint_to_pytorch.py index ffb5667f84..27b6563e5d 100644 --- a/src/transformers/models/fnet/convert_fnet_original_flax_checkpoint_to_pytorch.py +++ b/src/transformers/models/fnet/convert_fnet_original_flax_checkpoint_to_pytorch.py @@ -147,8 +147,10 @@ if __name__ == "__main__": default=None, type=str, required=True, - help="The config json file corresponding to the pre-trained FNet model. \n" - "This specifies the model architecture.", + help=( + "The config json file corresponding to the pre-trained FNet model. \n" + "This specifies the model architecture." + ), ) parser.add_argument("--save_path", default=None, type=str, required=True, help="Path to the output model.") args = parser.parse_args() diff --git a/src/transformers/models/fnet/modeling_fnet.py b/src/transformers/models/fnet/modeling_fnet.py index 3c301727a6..8ed6718231 100755 --- a/src/transformers/models/fnet/modeling_fnet.py +++ b/src/transformers/models/fnet/modeling_fnet.py @@ -182,7 +182,8 @@ class FNetBasicFourierTransform(nn.Module): ) else: logging.warning( - "SciPy is needed for DFT matrix calculation and is not found. Using TPU optimized fast fourier transform instead." + "SciPy is needed for DFT matrix calculation and is not found. Using TPU optimized fast fourier" + " transform instead." ) self.fourier_transform = fftn else: @@ -580,7 +581,8 @@ class FNetModel(FNetPreTrainedModel): and self.config.tpu_short_seq_length != seq_length ): raise ValueError( - "The `tpu_short_seq_length` in FNetConfig should be set equal to the sequence length being passed to the model when using TPU optimizations." + "The `tpu_short_seq_length` in FNetConfig should be set equal to the sequence length being passed to" + " the model when using TPU optimizations." 
) device = input_ids.device if input_ids is not None else inputs_embeds.device @@ -837,7 +839,8 @@ class FNetForNextSentencePrediction(FNetPreTrainedModel): if "next_sentence_label" in kwargs: warnings.warn( - "The `next_sentence_label` argument is deprecated and will be removed in a future version, use `labels` instead.", + "The `next_sentence_label` argument is deprecated and will be removed in a future version, use" + " `labels` instead.", FutureWarning, ) labels = kwargs.pop("next_sentence_label") diff --git a/src/transformers/models/fsmt/convert_fsmt_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/fsmt/convert_fsmt_original_pytorch_checkpoint_to_pytorch.py index 7257f7faa2..85f5290a9e 100755 --- a/src/transformers/models/fsmt/convert_fsmt_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/fsmt/convert_fsmt_original_pytorch_checkpoint_to_pytorch.py @@ -269,7 +269,10 @@ if __name__ == "__main__": default=None, type=str, required=True, - help="Path to the official PyTorch checkpoint file which is expected to reside in the dump dir with dicts, bpecodes, etc.", + help=( + "Path to the official PyTorch checkpoint file which is expected to reside in the dump dir with dicts," + " bpecodes, etc." + ), ) parser.add_argument( "--pytorch_dump_folder_path", default=None, type=str, required=True, help="Path to the output PyTorch model." diff --git a/src/transformers/models/fsmt/modeling_fsmt.py b/src/transformers/models/fsmt/modeling_fsmt.py index 14823c4352..937b8a7128 100644 --- a/src/transformers/models/fsmt/modeling_fsmt.py +++ b/src/transformers/models/fsmt/modeling_fsmt.py @@ -738,9 +738,10 @@ class FSMTDecoder(nn.Module): # check if head_mask has a correct number of layers specified if desired for attn_mask, mask_name in zip([head_mask, cross_attn_head_mask], ["head_mask", "cross_attn_head_mask"]): if attn_mask is not None: - assert attn_mask.size()[0] == ( - len(self.layers) - ), f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}." + assert attn_mask.size()[0] == (len(self.layers)), ( + f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for" + f" {head_mask.size()[0]}." 
+ ) for idx, decoder_layer in enumerate(self.layers): # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) if output_hidden_states: diff --git a/src/transformers/models/funnel/configuration_funnel.py b/src/transformers/models/funnel/configuration_funnel.py index 5684427cb7..c792b05638 100644 --- a/src/transformers/models/funnel/configuration_funnel.py +++ b/src/transformers/models/funnel/configuration_funnel.py @@ -25,8 +25,12 @@ FUNNEL_PRETRAINED_CONFIG_ARCHIVE_MAP = { "funnel-transformer/small-base": "https://huggingface.co/funnel-transformer/small-base/resolve/main/config.json", "funnel-transformer/medium": "https://huggingface.co/funnel-transformer/medium/resolve/main/config.json", "funnel-transformer/medium-base": "https://huggingface.co/funnel-transformer/medium-base/resolve/main/config.json", - "funnel-transformer/intermediate": "https://huggingface.co/funnel-transformer/intermediate/resolve/main/config.json", - "funnel-transformer/intermediate-base": "https://huggingface.co/funnel-transformer/intermediate-base/resolve/main/config.json", + "funnel-transformer/intermediate": ( + "https://huggingface.co/funnel-transformer/intermediate/resolve/main/config.json" + ), + "funnel-transformer/intermediate-base": ( + "https://huggingface.co/funnel-transformer/intermediate-base/resolve/main/config.json" + ), "funnel-transformer/large": "https://huggingface.co/funnel-transformer/large/resolve/main/config.json", "funnel-transformer/large-base": "https://huggingface.co/funnel-transformer/large-base/resolve/main/config.json", "funnel-transformer/xlarge": "https://huggingface.co/funnel-transformer/xlarge/resolve/main/config.json", diff --git a/src/transformers/models/funnel/convert_funnel_original_tf_checkpoint_to_pytorch.py b/src/transformers/models/funnel/convert_funnel_original_tf_checkpoint_to_pytorch.py index b13d6dcd10..848101f083 100755 --- a/src/transformers/models/funnel/convert_funnel_original_tf_checkpoint_to_pytorch.py +++ b/src/transformers/models/funnel/convert_funnel_original_tf_checkpoint_to_pytorch.py @@ -51,8 +51,7 @@ if __name__ == "__main__": default=None, type=str, required=True, - help="The config json file corresponding to the pre-trained model. \n" - "This specifies the model architecture.", + help="The config json file corresponding to the pre-trained model. \nThis specifies the model architecture.", ) parser.add_argument( "--pytorch_dump_path", default=None, type=str, required=True, help="Path to the output PyTorch model." 
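A note on the string-wrapping pattern used throughout the hunks above: Python joins adjacent string literals with no separator, so the inter-word space has to live inside one of the fragments. The `--preview` style moves that space to the start of the continuation line, which makes a dropped separator easy to spot in review. A minimal, self-contained sketch (the message text is illustrative only, not taken from any file):

help_text = (
    "The config json file corresponding to the pre-trained model."
    " This specifies the model architecture."
)
# The leading space on the second fragment is the only separator between
# "model." and "This"; without it the two words would fuse, exactly as in
# the "not supported.Please use" message corrected earlier in this diff.
assert "model. This" in help_text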
diff --git a/src/transformers/models/funnel/modeling_funnel.py b/src/transformers/models/funnel/modeling_funnel.py
index 267d32f2a4..5caee872dc 100644
--- a/src/transformers/models/funnel/modeling_funnel.py
+++ b/src/transformers/models/funnel/modeling_funnel.py
@@ -671,7 +671,7 @@ class FunnelEncoder(nn.Module):
                 pooled_hidden, attention_inputs = self.attention_structure.pre_attention_pooling(
                     hidden, attention_inputs
                 )
-            for (layer_index, layer) in enumerate(block):
+            for layer_index, layer in enumerate(block):
                 for repeat_index in range(self.config.block_repeats[block_index]):
                     do_pooling = (repeat_index == 0) and (layer_index == 0) and pooling_flag
                     if do_pooling:
diff --git a/src/transformers/models/funnel/modeling_tf_funnel.py b/src/transformers/models/funnel/modeling_tf_funnel.py
index 4e4f95d850..92a4453d1c 100644
--- a/src/transformers/models/funnel/modeling_tf_funnel.py
+++ b/src/transformers/models/funnel/modeling_tf_funnel.py
@@ -623,7 +623,7 @@ class TFFunnelEncoder(tf.keras.layers.Layer):
                     hidden, attention_inputs
                 )
 
-            for (layer_index, layer) in enumerate(block):
+            for layer_index, layer in enumerate(block):
                 for repeat_index in range(self.block_repeats[block_index]):
                     do_pooling = (repeat_index == 0) and (layer_index == 0) and pooling_flag
                     if do_pooling:
diff --git a/src/transformers/models/funnel/tokenization_funnel.py b/src/transformers/models/funnel/tokenization_funnel.py
index bb8b7548e9..250d0d51da 100644
--- a/src/transformers/models/funnel/tokenization_funnel.py
+++ b/src/transformers/models/funnel/tokenization_funnel.py
@@ -42,13 +42,21 @@ PRETRAINED_VOCAB_FILES_MAP = {
         "funnel-transformer/small": "https://huggingface.co/funnel-transformer/small/resolve/main/vocab.txt",
         "funnel-transformer/small-base": "https://huggingface.co/funnel-transformer/small-base/resolve/main/vocab.txt",
         "funnel-transformer/medium": "https://huggingface.co/funnel-transformer/medium/resolve/main/vocab.txt",
-        "funnel-transformer/medium-base": "https://huggingface.co/funnel-transformer/medium-base/resolve/main/vocab.txt",
-        "funnel-transformer/intermediate": "https://huggingface.co/funnel-transformer/intermediate/resolve/main/vocab.txt",
-        "funnel-transformer/intermediate-base": "https://huggingface.co/funnel-transformer/intermediate-base/resolve/main/vocab.txt",
+        "funnel-transformer/medium-base": (
+            "https://huggingface.co/funnel-transformer/medium-base/resolve/main/vocab.txt"
+        ),
+        "funnel-transformer/intermediate": (
+            "https://huggingface.co/funnel-transformer/intermediate/resolve/main/vocab.txt"
+        ),
+        "funnel-transformer/intermediate-base": (
+            "https://huggingface.co/funnel-transformer/intermediate-base/resolve/main/vocab.txt"
+        ),
         "funnel-transformer/large": "https://huggingface.co/funnel-transformer/large/resolve/main/vocab.txt",
         "funnel-transformer/large-base": "https://huggingface.co/funnel-transformer/large-base/resolve/main/vocab.txt",
         "funnel-transformer/xlarge": "https://huggingface.co/funnel-transformer/xlarge/resolve/main/vocab.txt",
-        "funnel-transformer/xlarge-base": "https://huggingface.co/funnel-transformer/xlarge-base/resolve/main/vocab.txt",
+        "funnel-transformer/xlarge-base": (
+            "https://huggingface.co/funnel-transformer/xlarge-base/resolve/main/vocab.txt"
+        ),
     }
 }
 PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {f"funnel-transformer/{name}": 512 for name in _model_names}
diff --git a/src/transformers/models/funnel/tokenization_funnel_fast.py b/src/transformers/models/funnel/tokenization_funnel_fast.py
index 9fa7335ea5..159184bf4b 100644
--- a/src/transformers/models/funnel/tokenization_funnel_fast.py
+++ b/src/transformers/models/funnel/tokenization_funnel_fast.py
@@ -43,25 +43,45 @@ PRETRAINED_VOCAB_FILES_MAP = {
         "funnel-transformer/small": "https://huggingface.co/funnel-transformer/small/resolve/main/vocab.txt",
         "funnel-transformer/small-base": "https://huggingface.co/funnel-transformer/small-base/resolve/main/vocab.txt",
         "funnel-transformer/medium": "https://huggingface.co/funnel-transformer/medium/resolve/main/vocab.txt",
-        "funnel-transformer/medium-base": "https://huggingface.co/funnel-transformer/medium-base/resolve/main/vocab.txt",
-        "funnel-transformer/intermediate": "https://huggingface.co/funnel-transformer/intermediate/resolve/main/vocab.txt",
-        "funnel-transformer/intermediate-base": "https://huggingface.co/funnel-transformer/intermediate-base/resolve/main/vocab.txt",
+        "funnel-transformer/medium-base": (
+            "https://huggingface.co/funnel-transformer/medium-base/resolve/main/vocab.txt"
+        ),
+        "funnel-transformer/intermediate": (
+            "https://huggingface.co/funnel-transformer/intermediate/resolve/main/vocab.txt"
+        ),
+        "funnel-transformer/intermediate-base": (
+            "https://huggingface.co/funnel-transformer/intermediate-base/resolve/main/vocab.txt"
+        ),
         "funnel-transformer/large": "https://huggingface.co/funnel-transformer/large/resolve/main/vocab.txt",
         "funnel-transformer/large-base": "https://huggingface.co/funnel-transformer/large-base/resolve/main/vocab.txt",
         "funnel-transformer/xlarge": "https://huggingface.co/funnel-transformer/xlarge/resolve/main/vocab.txt",
-        "funnel-transformer/xlarge-base": "https://huggingface.co/funnel-transformer/xlarge-base/resolve/main/vocab.txt",
+        "funnel-transformer/xlarge-base": (
+            "https://huggingface.co/funnel-transformer/xlarge-base/resolve/main/vocab.txt"
+        ),
     },
     "tokenizer_file": {
         "funnel-transformer/small": "https://huggingface.co/funnel-transformer/small/resolve/main/tokenizer.json",
-        "funnel-transformer/small-base": "https://huggingface.co/funnel-transformer/small-base/resolve/main/tokenizer.json",
+        "funnel-transformer/small-base": (
+            "https://huggingface.co/funnel-transformer/small-base/resolve/main/tokenizer.json"
+        ),
         "funnel-transformer/medium": "https://huggingface.co/funnel-transformer/medium/resolve/main/tokenizer.json",
-        "funnel-transformer/medium-base": "https://huggingface.co/funnel-transformer/medium-base/resolve/main/tokenizer.json",
-        "funnel-transformer/intermediate": "https://huggingface.co/funnel-transformer/intermediate/resolve/main/tokenizer.json",
-        "funnel-transformer/intermediate-base": "https://huggingface.co/funnel-transformer/intermediate-base/resolve/main/tokenizer.json",
+        "funnel-transformer/medium-base": (
+            "https://huggingface.co/funnel-transformer/medium-base/resolve/main/tokenizer.json"
+        ),
+        "funnel-transformer/intermediate": (
+            "https://huggingface.co/funnel-transformer/intermediate/resolve/main/tokenizer.json"
+        ),
+        "funnel-transformer/intermediate-base": (
+            "https://huggingface.co/funnel-transformer/intermediate-base/resolve/main/tokenizer.json"
+        ),
         "funnel-transformer/large": "https://huggingface.co/funnel-transformer/large/resolve/main/tokenizer.json",
-        "funnel-transformer/large-base": "https://huggingface.co/funnel-transformer/large-base/resolve/main/tokenizer.json",
+        "funnel-transformer/large-base": (
+            "https://huggingface.co/funnel-transformer/large-base/resolve/main/tokenizer.json"
+        ),
         "funnel-transformer/xlarge": "https://huggingface.co/funnel-transformer/xlarge/resolve/main/tokenizer.json",
-        "funnel-transformer/xlarge-base": "https://huggingface.co/funnel-transformer/xlarge-base/resolve/main/tokenizer.json",
+        "funnel-transformer/xlarge-base": (
+            "https://huggingface.co/funnel-transformer/xlarge-base/resolve/main/tokenizer.json"
+        ),
     },
 }
 PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {f"funnel-transformer/{name}": 512 for name in _model_names}
diff --git a/src/transformers/models/gpt2/convert_gpt2_original_tf_checkpoint_to_pytorch.py b/src/transformers/models/gpt2/convert_gpt2_original_tf_checkpoint_to_pytorch.py
index 4d8b465afa..066ba06503 100755
--- a/src/transformers/models/gpt2/convert_gpt2_original_tf_checkpoint_to_pytorch.py
+++ b/src/transformers/models/gpt2/convert_gpt2_original_tf_checkpoint_to_pytorch.py
@@ -60,8 +60,10 @@ if __name__ == "__main__":
         "--gpt2_config_file",
         default="",
         type=str,
-        help="An optional config json file corresponding to the pre-trained OpenAI model. \n"
-        "This specifies the model architecture.",
+        help=(
+            "An optional config json file corresponding to the pre-trained OpenAI model. \n"
+            "This specifies the model architecture."
+        ),
     )
     args = parser.parse_args()
     convert_gpt2_checkpoint_to_pytorch(args.gpt2_checkpoint_path, args.gpt2_config_file, args.pytorch_dump_folder_path)
diff --git a/src/transformers/models/gpt2/modeling_gpt2.py b/src/transformers/models/gpt2/modeling_gpt2.py
index c79dfd76a0..5e981bf9f2 100644
--- a/src/transformers/models/gpt2/modeling_gpt2.py
+++ b/src/transformers/models/gpt2/modeling_gpt2.py
@@ -146,7 +146,8 @@ class GPT2Attention(nn.Module):
         self.split_size = self.embed_dim
         if self.head_dim * self.num_heads != self.embed_dim:
             raise ValueError(
-                f"`embed_dim` must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`: {self.num_heads})."
+                f"`embed_dim` must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`:"
+                f" {self.num_heads})."
             )
 
         self.scale_attn_weights = config.scale_attn_weights
@@ -1406,7 +1407,7 @@ class GPT2ForSequenceClassification(GPT2PreTrainedModel):
                 sequence_lengths = -1
                 logger.warning(
                     f"{self.__class__.__name__} will not detect padding tokens in `inputs_embeds`. Results may be "
-                    f"unexpected if using padding tokens in conjunction with `inputs_embeds.`"
+                    "unexpected if using padding tokens in conjunction with `inputs_embeds.`"
                 )
 
         pooled_logits = logits[torch.arange(batch_size, device=self.device), sequence_lengths]
diff --git a/src/transformers/models/gpt2/modeling_tf_gpt2.py b/src/transformers/models/gpt2/modeling_tf_gpt2.py
index 45d29b6779..2422af5ebc 100644
--- a/src/transformers/models/gpt2/modeling_tf_gpt2.py
+++ b/src/transformers/models/gpt2/modeling_tf_gpt2.py
@@ -1240,7 +1240,7 @@ class TFGPT2ForSequenceClassification(TFGPT2PreTrainedModel, TFSequenceClassific
             sequence_lengths = -1
             logger.warning(
                 f"{self.__class__.__name__} will not detect padding tokens in `inputs_embeds`. Results may be "
-                f"unexpected if using padding tokens in conjunction with `inputs_embeds.`"
+                "unexpected if using padding tokens in conjunction with `inputs_embeds.`"
             )
 
         loss = None
diff --git a/src/transformers/models/gpt_neo/convert_gpt_neo_mesh_tf_to_pytorch.py b/src/transformers/models/gpt_neo/convert_gpt_neo_mesh_tf_to_pytorch.py
index 7ee1c17477..4a5fddd0a9 100644
--- a/src/transformers/models/gpt_neo/convert_gpt_neo_mesh_tf_to_pytorch.py
+++ b/src/transformers/models/gpt_neo/convert_gpt_neo_mesh_tf_to_pytorch.py
@@ -60,8 +60,10 @@ if __name__ == "__main__":
         default=None,
         type=str,
         required=True,
-        help="The config json file corresponding to the pre-trained mesh-tf model. \n"
-        "This specifies the model architecture.",
+        help=(
+            "The config json file corresponding to the pre-trained mesh-tf model. \n"
+            "This specifies the model architecture."
+        ),
     )
     parser.add_argument(
         "--pytorch_dump_path", default=None, type=str, required=True, help="Path to the output PyTorch model."
diff --git a/src/transformers/models/gpt_neo/modeling_gpt_neo.py b/src/transformers/models/gpt_neo/modeling_gpt_neo.py
index 9fcbf57c73..8d4dcd9a7c 100755
--- a/src/transformers/models/gpt_neo/modeling_gpt_neo.py
+++ b/src/transformers/models/gpt_neo/modeling_gpt_neo.py
@@ -155,7 +155,8 @@ class GPTNeoSelfAttention(nn.Module):
         self.head_dim = self.embed_dim // self.num_heads
         if self.head_dim * self.num_heads != self.embed_dim:
             raise ValueError(
-                f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`: {self.num_heads})."
+                f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`:"
+                f" {self.num_heads})."
             )
 
         self.k_proj = nn.Linear(self.embed_dim, self.embed_dim, bias=False)
@@ -883,7 +884,7 @@ class GPTNeoForSequenceClassification(GPTNeoPreTrainedModel):
                 sequence_lengths = -1
                 logger.warning(
                     f"{self.__class__.__name__} will not detect padding tokens in `inputs_embeds`. Results may be "
-                    f"unexpected if using padding tokens in conjunction with `inputs_embeds.`"
+                    "unexpected if using padding tokens in conjunction with `inputs_embeds.`"
                 )
 
         pooled_logits = logits[torch.arange(batch_size, device=self.device), sequence_lengths]
diff --git a/src/transformers/models/gptj/modeling_gptj.py b/src/transformers/models/gptj/modeling_gptj.py
index d10c266d3f..db58113d96 100755
--- a/src/transformers/models/gptj/modeling_gptj.py
+++ b/src/transformers/models/gptj/modeling_gptj.py
@@ -111,7 +111,8 @@ class GPTJAttention(nn.Module):
         self.head_dim = self.embed_dim // self.num_attention_heads
         if self.head_dim * self.num_attention_heads != self.embed_dim:
             raise ValueError(
-                f"embed_dim must be divisible by num_attention_heads (got `embed_dim`: {self.embed_dim} and `num_attention_heads`: {self.num_attention_heads})."
+                f"embed_dim must be divisible by num_attention_heads (got `embed_dim`: {self.embed_dim} and"
+                f" `num_attention_heads`: {self.num_attention_heads})."
             )
         self.scale_attn = torch.sqrt(torch.tensor(self.head_dim, dtype=torch.float32)).to(torch.get_default_dtype())
@@ -967,7 +968,7 @@ class GPTJForSequenceClassification(GPTJPreTrainedModel):
                 sequence_lengths = -1
                 logger.warning(
                     f"{self.__class__.__name__} will not detect padding tokens in `inputs_embeds`. Results may be "
-                    f"unexpected if using padding tokens in conjunction with `inputs_embeds.`"
+                    "unexpected if using padding tokens in conjunction with `inputs_embeds.`"
                 )
 
         pooled_logits = logits[torch.arange(batch_size, device=self.device), sequence_lengths]
diff --git a/src/transformers/models/gptj/modeling_tf_gptj.py b/src/transformers/models/gptj/modeling_tf_gptj.py
index feaad22eff..6f18848a61 100644
--- a/src/transformers/models/gptj/modeling_tf_gptj.py
+++ b/src/transformers/models/gptj/modeling_tf_gptj.py
@@ -93,7 +93,8 @@ class TFGPTJAttention(tf.keras.layers.Layer):
         self.head_dim = self.embed_dim // self.num_attention_heads
         if self.head_dim * self.num_attention_heads != self.embed_dim:
             raise ValueError(
-                f"embed_dim must be divisible by num_attention_heads (got `embed_dim`: {self.embed_dim} and `num_attention_heads`: {self.num_attention_heads})."
+                f"embed_dim must be divisible by num_attention_heads (got `embed_dim`: {self.embed_dim} and"
+                f" `num_attention_heads`: {self.num_attention_heads})."
             )
         self.scale_attn = self.head_dim**0.5
         self.rotary_dim = config.rotary_dim
@@ -929,7 +930,7 @@ class TFGPTJForSequenceClassification(TFGPTJPreTrainedModel, TFSequenceClassific
             sequence_lengths = -1
             logger.warning(
                 f"{self.__class__.__name__} will not detect padding tokens in `inputs_embeds`. Results may be "
-                f"unexpected if using padding tokens in conjunction with `inputs_embeds.`"
+                "unexpected if using padding tokens in conjunction with `inputs_embeds.`"
             )
 
         loss = None
diff --git a/src/transformers/models/hubert/configuration_hubert.py b/src/transformers/models/hubert/configuration_hubert.py
index 9b104aa9c5..621537f493 100644
--- a/src/transformers/models/hubert/configuration_hubert.py
+++ b/src/transformers/models/hubert/configuration_hubert.py
@@ -233,10 +233,10 @@ class HubertConfig(PretrainedConfig):
             or (len(self.conv_dim) != self.num_feat_extract_layers)
         ):
             raise ValueError(
-                "Configuration for convolutional layers is incorrect. "
-                "It is required that `len(config.conv_dim)` == `len(config.conv_stride)` == `len(config.conv_kernel)`, "
-                f"but is `len(config.conv_dim) = {len(self.conv_dim)}`, `len(config.conv_stride) "
-                f"= {len(self.conv_stride)}`, `len(config.conv_kernel) = {len(self.conv_kernel)}`."
+                "Configuration for convolutional layers is incorrect. It is required that `len(config.conv_dim)` =="
+                " `len(config.conv_stride)` == `len(config.conv_kernel)`, but is `len(config.conv_dim) ="
+                f" {len(self.conv_dim)}`, `len(config.conv_stride) = {len(self.conv_stride)}`,"
+                f" `len(config.conv_kernel) = {len(self.conv_kernel)}`."
             )
 
         # fine-tuning config parameters for SpecAugment: https://arxiv.org/abs/1904.08779
diff --git a/src/transformers/models/hubert/convert_distilhubert_original_s3prl_checkpoint_to_pytorch.py b/src/transformers/models/hubert/convert_distilhubert_original_s3prl_checkpoint_to_pytorch.py
index c1963faa73..d7ba74feda 100644
--- a/src/transformers/models/hubert/convert_distilhubert_original_s3prl_checkpoint_to_pytorch.py
+++ b/src/transformers/models/hubert/convert_distilhubert_original_s3prl_checkpoint_to_pytorch.py
@@ -51,9 +51,10 @@ def set_recursively(hf_pointer, key, value, full_name, weight_type):
     else:
         hf_shape = hf_pointer.shape
 
-    assert (
-        hf_shape == value.shape
-    ), f"Shape of hf {key + '.' + weight_type if weight_type is not None else ''} is {hf_shape}, but should be {value.shape} for {full_name}"
+    assert hf_shape == value.shape, (
+        f"Shape of hf {key + '.' + weight_type if weight_type is not None else ''} is {hf_shape}, but should be"
+        f" {value.shape} for {full_name}"
+    )
 
     if weight_type == "weight":
         hf_pointer.weight.data = value
@@ -121,28 +122,32 @@ def load_conv_layer(full_name, value, feature_extractor, unused_weights, use_gro
     if type_id == 0:
         if "bias" in name:
-            assert (
-                value.shape == feature_extractor.conv_layers[layer_id].conv.bias.data.shape
-            ), f"{full_name} has size {value.shape}, but {feature_extractor.conv_layers[layer_id].conv.bias.data.shape} was found."
+            assert value.shape == feature_extractor.conv_layers[layer_id].conv.bias.data.shape, (
+                f"{full_name} has size {value.shape}, but"
+                f" {feature_extractor.conv_layers[layer_id].conv.bias.data.shape} was found."
+            )
             feature_extractor.conv_layers[layer_id].conv.bias.data = value
             logger.info(f"Feat extract conv layer {layer_id} was initialized from {full_name}.")
         elif "weight" in name:
-            assert (
-                value.shape == feature_extractor.conv_layers[layer_id].conv.weight.data.shape
-            ), f"{full_name} has size {value.shape}, but {feature_extractor.conv_layers[layer_id].conv.weight.data.shape} was found."
+            assert value.shape == feature_extractor.conv_layers[layer_id].conv.weight.data.shape, (
+                f"{full_name} has size {value.shape}, but"
+                f" {feature_extractor.conv_layers[layer_id].conv.weight.data.shape} was found."
+            )
             feature_extractor.conv_layers[layer_id].conv.weight.data = value
             logger.info(f"Feat extract conv layer {layer_id} was initialized from {full_name}.")
     elif (type_id == 2 and not use_group_norm) or (type_id == 2 and layer_id == 0 and use_group_norm):
         if "bias" in name:
-            assert (
-                value.shape == feature_extractor.conv_layers[layer_id].layer_norm.bias.data.shape
-            ), f"{full_name} has size {value.shape}, but {feature_extractor[layer_id].layer_norm.bias.data.shape} was found."
+            assert value.shape == feature_extractor.conv_layers[layer_id].layer_norm.bias.data.shape, (
+                f"{full_name} has size {value.shape}, but {feature_extractor[layer_id].layer_norm.bias.data.shape} was"
+                " found."
+            )
             feature_extractor.conv_layers[layer_id].layer_norm.bias.data = value
             logger.info(f"Feat extract layer norm weight of layer {layer_id} was initialized from {full_name}.")
         elif "weight" in name:
-            assert (
-                value.shape == feature_extractor.conv_layers[layer_id].layer_norm.weight.data.shape
-            ), f"{full_name} has size {value.shape}, but {feature_extractor[layer_id].layer_norm.weight.data.shape} was found."
+            assert value.shape == feature_extractor.conv_layers[layer_id].layer_norm.weight.data.shape, (
+                f"{full_name} has size {value.shape}, but"
+                f" {feature_extractor[layer_id].layer_norm.weight.data.shape} was found."
+            )
             feature_extractor.conv_layers[layer_id].layer_norm.weight.data = value
             logger.info(f"Feat extract layer norm weight of layer {layer_id} was initialized from {full_name}.")
     else:
diff --git a/src/transformers/models/hubert/convert_hubert_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/hubert/convert_hubert_original_pytorch_checkpoint_to_pytorch.py
index dee823e094..9a70fb6db7 100644
--- a/src/transformers/models/hubert/convert_hubert_original_pytorch_checkpoint_to_pytorch.py
+++ b/src/transformers/models/hubert/convert_hubert_original_pytorch_checkpoint_to_pytorch.py
@@ -64,9 +64,10 @@ def set_recursively(hf_pointer, key, value, full_name, weight_type):
     else:
         hf_shape = hf_pointer.shape
 
-    assert (
-        hf_shape == value.shape
-    ), f"Shape of hf {key + '.' + weight_type if weight_type is not None else ''} is {hf_shape}, but should be {value.shape} for {full_name}"
+    assert hf_shape == value.shape, (
+        f"Shape of hf {key + '.' + weight_type if weight_type is not None else ''} is {hf_shape}, but should be"
+        f" {value.shape} for {full_name}"
+    )
 
     if weight_type == "weight":
         hf_pointer.weight.data = value
@@ -134,28 +135,32 @@ def load_conv_layer(full_name, value, feature_extractor, unused_weights, use_gro
     if type_id == 0:
         if "bias" in name:
-            assert (
-                value.shape == feature_extractor.conv_layers[layer_id].conv.bias.data.shape
-            ), f"{full_name} has size {value.shape}, but {feature_extractor.conv_layers[layer_id].conv.bias.data.shape} was found."
+            assert value.shape == feature_extractor.conv_layers[layer_id].conv.bias.data.shape, (
+                f"{full_name} has size {value.shape}, but"
+                f" {feature_extractor.conv_layers[layer_id].conv.bias.data.shape} was found."
+            )
             feature_extractor.conv_layers[layer_id].conv.bias.data = value
             logger.info(f"Feat extract conv layer {layer_id} was initialized from {full_name}.")
         elif "weight" in name:
-            assert (
-                value.shape == feature_extractor.conv_layers[layer_id].conv.weight.data.shape
-            ), f"{full_name} has size {value.shape}, but {feature_extractor.conv_layers[layer_id].conv.weight.data.shape} was found."
+            assert value.shape == feature_extractor.conv_layers[layer_id].conv.weight.data.shape, (
+                f"{full_name} has size {value.shape}, but"
+                f" {feature_extractor.conv_layers[layer_id].conv.weight.data.shape} was found."
+            )
             feature_extractor.conv_layers[layer_id].conv.weight.data = value
             logger.info(f"Feat extract conv layer {layer_id} was initialized from {full_name}.")
     elif (type_id == 2 and not use_group_norm) or (type_id == 2 and layer_id == 0 and use_group_norm):
         if "bias" in name:
-            assert (
-                value.shape == feature_extractor.conv_layers[layer_id].layer_norm.bias.data.shape
-            ), f"{full_name} has size {value.shape}, but {feature_extractor[layer_id].layer_norm.bias.data.shape} was found."
+            assert value.shape == feature_extractor.conv_layers[layer_id].layer_norm.bias.data.shape, (
+                f"{full_name} has size {value.shape}, but {feature_extractor[layer_id].layer_norm.bias.data.shape} was"
+                " found."
+            )
             feature_extractor.conv_layers[layer_id].layer_norm.bias.data = value
             logger.info(f"Feat extract layer norm weight of layer {layer_id} was initialized from {full_name}.")
         elif "weight" in name:
-            assert (
-                value.shape == feature_extractor.conv_layers[layer_id].layer_norm.weight.data.shape
-            ), f"{full_name} has size {value.shape}, but {feature_extractor[layer_id].layer_norm.weight.data.shape} was found."
+            assert value.shape == feature_extractor.conv_layers[layer_id].layer_norm.weight.data.shape, (
+                f"{full_name} has size {value.shape}, but"
+                f" {feature_extractor[layer_id].layer_norm.weight.data.shape} was found."
+            )
             feature_extractor.conv_layers[layer_id].layer_norm.weight.data = value
             logger.info(f"Feat extract layer norm weight of layer {layer_id} was initialized from {full_name}.")
     else:
diff --git a/src/transformers/models/hubert/modeling_hubert.py b/src/transformers/models/hubert/modeling_hubert.py
index 5af0197fb9..de2a3b33f3 100755
--- a/src/transformers/models/hubert/modeling_hubert.py
+++ b/src/transformers/models/hubert/modeling_hubert.py
@@ -488,7 +488,8 @@ class HubertAttention(nn.Module):
 
         if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len):
             raise ValueError(
-                f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is {attn_weights.size()}"
+                f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is"
+                f" {attn_weights.size()}"
             )
 
         if attention_mask is not None:
@@ -504,7 +505,8 @@ class HubertAttention(nn.Module):
         if layer_head_mask is not None:
             if layer_head_mask.size() != (self.num_heads,):
                 raise ValueError(
-                    f"Head mask for a single layer should be of size {(self.num_heads,)}, but is {layer_head_mask.size()}"
+                    f"Head mask for a single layer should be of size {(self.num_heads,)}, but is"
+                    f" {layer_head_mask.size()}"
                 )
             attn_weights = layer_head_mask.view(1, -1, 1, 1) * attn_weights.view(bsz, self.num_heads, tgt_len, src_len)
             attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len)
@@ -525,7 +527,8 @@ class HubertAttention(nn.Module):
 
         if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim):
             raise ValueError(
-                f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is {attn_output.size()}"
+                f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is"
+                f" {attn_output.size()}"
             )
 
         attn_output = attn_output.view(bsz, self.num_heads, tgt_len, self.head_dim)
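The assert rewrites in the two hubert conversion scripts above keep the condition on the `assert` line and parenthesize only the message. The distinction is load-bearing: wrapping the condition and the message together in a single pair of parentheses would build a two-element tuple, which is always truthy, so the assert could never fire. A minimal sketch with made-up shapes and a hypothetical weight name:

value_shape = (3, 4)
hf_shape = (3, 4)
full_name = "feature_extractor.conv_layers.0.conv.weight"

# Correct: condition first, then a parenthesized, implicitly concatenated message.
assert value_shape == hf_shape, (
    f"{full_name} has size {value_shape}, but"
    f" {hf_shape} was found."
)

# Wrong and silently useless: `assert (condition, "message")` asserts a
# non-empty tuple, which is always true, so the check never fails.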
diff --git a/src/transformers/models/hubert/modeling_tf_hubert.py b/src/transformers/models/hubert/modeling_tf_hubert.py
index 540090871f..d659d2cacb 100644
--- a/src/transformers/models/hubert/modeling_tf_hubert.py
+++ b/src/transformers/models/hubert/modeling_tf_hubert.py
@@ -95,12 +95,14 @@ def input_values_processing(func, config, input_values, **kwargs):
                 output[parameter_names[i]] = input
             else:
                 raise ValueError(
-                    f"Data of type {type(input)} is not allowed only {allowed_types} is accepted for {parameter_names[i]}."
+                    f"Data of type {type(input)} is not allowed only {allowed_types} is accepted for"
+                    f" {parameter_names[i]}."
                 )
     elif isinstance(input_values, Mapping):
         if "inputs" in input_values:
             warnings.warn(
-                "The `inputs` argument is deprecated and will be removed in a future version, use `input_values` instead.",
+                "The `inputs` argument is deprecated and will be removed in a future version, use `input_values`"
+                " instead.",
                 FutureWarning,
             )
 
@@ -108,7 +110,8 @@ def input_values_processing(func, config, input_values, **kwargs):
 
         if "decoder_cached_states" in input_values:
             warnings.warn(
-                "The `decoder_cached_states` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
+                "The `decoder_cached_states` argument is deprecated and will be removed in a future version, use"
+                " `past_key_values` instead.",
                 FutureWarning,
             )
             output["past_key_values"] = input_values.pop("decoder_cached_states")
@@ -128,7 +131,8 @@ def input_values_processing(func, config, input_values, **kwargs):
             output[parameter_names[0]] = input_values
         else:
             raise ValueError(
-                f"Data of type {type(input_values)} is not allowed only {allowed_types} is accepted for {parameter_names[0]}."
+                f"Data of type {type(input_values)} is not allowed only {allowed_types} is accepted for"
+                f" {parameter_names[0]}."
             )
 
     for name in parameter_names:
@@ -219,7 +223,8 @@ def _compute_mask_indices(
 
     if mask_length > sequence_length:
         raise ValueError(
-            f"`mask_length` has to be smaller than `sequence_length`, but got `mask_length`: {mask_length} and `sequence_length`: {sequence_length}`"
+            f"`mask_length` has to be smaller than `sequence_length`, but got `mask_length`: {mask_length} and"
+            f" `sequence_length`: {sequence_length}`"
        )
     # compute number of masked spans in batch
     num_masked_spans = int(mask_prob * sequence_length / mask_length + tf.random.uniform((1,)))
@@ -408,9 +413,11 @@ class TFHubertGroupNorm(tf.keras.layers.Layer):
         dim = input_shape[self.axis]
         if dim is None:
             raise ValueError(
-                "Axis " + str(self.axis) + " of "
-                "input tensor should have a defined dimension "
-                "but the layer received an input with shape " + str(input_shape) + "."
+                "Axis "
+                + str(self.axis)
+                + " of input tensor should have a defined dimension but the layer received an input with shape "
+                + str(input_shape)
+                + "."
             )
 
     def _set_number_of_groups_for_instance_norm(self, input_shape):
@@ -424,22 +431,27 @@ class TFHubertGroupNorm(tf.keras.layers.Layer):
         dim = input_shape[self.axis]
         if dim < self.groups:
             raise ValueError(
-                "Number of groups (" + str(self.groups) + ") cannot be "
-                "more than the number of channels (" + str(dim) + ")."
+                "Number of groups ("
+                + str(self.groups)
+                + ") cannot be more than the number of channels ("
+                + str(dim)
+                + ")."
             )
 
         if dim % self.groups != 0:
             raise ValueError(
-                "Number of groups (" + str(self.groups) + ") must be a "
-                "multiple of the number of channels (" + str(dim) + ")."
+                "Number of groups ("
+                + str(self.groups)
+                + ") must be a multiple of the number of channels ("
+                + str(dim)
+                + ")."
             )
 
     def _check_axis(self):
         if self.axis == 0:
             raise ValueError(
-                "You are trying to normalize your batch axis. Do you want to "
-                "use tf.layer.batch_normalization instead"
+                "You are trying to normalize your batch axis. Do you want to use tf.layer.batch_normalization instead"
             )
 
     def _create_input_spec(self, input_shape):
@@ -809,7 +821,10 @@ class TFHubertAttention(tf.keras.layers.Layer):
             tf.debugging.assert_equal(
                 shape_list(attn_weights),
                 [bsz * self.num_heads, tgt_len, src_len],
-                message=f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is {shape_list(attn_weights)}",
+                message=(
+                    f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is"
+                    f" {shape_list(attn_weights)}"
+                ),
             )
 
         if attention_mask is not None:
@@ -819,7 +834,10 @@ class TFHubertAttention(tf.keras.layers.Layer):
             tf.debugging.assert_equal(
                 shape_list(attention_mask),
                 [bsz, 1, tgt_len, src_len],
-                message=f"Attention mask should be of size {(bsz, 1, tgt_len, src_len)}, but is {shape_list(attention_mask)}",
+                message=(
+                    f"Attention mask should be of size {(bsz, 1, tgt_len, src_len)}, but is"
+                    f" {shape_list(attention_mask)}"
+                ),
             )
 
             attention_mask = tf.cast(attention_mask, dtype=attn_weights.dtype)
@@ -835,7 +853,10 @@ class TFHubertAttention(tf.keras.layers.Layer):
             tf.debugging.assert_equal(
                 shape_list(layer_head_mask),
                 [self.num_heads],
-                message=f"Head mask for a single layer should be of size {(self.num_heads)}, but is {shape_list(layer_head_mask)}",
+                message=(
+                    f"Head mask for a single layer should be of size {(self.num_heads)}, but is"
+                    f" {shape_list(layer_head_mask)}"
+                ),
             )
 
             attn_weights = tf.reshape(layer_head_mask, (1, -1, 1, 1)) * tf.reshape(
@@ -852,7 +873,10 @@ class TFHubertAttention(tf.keras.layers.Layer):
             tf.debugging.assert_equal(
                 shape_list(attn_output),
                 [bsz * self.num_heads, tgt_len, self.head_dim],
-                message=f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is {shape_list(attn_output)}",
+                message=(
+                    f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is"
+                    f" {shape_list(attn_output)}"
+                ),
             )
 
         attn_output = tf.transpose(
diff --git a/src/transformers/models/ibert/configuration_ibert.py b/src/transformers/models/ibert/configuration_ibert.py
index 17f6d37e7d..32d4d2e56a 100644
--- a/src/transformers/models/ibert/configuration_ibert.py
+++ b/src/transformers/models/ibert/configuration_ibert.py
@@ -29,7 +29,9 @@ logger = logging.get_logger(__name__)
 IBERT_PRETRAINED_CONFIG_ARCHIVE_MAP = {
     "kssteven/ibert-roberta-base": "https://huggingface.co/kssteven/ibert-roberta-base/resolve/main/config.json",
     "kssteven/ibert-roberta-large": "https://huggingface.co/kssteven/ibert-roberta-large/resolve/main/config.json",
-    "kssteven/ibert-roberta-large-mnli": "https://huggingface.co/kssteven/ibert-roberta-large-mnli/resolve/main/config.json",
+    "kssteven/ibert-roberta-large-mnli": (
+        "https://huggingface.co/kssteven/ibert-roberta-large-mnli/resolve/main/config.json"
+    ),
 }
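Where a message is built with runtime `+` concatenation rather than adjacent literals, as in the TFHubertGroupNorm hunks above, the reformatted layout puts one operand per line with the operator leading. This keeps runtime concatenation visually distinct from implicit literal joining. A sketch of the layout with made-up values:

axis = 0
input_shape = (None, 16)

# One operand per line; the leading "+" marks runtime concatenation.
message = (
    "Axis "
    + str(axis)
    + " of input tensor should have a defined dimension but the layer received an input with shape "
    + str(input_shape)
    + "."
)
print(message)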
diff --git a/src/transformers/models/imagegpt/modeling_imagegpt.py b/src/transformers/models/imagegpt/modeling_imagegpt.py
index 22186a6159..c51dada0ed 100755
--- a/src/transformers/models/imagegpt/modeling_imagegpt.py
+++ b/src/transformers/models/imagegpt/modeling_imagegpt.py
@@ -200,7 +200,8 @@ class ImageGPTAttention(nn.Module):
         self.split_size = self.embed_dim
         if self.head_dim * self.num_heads != self.embed_dim:
             raise ValueError(
-                f"`embed_dim` must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`: {self.num_heads})."
+                f"`embed_dim` must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`:"
+                f" {self.num_heads})."
             )
 
         self.scale_attn_weights = config.scale_attn_weights
@@ -699,14 +700,14 @@ class ImageGPTModel(ImageGPTPreTrainedModel):
 
         if "pixel_values" in kwargs:
             warnings.warn(
-                "The `pixel_values` argument is deprecated and will be removed in a future version, use `input_ids` instead.",
+                "The `pixel_values` argument is deprecated and will be removed in a future version, use `input_ids`"
+                " instead.",
                 FutureWarning,
             )
 
             if input_ids is not None:
                 raise ValueError(
-                    "You cannot pass both `pixel_values` and `input_ids`. "
-                    "Please make sure to only pass `input_ids`."
+                    "You cannot pass both `pixel_values` and `input_ids`. Please make sure to only pass `input_ids`."
                 )
 
             input_ids = kwargs.pop("pixel_values")
@@ -1010,14 +1011,14 @@ class ImageGPTForCausalImageModeling(ImageGPTPreTrainedModel):
 
         if "pixel_values" in kwargs:
             warnings.warn(
-                "The `pixel_values` argument is deprecated and will be removed in a future version, use `input_ids` instead.",
+                "The `pixel_values` argument is deprecated and will be removed in a future version, use `input_ids`"
+                " instead.",
                 FutureWarning,
             )
 
             if input_ids is not None:
                 raise ValueError(
-                    "You cannot pass both `pixel_values` and `input_ids`. "
-                    "Please make sure to only pass `input_ids`."
+                    "You cannot pass both `pixel_values` and `input_ids`. Please make sure to only pass `input_ids`."
                 )
 
             input_ids = kwargs.pop("pixel_values")
@@ -1143,14 +1144,14 @@ class ImageGPTForImageClassification(ImageGPTPreTrainedModel):
 
         if "pixel_values" in kwargs:
             warnings.warn(
-                "The `pixel_values` argument is deprecated and will be removed in a future version, use `input_ids` instead.",
+                "The `pixel_values` argument is deprecated and will be removed in a future version, use `input_ids`"
+                " instead.",
                 FutureWarning,
            )
 
             if input_ids is not None:
                 raise ValueError(
-                    "You cannot pass both `pixel_values` and `input_ids`. "
-                    "Please make sure to only pass `input_ids`."
+                    "You cannot pass both `pixel_values` and `input_ids`. Please make sure to only pass `input_ids`."
                 )
 
             input_ids = kwargs.pop("pixel_values")
diff --git a/src/transformers/models/layoutlm/configuration_layoutlm.py b/src/transformers/models/layoutlm/configuration_layoutlm.py
index 9b77b2ce3f..94100791d3 100644
--- a/src/transformers/models/layoutlm/configuration_layoutlm.py
+++ b/src/transformers/models/layoutlm/configuration_layoutlm.py
@@ -27,8 +27,12 @@ from ..bert.configuration_bert import BertConfig
 logger = logging.get_logger(__name__)
 
 LAYOUTLM_PRETRAINED_CONFIG_ARCHIVE_MAP = {
-    "microsoft/layoutlm-base-uncased": "https://huggingface.co/microsoft/layoutlm-base-uncased/resolve/main/config.json",
-    "microsoft/layoutlm-large-uncased": "https://huggingface.co/microsoft/layoutlm-large-uncased/resolve/main/config.json",
+    "microsoft/layoutlm-base-uncased": (
+        "https://huggingface.co/microsoft/layoutlm-base-uncased/resolve/main/config.json"
+    ),
+    "microsoft/layoutlm-large-uncased": (
+        "https://huggingface.co/microsoft/layoutlm-large-uncased/resolve/main/config.json"
+    ),
 }
diff --git a/src/transformers/models/layoutlm/modeling_layoutlm.py b/src/transformers/models/layoutlm/modeling_layoutlm.py
index 174813ffb2..25c9db5d57 100644
--- a/src/transformers/models/layoutlm/modeling_layoutlm.py
+++ b/src/transformers/models/layoutlm/modeling_layoutlm.py
@@ -398,7 +398,8 @@ class LayoutLMLayer(nn.Module):
         if self.is_decoder and encoder_hidden_states is not None:
             if not hasattr(self, "crossattention"):
                 raise ValueError(
-                    f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`"
+                    f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers"
+                    " by setting `config.add_cross_attention=True`"
                 )
 
             # cross_attn cached key/values tuple is at positions 3,4 of past_key_value tuple
diff --git a/src/transformers/models/layoutlm/modeling_tf_layoutlm.py b/src/transformers/models/layoutlm/modeling_tf_layoutlm.py
index b184cb352e..d15fc29b73 100644
--- a/src/transformers/models/layoutlm/modeling_tf_layoutlm.py
+++ b/src/transformers/models/layoutlm/modeling_tf_layoutlm.py
@@ -453,8 +453,8 @@ class TFLayoutLMLayer(tf.keras.layers.Layer):
         if self.is_decoder and encoder_hidden_states is not None:
             if not hasattr(self, "crossattention"):
                 raise ValueError(
-                    f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers "
-                    "by setting `config.add_cross_attention=True`"
+                    f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers"
+                    " by setting `config.add_cross_attention=True`"
                 )
 
             # cross_attn cached key/values tuple is at positions 3,4 of past_key_value tuple
diff --git a/src/transformers/models/layoutlm/tokenization_layoutlm.py b/src/transformers/models/layoutlm/tokenization_layoutlm.py
index 6ef9a9c3a0..1cd0a5f6e0 100644
--- a/src/transformers/models/layoutlm/tokenization_layoutlm.py
+++ b/src/transformers/models/layoutlm/tokenization_layoutlm.py
@@ -25,8 +25,12 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"}
 
 PRETRAINED_VOCAB_FILES_MAP = {
     "vocab_file": {
-        "microsoft/layoutlm-base-uncased": "https://huggingface.co/microsoft/layoutlm-base-uncased/resolve/main/vocab.txt",
-        "microsoft/layoutlm-large-uncased": "https://huggingface.co/microsoft/layoutlm-large-uncased/resolve/main/vocab.txt",
+        "microsoft/layoutlm-base-uncased": (
+            "https://huggingface.co/microsoft/layoutlm-base-uncased/resolve/main/vocab.txt"
+        ),
+        "microsoft/layoutlm-large-uncased": (
+            "https://huggingface.co/microsoft/layoutlm-large-uncased/resolve/main/vocab.txt"
+        ),
     }
 }
diff --git a/src/transformers/models/layoutlm/tokenization_layoutlm_fast.py b/src/transformers/models/layoutlm/tokenization_layoutlm_fast.py
index 90ba0a94fe..a614c3e615 100644
--- a/src/transformers/models/layoutlm/tokenization_layoutlm_fast.py
+++ b/src/transformers/models/layoutlm/tokenization_layoutlm_fast.py
@@ -26,12 +26,20 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt", "tokenizer_file": "tokenizer.jso
 
 PRETRAINED_VOCAB_FILES_MAP = {
     "vocab_file": {
-        "microsoft/layoutlm-base-uncased": "https://huggingface.co/microsoft/layoutlm-base-uncased/resolve/main/vocab.txt",
-        "microsoft/layoutlm-large-uncased": "https://huggingface.co/microsoft/layoutlm-large-uncased/resolve/main/vocab.txt",
+        "microsoft/layoutlm-base-uncased": (
+            "https://huggingface.co/microsoft/layoutlm-base-uncased/resolve/main/vocab.txt"
+        ),
+        "microsoft/layoutlm-large-uncased": (
+            "https://huggingface.co/microsoft/layoutlm-large-uncased/resolve/main/vocab.txt"
+        ),
     },
     "tokenizer_file": {
-        "microsoft/layoutlm-base-uncased": "https://huggingface.co/microsoft/layoutlm-base-uncased/resolve/main/tokenizer.json",
-        "microsoft/layoutlm-large-uncased": "https://huggingface.co/microsoft/layoutlm-large-uncased/resolve/main/tokenizer.json",
+        "microsoft/layoutlm-base-uncased": (
+            "https://huggingface.co/microsoft/layoutlm-base-uncased/resolve/main/tokenizer.json"
+        ),
+        "microsoft/layoutlm-large-uncased": (
+            "https://huggingface.co/microsoft/layoutlm-large-uncased/resolve/main/tokenizer.json"
+        ),
     },
 }
diff --git a/src/transformers/models/layoutlmv2/tokenization_layoutlmv2.py b/src/transformers/models/layoutlmv2/tokenization_layoutlmv2.py
index b750ede185..f00420e640 100644
--- a/src/transformers/models/layoutlmv2/tokenization_layoutlmv2.py
+++ b/src/transformers/models/layoutlmv2/tokenization_layoutlmv2.py
@@ -38,8 +38,12 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"}
 
 PRETRAINED_VOCAB_FILES_MAP = {
     "vocab_file": {
-        "microsoft/layoutlmv2-base-uncased": "https://huggingface.co/microsoft/layoutlmv2-base-uncased/resolve/main/vocab.txt",
-        "microsoft/layoutlmv2-large-uncased": "https://huggingface.co/microsoft/layoutlmv2-large-uncased/resolve/main/vocab.txt",
+        "microsoft/layoutlmv2-base-uncased": (
+            "https://huggingface.co/microsoft/layoutlmv2-base-uncased/resolve/main/vocab.txt"
+        ),
+        "microsoft/layoutlmv2-large-uncased": (
+            "https://huggingface.co/microsoft/layoutlmv2-large-uncased/resolve/main/vocab.txt"
+        ),
     }
 }
 
@@ -255,8 +259,8 @@ class LayoutLMv2Tokenizer(PreTrainedTokenizer):
 
         if not os.path.isfile(vocab_file):
             raise ValueError(
-                f"Can't find a vocabulary file at path '{vocab_file}'. To load the vocabulary from a Google pretrained "
-                "model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`"
+                f"Can't find a vocabulary file at path '{vocab_file}'. To load the vocabulary from a Google pretrained"
+                " model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`"
             )
         self.vocab = load_vocab(vocab_file)
         self.ids_to_tokens = collections.OrderedDict([(ids, tok) for tok, ids in self.vocab.items()])
@@ -508,7 +512,8 @@ class LayoutLMv2Tokenizer(PreTrainedTokenizer):
         if is_batched:
             if text_pair is not None and len(text) != len(text_pair):
                 raise ValueError(
-                    f"batch length of `text`: {len(text)} does not match batch length of `text_pair`: {len(text_pair)}."
+                    f"batch length of `text`: {len(text)} does not match batch length of `text_pair`:"
+                    f" {len(text_pair)}."
                 )
             batch_text_or_text_pairs = list(zip(text, text_pair)) if text_pair is not None else text
             is_pair = bool(text_pair is not None)
@@ -1200,16 +1205,17 @@ class LayoutLMv2Tokenizer(PreTrainedTokenizer):
                 )
                 if truncation_strategy == TruncationStrategy.ONLY_FIRST:
                     error_msg = (
-                        error_msg + "Please select another truncation strategy than "
+                        error_msg
+                        + "Please select another truncation strategy than "
                         f"{truncation_strategy}, for instance 'longest_first' or 'only_second'."
                     )
                 logger.error(error_msg)
         elif truncation_strategy == TruncationStrategy.LONGEST_FIRST:
             logger.warning(
-                f"Be aware, overflowing tokens are not returned for the setting you have chosen,"
+                "Be aware, overflowing tokens are not returned for the setting you have chosen,"
                 f" i.e. sequence pairs with the '{TruncationStrategy.LONGEST_FIRST.value}' "
-                f"truncation strategy. So the returned list will always be empty even if some "
-                f"tokens have been removed."
+                "truncation strategy. So the returned list will always be empty even if some "
+                "tokens have been removed."
             )
             for _ in range(num_tokens_to_remove):
                 if pair_ids is None or len(ids) > len(pair_ids):
@@ -1231,7 +1237,7 @@ class LayoutLMv2Tokenizer(PreTrainedTokenizer):
                     f"We need to remove {num_tokens_to_remove} to truncate the input "
                     f"but the second sequence has a length {len(pair_ids)}. "
                    f"Please select another truncation strategy than {truncation_strategy}, "
-                    f"for instance 'longest_first' or 'only_first'."
+                    "for instance 'longest_first' or 'only_first'."
                 )
 
         return (
diff --git a/src/transformers/models/layoutlmv2/tokenization_layoutlmv2_fast.py b/src/transformers/models/layoutlmv2/tokenization_layoutlmv2_fast.py
index 2cc0de63ad..27a9849548 100644
--- a/src/transformers/models/layoutlmv2/tokenization_layoutlmv2_fast.py
+++ b/src/transformers/models/layoutlmv2/tokenization_layoutlmv2_fast.py
@@ -47,10 +47,14 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt", "tokenizer_file": "tokenizer.jso
 
 PRETRAINED_VOCAB_FILES_MAP = {
     "vocab_file": {
-        "microsoft/layoutlmv2-base-uncased": "https://huggingface.co/microsoft/layoutlmv2-base-uncased/resolve/main/vocab.txt",
+        "microsoft/layoutlmv2-base-uncased": (
+            "https://huggingface.co/microsoft/layoutlmv2-base-uncased/resolve/main/vocab.txt"
+        ),
     },
     "tokenizer_file": {
-        "microsoft/layoutlmv2-base-uncased": "https://huggingface.co/microsoft/layoutlmv2-base-uncased/resolve/main/tokenizer.json",
+        "microsoft/layoutlmv2-base-uncased": (
+            "https://huggingface.co/microsoft/layoutlmv2-base-uncased/resolve/main/tokenizer.json"
+        ),
     },
 }
 
@@ -269,7 +273,8 @@ class LayoutLMv2TokenizerFast(PreTrainedTokenizerFast):
         if is_batched:
             if text_pair is not None and len(text) != len(text_pair):
                 raise ValueError(
-                    f"batch length of `text`: {len(text)} does not match batch length of `text_pair`: {len(text_pair)}."
+                    f"batch length of `text`: {len(text)} does not match batch length of `text_pair`:"
+                    f" {len(text_pair)}."
) batch_text_or_text_pairs = list(zip(text, text_pair)) if text_pair is not None else text is_pair = bool(text_pair is not None) diff --git a/src/transformers/models/layoutxlm/processing_layoutxlm.py b/src/transformers/models/layoutxlm/processing_layoutxlm.py index 99245ccc17..6f45ee0659 100644 --- a/src/transformers/models/layoutxlm/processing_layoutxlm.py +++ b/src/transformers/models/layoutxlm/processing_layoutxlm.py @@ -86,8 +86,7 @@ class LayoutXLMProcessor(ProcessorMixin): if self.feature_extractor.apply_ocr and (word_labels is not None): raise ValueError( - "You cannot provide word labels " - "if you initialized the feature extractor with apply_ocr set to True." + "You cannot provide word labels if you initialized the feature extractor with apply_ocr set to True." ) # first, apply the feature extractor diff --git a/src/transformers/models/layoutxlm/tokenization_layoutxlm.py b/src/transformers/models/layoutxlm/tokenization_layoutxlm.py index 8fded39284..c0c9acfe47 100644 --- a/src/transformers/models/layoutxlm/tokenization_layoutxlm.py +++ b/src/transformers/models/layoutxlm/tokenization_layoutxlm.py @@ -438,7 +438,8 @@ class LayoutXLMTokenizer(PreTrainedTokenizer): if is_batched: if text_pair is not None and len(text) != len(text_pair): raise ValueError( - f"batch length of `text`: {len(text)} does not match batch length of `text_pair`: {len(text_pair)}." + f"batch length of `text`: {len(text)} does not match batch length of `text_pair`:" + f" {len(text_pair)}." ) batch_text_or_text_pairs = list(zip(text, text_pair)) if text_pair is not None else text is_pair = bool(text_pair is not None) @@ -960,7 +961,7 @@ class LayoutXLMTokenizer(PreTrainedTokenizer): f"We need to remove {num_tokens_to_remove} to truncate the input " f"but the first sequence has a length {len(ids)}. " f"Please select another truncation strategy than {truncation_strategy}, " - f"for instance 'longest_first' or 'only_second'." + "for instance 'longest_first' or 'only_second'." ) elif truncation_strategy == TruncationStrategy.ONLY_SECOND and pair_ids is not None: if len(pair_ids) > num_tokens_to_remove: @@ -974,7 +975,7 @@ class LayoutXLMTokenizer(PreTrainedTokenizer): f"We need to remove {num_tokens_to_remove} to truncate the input " f"but the second sequence has a length {len(pair_ids)}. " f"Please select another truncation strategy than {truncation_strategy}, " - f"for instance 'longest_first' or 'only_first'." + "for instance 'longest_first' or 'only_first'." ) return ( diff --git a/src/transformers/models/layoutxlm/tokenization_layoutxlm_fast.py b/src/transformers/models/layoutxlm/tokenization_layoutxlm_fast.py index 35b4383877..1477d06b80 100644 --- a/src/transformers/models/layoutxlm/tokenization_layoutxlm_fast.py +++ b/src/transformers/models/layoutxlm/tokenization_layoutxlm_fast.py @@ -265,7 +265,8 @@ class LayoutXLMTokenizerFast(PreTrainedTokenizerFast): if is_batched: if text_pair is not None and len(text) != len(text_pair): raise ValueError( - f"batch length of `text`: {len(text)} does not match batch length of `text_pair`: {len(text_pair)}." + f"batch length of `text`: {len(text)} does not match batch length of `text_pair`:" + f" {len(text_pair)}." 
) batch_text_or_text_pairs = list(zip(text, text_pair)) if text_pair is not None else text is_pair = bool(text_pair is not None) diff --git a/src/transformers/models/led/modeling_led.py b/src/transformers/models/led/modeling_led.py index 162c1066f6..ab1ffba943 100755 --- a/src/transformers/models/led/modeling_led.py +++ b/src/transformers/models/led/modeling_led.py @@ -222,7 +222,10 @@ class LEDEncoderSelfAttention(nn.Module): seq_len, self.num_heads, self.one_sided_attn_window_size * 2 + 1, - ], f"local_attn_probs should be of size ({batch_size}, {seq_len}, {self.num_heads}, {self.one_sided_attn_window_size * 2 + 1}), but is of size {attn_scores.size()}" + ], ( + f"local_attn_probs should be of size ({batch_size}, {seq_len}, {self.num_heads}," + f" {self.one_sided_attn_window_size * 2 + 1}), but is of size {attn_scores.size()}" + ) # compute local attention probs from global attention keys and contact over window dim if is_global_attn: @@ -662,7 +665,11 @@ class LEDEncoderSelfAttention(nn.Module): batch_size * self.num_heads, max_num_global_attn_indices, seq_len, - ], f"global_attn_scores have the wrong size. Size should be {(batch_size * self.num_heads, max_num_global_attn_indices, seq_len)}, but is {global_attn_scores.size()}." + ], ( + "global_attn_scores have the wrong size. Size should be" + f" {(batch_size * self.num_heads, max_num_global_attn_indices, seq_len)}, but is" + f" {global_attn_scores.size()}." + ) global_attn_scores = global_attn_scores.view(batch_size, self.num_heads, max_num_global_attn_indices, seq_len) @@ -705,7 +712,11 @@ class LEDEncoderSelfAttention(nn.Module): batch_size * self.num_heads, max_num_global_attn_indices, self.head_dim, - ], f"global_attn_output tensor has the wrong size. Size should be {(batch_size * self.num_heads, max_num_global_attn_indices, self.head_dim)}, but is {global_attn_output.size()}." + ], ( + "global_attn_output tensor has the wrong size. Size should be" + f" {(batch_size * self.num_heads, max_num_global_attn_indices, self.head_dim)}, but is" + f" {global_attn_output.size()}." + ) global_attn_probs = global_attn_probs.view(batch_size, self.num_heads, max_num_global_attn_indices, seq_len) global_attn_output = global_attn_output.view( @@ -766,7 +777,8 @@ class LEDDecoderAttention(nn.Module): self.head_dim = embed_dim // num_heads if self.head_dim * num_heads != self.embed_dim: raise ValueError( - f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`: {num_heads})." + f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`:" + f" {num_heads})." 
) self.scaling = self.head_dim**-0.5 self.is_decoder = is_decoder @@ -837,7 +849,8 @@ class LEDDecoderAttention(nn.Module): if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len): raise ValueError( - f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is {attn_weights.size()}" + f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is" + f" {attn_weights.size()}" ) if attention_mask is not None: @@ -852,7 +865,8 @@ class LEDDecoderAttention(nn.Module): if layer_head_mask is not None: if layer_head_mask.size() != (self.num_heads,): raise ValueError( - f"Head mask for a single layer should be of size {(self.num_heads,)}, but is {layer_head_mask.size()}" + f"Head mask for a single layer should be of size {(self.num_heads,)}, but is" + f" {layer_head_mask.size()}" ) attn_weights = layer_head_mask.view(1, -1, 1, 1) * attn_weights.view(bsz, self.num_heads, tgt_len, src_len) attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len) @@ -873,7 +887,8 @@ class LEDDecoderAttention(nn.Module): if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim): raise ValueError( - f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is {attn_output.size()}" + f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is" + f" {attn_output.size()}" ) attn_output = ( @@ -1813,7 +1828,8 @@ class LEDEncoder(LEDPreTrainedModel): if head_mask is not None: if head_mask.size()[0] != len(self.layers): raise ValueError( - f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}." + f"The head_mask should be specified for {len(self.layers)} layers, but it is for" + f" {head_mask.size()[0]}." ) for idx, encoder_layer in enumerate(self.layers): if output_hidden_states: @@ -2069,7 +2085,8 @@ class LEDDecoder(LEDPreTrainedModel): if attn_mask is not None: if attn_mask.size()[0] != len(self.layers): raise ValueError( - f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}." + f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for" + f" {head_mask.size()[0]}." 
) for idx, decoder_layer in enumerate(self.layers): # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) diff --git a/src/transformers/models/led/modeling_tf_led.py b/src/transformers/models/led/modeling_tf_led.py index d44a35e445..83a71a0dfe 100644 --- a/src/transformers/models/led/modeling_tf_led.py +++ b/src/transformers/models/led/modeling_tf_led.py @@ -246,7 +246,10 @@ class TFLEDEncoderSelfAttention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(attn_scores), [batch_size, seq_len, self.num_heads, self.one_sided_attn_window_size * 2 + 1], - message=f"attn_probs should be of size ({batch_size}, {seq_len}, {self.num_heads}, {self.one_sided_attn_window_size * 2 + 1}), but is of size {shape_list(attn_scores)}", + message=( + f"attn_probs should be of size ({batch_size}, {seq_len}, {self.num_heads}," + f" {self.one_sided_attn_window_size * 2 + 1}), but is of size {shape_list(attn_scores)}" + ), ) # compute global attn indices required through out forward fn @@ -299,7 +302,10 @@ class TFLEDEncoderSelfAttention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(layer_head_mask), [self.num_heads], - message=f"Head mask for a single layer should be of size {(self.num_heads)}, but is {shape_list(layer_head_mask)}", + message=( + f"Head mask for a single layer should be of size {(self.num_heads)}, but is" + f" {shape_list(layer_head_mask)}" + ), ) attn_probs = tf.reshape(layer_head_mask, (1, 1, -1, 1)) * attn_probs @@ -392,7 +398,10 @@ class TFLEDEncoderSelfAttention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(query), shape_list(key), - message=f"Shape of query and key should be equal, but got query: {shape_list(query)} and key: {shape_list(key)}", + message=( + f"Shape of query and key should be equal, but got query: {shape_list(query)} and key:" + f" {shape_list(key)}" + ), ) chunks_count = seq_len // window_overlap - 1 @@ -677,7 +686,10 @@ class TFLEDEncoderSelfAttention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(chunked_hidden_states), [batch_size, num_output_chunks, frame_size], - message=f"Make sure chunking is correctly applied. `Chunked hidden states should have output dimension {[batch_size, frame_size, num_output_chunks]}, but got {shape_list(chunked_hidden_states)}.", + message=( + "Make sure chunking is correctly applied. `Chunked hidden states should have output dimension" + f" {[batch_size, frame_size, num_output_chunks]}, but got {shape_list(chunked_hidden_states)}." + ), ) chunked_hidden_states = tf.reshape( @@ -855,7 +867,11 @@ class TFLEDEncoderSelfAttention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(global_attn_scores), [batch_size * self.num_heads, max_num_global_attn_indices, seq_len], - message=f"global_attn_scores have the wrong size. Size should be {(batch_size * self.num_heads, max_num_global_attn_indices, seq_len)}, but is {shape_list(global_attn_scores)}.", + message=( + "global_attn_scores have the wrong size. Size should be" + f" {(batch_size * self.num_heads, max_num_global_attn_indices, seq_len)}, but is" + f" {shape_list(global_attn_scores)}." 
+ ), ) global_attn_scores = tf.reshape( @@ -894,7 +910,10 @@ class TFLEDEncoderSelfAttention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(layer_head_mask), [self.num_heads], - message=f"Head mask for a single layer should be of size {(self.num_heads)}, but is {shape_list(layer_head_mask)}", + message=( + f"Head mask for a single layer should be of size {(self.num_heads)}, but is" + f" {shape_list(layer_head_mask)}" + ), ) global_attn_probs_float = tf.reshape(layer_head_mask, (1, -1, 1, 1)) * tf.reshape( global_attn_probs_float, (batch_size, self.num_heads, max_num_global_attn_indices, seq_len) @@ -913,7 +932,11 @@ class TFLEDEncoderSelfAttention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(global_attn_output), [batch_size * self.num_heads, max_num_global_attn_indices, self.head_dim], - message=f"global_attn_output tensor has the wrong size. Size should be {(batch_size * self.num_heads, max_num_global_attn_indices, self.head_dim)}, but is {shape_list(global_attn_output)}.", + message=( + "global_attn_output tensor has the wrong size. Size should be" + f" {(batch_size * self.num_heads, max_num_global_attn_indices, self.head_dim)}, but is" + f" {shape_list(global_attn_output)}." + ), ) global_attn_output = tf.reshape( @@ -1069,7 +1092,10 @@ class TFLEDDecoderAttention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(attn_weights), [bsz * self.num_heads, tgt_len, src_len], - message=f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is {shape_list(attn_weights)}", + message=( + f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is" + f" {shape_list(attn_weights)}" + ), ) if attention_mask is not None: @@ -1077,7 +1103,10 @@ class TFLEDDecoderAttention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(attention_mask), [bsz, 1, tgt_len, src_len], - message=f"Attention mask should be of size {(bsz, 1, tgt_len, src_len)}, but is {shape_list(attention_mask)}", + message=( + f"Attention mask should be of size {(bsz, 1, tgt_len, src_len)}, but is" + f" {shape_list(attention_mask)}" + ), ) attn_weights = tf.reshape(attn_weights, (bsz, self.num_heads, tgt_len, src_len)) + tf.cast( @@ -1092,7 +1121,10 @@ class TFLEDDecoderAttention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(layer_head_mask), [self.num_heads], - message=f"Head mask for a single layer should be of size {(self.num_heads)}, but is {shape_list(layer_head_mask)}", + message=( + f"Head mask for a single layer should be of size {(self.num_heads)}, but is" + f" {shape_list(layer_head_mask)}" + ), ) attn_weights = tf.reshape(layer_head_mask, (1, -1, 1, 1)) * tf.reshape( @@ -1108,7 +1140,10 @@ class TFLEDDecoderAttention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(attn_output), [bsz * self.num_heads, tgt_len, self.head_dim], - message=f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is {shape_list(attn_output)}", + message=( + f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is" + f" {shape_list(attn_output)}" + ), ) attn_output = tf.transpose( @@ -1753,7 +1788,10 @@ class TFLEDEncoder(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(head_mask)[0], len(self.layers), - message=f"The head_mask should be specified for {len(self.layers)} layers, but it is for {shape_list(head_mask)[0]}.", + message=( + f"The head_mask should be specified for {len(self.layers)} layers, but it is for" + f" 
{shape_list(head_mask)[0]}." + ), ) # encoder layers @@ -2013,7 +2051,10 @@ class TFLEDDecoder(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(head_mask)[0], len(self.layers), - message=f"The head_mask should be specified for {len(self.layers)} layers, but it is for {shape_list(head_mask)[0]}.", + message=( + f"The head_mask should be specified for {len(self.layers)} layers, but it is for" + f" {shape_list(head_mask)[0]}." + ), ) for idx, decoder_layer in enumerate(self.layers): diff --git a/src/transformers/models/longformer/configuration_longformer.py b/src/transformers/models/longformer/configuration_longformer.py index 2c9fd17b35..53ceeafb64 100644 --- a/src/transformers/models/longformer/configuration_longformer.py +++ b/src/transformers/models/longformer/configuration_longformer.py @@ -24,9 +24,15 @@ logger = logging.get_logger(__name__) LONGFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP = { "allenai/longformer-base-4096": "https://huggingface.co/allenai/longformer-base-4096/resolve/main/config.json", "allenai/longformer-large-4096": "https://huggingface.co/allenai/longformer-large-4096/resolve/main/config.json", - "allenai/longformer-large-4096-finetuned-triviaqa": "https://huggingface.co/allenai/longformer-large-4096-finetuned-triviaqa/resolve/main/config.json", - "allenai/longformer-base-4096-extra.pos.embd.only": "https://huggingface.co/allenai/longformer-base-4096-extra.pos.embd.only/resolve/main/config.json", - "allenai/longformer-large-4096-extra.pos.embd.only": "https://huggingface.co/allenai/longformer-large-4096-extra.pos.embd.only/resolve/main/config.json", + "allenai/longformer-large-4096-finetuned-triviaqa": ( + "https://huggingface.co/allenai/longformer-large-4096-finetuned-triviaqa/resolve/main/config.json" + ), + "allenai/longformer-base-4096-extra.pos.embd.only": ( + "https://huggingface.co/allenai/longformer-base-4096-extra.pos.embd.only/resolve/main/config.json" + ), + "allenai/longformer-large-4096-extra.pos.embd.only": ( + "https://huggingface.co/allenai/longformer-large-4096-extra.pos.embd.only/resolve/main/config.json" + ), } diff --git a/src/transformers/models/longformer/modeling_longformer.py b/src/transformers/models/longformer/modeling_longformer.py index 20a2e9d239..e6971d7369 100755 --- a/src/transformers/models/longformer/modeling_longformer.py +++ b/src/transformers/models/longformer/modeling_longformer.py @@ -388,9 +388,10 @@ def _get_question_end_index(input_ids, sep_token_id): batch_size = input_ids.shape[0] assert sep_token_indices.shape[1] == 2, "`input_ids` should have two dimensions" - assert ( - sep_token_indices.shape[0] == 3 * batch_size - ), f"There should be exactly three separator tokens: {sep_token_id} in every sample for questions answering. You might also consider to set `global_attention_mask` manually in the forward function to avoid this error." + assert sep_token_indices.shape[0] == 3 * batch_size, ( + f"There should be exactly three separator tokens: {sep_token_id} in every sample for questions answering. You" + " might also consider to set `global_attention_mask` manually in the forward function to avoid this error." 
+ ) return sep_token_indices.view(batch_size, 3, 2)[:, 0, 1] @@ -600,7 +601,10 @@ class LongformerSelfAttention(nn.Module): seq_len, self.num_heads, self.one_sided_attn_window_size * 2 + 1, - ], f"local_attn_probs should be of size ({batch_size}, {seq_len}, {self.num_heads}, {self.one_sided_attn_window_size * 2 + 1}), but is of size {attn_scores.size()}" + ], ( + f"local_attn_probs should be of size ({batch_size}, {seq_len}, {self.num_heads}," + f" {self.one_sided_attn_window_size * 2 + 1}), but is of size {attn_scores.size()}" + ) # compute local attention probs from global attention keys and contact over window dim if is_global_attn: @@ -1040,7 +1044,11 @@ class LongformerSelfAttention(nn.Module): batch_size * self.num_heads, max_num_global_attn_indices, seq_len, - ], f"global_attn_scores have the wrong size. Size should be {(batch_size * self.num_heads, max_num_global_attn_indices, seq_len)}, but is {global_attn_scores.size()}." + ], ( + "global_attn_scores have the wrong size. Size should be" + f" {(batch_size * self.num_heads, max_num_global_attn_indices, seq_len)}, but is" + f" {global_attn_scores.size()}." + ) global_attn_scores = global_attn_scores.view(batch_size, self.num_heads, max_num_global_attn_indices, seq_len) @@ -1083,7 +1091,11 @@ class LongformerSelfAttention(nn.Module): batch_size * self.num_heads, max_num_global_attn_indices, self.head_dim, - ], f"global_attn_output tensor has the wrong size. Size should be {(batch_size * self.num_heads, max_num_global_attn_indices, self.head_dim)}, but is {global_attn_output.size()}." + ], ( + "global_attn_output tensor has the wrong size. Size should be" + f" {(batch_size * self.num_heads, max_num_global_attn_indices, self.head_dim)}, but is" + f" {global_attn_output.size()}." + ) global_attn_probs = global_attn_probs.view(batch_size, self.num_heads, max_num_global_attn_indices, seq_len) global_attn_output = global_attn_output.view( @@ -2032,7 +2044,8 @@ class LongformerForQuestionAnswering(LongformerPreTrainedModel): if global_attention_mask is None: if input_ids is None: logger.warning( - "It is not possible to automatically generate the `global_attention_mask` because input_ids is None. Please make sure that it is correctly set." + "It is not possible to automatically generate the `global_attention_mask` because input_ids is" + " None. Please make sure that it is correctly set." 
) else: # set global attention on question tokens automatically diff --git a/src/transformers/models/longformer/modeling_tf_longformer.py b/src/transformers/models/longformer/modeling_tf_longformer.py index 124fe2c06f..e4390083ee 100644 --- a/src/transformers/models/longformer/modeling_tf_longformer.py +++ b/src/transformers/models/longformer/modeling_tf_longformer.py @@ -775,7 +775,10 @@ class TFLongformerSelfAttention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(attn_scores), [batch_size, seq_len, self.num_heads, self.one_sided_attn_window_size * 2 + 1], - message=f"attn_probs should be of size ({batch_size}, {seq_len}, {self.num_heads}, {self.one_sided_attn_window_size * 2 + 1}), but is of size {shape_list(attn_scores)}", + message=( + f"attn_probs should be of size ({batch_size}, {seq_len}, {self.num_heads}," + f" {self.one_sided_attn_window_size * 2 + 1}), but is of size {shape_list(attn_scores)}" + ), ) # compute global attn indices required through out forward fn @@ -828,7 +831,10 @@ class TFLongformerSelfAttention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(layer_head_mask), [self.num_heads], - message=f"Head mask for a single layer should be of size {(self.num_heads)}, but is {shape_list(layer_head_mask)}", + message=( + f"Head mask for a single layer should be of size {(self.num_heads)}, but is" + f" {shape_list(layer_head_mask)}" + ), ) attn_probs = tf.reshape(layer_head_mask, (1, 1, -1, 1)) * attn_probs @@ -921,7 +927,10 @@ class TFLongformerSelfAttention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(query), shape_list(key), - message=f"Shape of query and key should be equal, but got query: {shape_list(query)} and key: {shape_list(key)}", + message=( + f"Shape of query and key should be equal, but got query: {shape_list(query)} and key:" + f" {shape_list(key)}" + ), ) chunks_count = seq_len // window_overlap - 1 @@ -1206,7 +1215,10 @@ class TFLongformerSelfAttention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(chunked_hidden_states), [batch_size, num_output_chunks, frame_size], - message=f"Make sure chunking is correctly applied. `Chunked hidden states should have output dimension {[batch_size, frame_size, num_output_chunks]}, but got {shape_list(chunked_hidden_states)}.", + message=( + "Make sure chunking is correctly applied. `Chunked hidden states should have output dimension" + f" {[batch_size, frame_size, num_output_chunks]}, but got {shape_list(chunked_hidden_states)}." + ), ) chunked_hidden_states = tf.reshape( @@ -1384,7 +1396,11 @@ class TFLongformerSelfAttention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(global_attn_scores), [batch_size * self.num_heads, max_num_global_attn_indices, seq_len], - message=f"global_attn_scores have the wrong size. Size should be {(batch_size * self.num_heads, max_num_global_attn_indices, seq_len)}, but is {shape_list(global_attn_scores)}.", + message=( + "global_attn_scores have the wrong size. Size should be" + f" {(batch_size * self.num_heads, max_num_global_attn_indices, seq_len)}, but is" + f" {shape_list(global_attn_scores)}." 
+ ), ) global_attn_scores = tf.reshape( @@ -1423,7 +1439,10 @@ class TFLongformerSelfAttention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(layer_head_mask), [self.num_heads], - message=f"Head mask for a single layer should be of size {(self.num_heads)}, but is {shape_list(layer_head_mask)}", + message=( + f"Head mask for a single layer should be of size {(self.num_heads)}, but is" + f" {shape_list(layer_head_mask)}" + ), ) global_attn_probs_float = tf.reshape(layer_head_mask, (1, -1, 1, 1)) * tf.reshape( global_attn_probs_float, (batch_size, self.num_heads, max_num_global_attn_indices, seq_len) @@ -1442,7 +1461,11 @@ class TFLongformerSelfAttention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(global_attn_output), [batch_size * self.num_heads, max_num_global_attn_indices, self.head_dim], - message=f"global_attn_output tensor has the wrong size. Size should be {(batch_size * self.num_heads, max_num_global_attn_indices, self.head_dim)}, but is {shape_list(global_attn_output)}.", + message=( + "global_attn_output tensor has the wrong size. Size should be" + f" {(batch_size * self.num_heads, max_num_global_attn_indices, self.head_dim)}, but is" + f" {shape_list(global_attn_output)}." + ), ) global_attn_output = tf.reshape( @@ -2207,7 +2230,10 @@ class TFLongformerForQuestionAnswering(TFLongformerPreTrainedModel, TFQuestionAn if global_attention_mask is None and input_ids is not None: if shape_list(tf.where(input_ids == self.config.sep_token_id))[0] != 3 * shape_list(input_ids)[0]: logger.warning( - f"There should be exactly three separator tokens: {self.config.sep_token_id} in every sample for questions answering. You might also consider to set `global_attention_mask` manually in the forward function to avoid this. This is most likely an error. The global attention is disabled for this forward pass." + f"There should be exactly three separator tokens: {self.config.sep_token_id} in every sample for" + " questions answering. You might also consider to set `global_attention_mask` manually in the" + " forward function to avoid this. This is most likely an error. The global attention is disabled" + " for this forward pass." 
) global_attention_mask = tf.fill(shape_list(input_ids), value=0) else: diff --git a/src/transformers/models/longformer/tokenization_longformer.py b/src/transformers/models/longformer/tokenization_longformer.py index 19445622b8..b594580647 100644 --- a/src/transformers/models/longformer/tokenization_longformer.py +++ b/src/transformers/models/longformer/tokenization_longformer.py @@ -25,17 +25,33 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.json", "merges_file": "merges.txt"} PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { "allenai/longformer-base-4096": "https://huggingface.co/allenai/longformer-base-4096/resolve/main/vocab.json", - "allenai/longformer-large-4096": "https://huggingface.co/allenai/longformer-large-4096/resolve/main/vocab.json", - "allenai/longformer-large-4096-finetuned-triviaqa": "https://huggingface.co/allenai/longformer-large-4096-finetuned-triviaqa/resolve/main/vocab.json", - "allenai/longformer-base-4096-extra.pos.embd.only": "https://huggingface.co/allenai/longformer-base-4096-extra.pos.embd.only/resolve/main/vocab.json", - "allenai/longformer-large-4096-extra.pos.embd.only": "https://huggingface.co/allenai/longformer-large-4096-extra.pos.embd.only/resolve/main/vocab.json", + "allenai/longformer-large-4096": ( + "https://huggingface.co/allenai/longformer-large-4096/resolve/main/vocab.json" + ), + "allenai/longformer-large-4096-finetuned-triviaqa": ( + "https://huggingface.co/allenai/longformer-large-4096-finetuned-triviaqa/resolve/main/vocab.json" + ), + "allenai/longformer-base-4096-extra.pos.embd.only": ( + "https://huggingface.co/allenai/longformer-base-4096-extra.pos.embd.only/resolve/main/vocab.json" + ), + "allenai/longformer-large-4096-extra.pos.embd.only": ( + "https://huggingface.co/allenai/longformer-large-4096-extra.pos.embd.only/resolve/main/vocab.json" + ), }, "merges_file": { "allenai/longformer-base-4096": "https://huggingface.co/allenai/longformer-base-4096/resolve/main/merges.txt", - "allenai/longformer-large-4096": "https://huggingface.co/allenai/longformer-large-4096/resolve/main/merges.txt", - "allenai/longformer-large-4096-finetuned-triviaqa": "https://huggingface.co/allenai/longformer-large-4096-finetuned-triviaqa/resolve/main/merges.txt", - "allenai/longformer-base-4096-extra.pos.embd.only": "https://huggingface.co/allenai/longformer-base-4096-extra.pos.embd.only/resolve/main/merges.txt", - "allenai/longformer-large-4096-extra.pos.embd.only": "https://huggingface.co/allenai/longformer-large-4096-extra.pos.embd.only/resolve/main/merges.txt", + "allenai/longformer-large-4096": ( + "https://huggingface.co/allenai/longformer-large-4096/resolve/main/merges.txt" + ), + "allenai/longformer-large-4096-finetuned-triviaqa": ( + "https://huggingface.co/allenai/longformer-large-4096-finetuned-triviaqa/resolve/main/merges.txt" + ), + "allenai/longformer-base-4096-extra.pos.embd.only": ( + "https://huggingface.co/allenai/longformer-base-4096-extra.pos.embd.only/resolve/main/merges.txt" + ), + "allenai/longformer-large-4096-extra.pos.embd.only": ( + "https://huggingface.co/allenai/longformer-large-4096-extra.pos.embd.only/resolve/main/merges.txt" + ), }, } diff --git a/src/transformers/models/longformer/tokenization_longformer_fast.py b/src/transformers/models/longformer/tokenization_longformer_fast.py index a7d06b1fc3..45a8883971 100644 --- a/src/transformers/models/longformer/tokenization_longformer_fast.py +++ b/src/transformers/models/longformer/tokenization_longformer_fast.py @@ -26,24 +26,50 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.json", 
"merges_file": "merges.txt", "t PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { "allenai/longformer-base-4096": "https://huggingface.co/allenai/longformer-base-4096/resolve/main/vocab.json", - "allenai/longformer-large-4096": "https://huggingface.co/allenai/longformer-large-4096/resolve/main/vocab.json", - "allenai/longformer-large-4096-finetuned-triviaqa": "https://huggingface.co/allenai/longformer-large-4096-finetuned-triviaqa/resolve/main/vocab.json", - "allenai/longformer-base-4096-extra.pos.embd.only": "https://huggingface.co/allenai/longformer-base-4096-extra.pos.embd.only/resolve/main/vocab.json", - "allenai/longformer-large-4096-extra.pos.embd.only": "https://huggingface.co/allenai/longformer-large-4096-extra.pos.embd.only/resolve/main/vocab.json", + "allenai/longformer-large-4096": ( + "https://huggingface.co/allenai/longformer-large-4096/resolve/main/vocab.json" + ), + "allenai/longformer-large-4096-finetuned-triviaqa": ( + "https://huggingface.co/allenai/longformer-large-4096-finetuned-triviaqa/resolve/main/vocab.json" + ), + "allenai/longformer-base-4096-extra.pos.embd.only": ( + "https://huggingface.co/allenai/longformer-base-4096-extra.pos.embd.only/resolve/main/vocab.json" + ), + "allenai/longformer-large-4096-extra.pos.embd.only": ( + "https://huggingface.co/allenai/longformer-large-4096-extra.pos.embd.only/resolve/main/vocab.json" + ), }, "merges_file": { "allenai/longformer-base-4096": "https://huggingface.co/allenai/longformer-base-4096/resolve/main/merges.txt", - "allenai/longformer-large-4096": "https://huggingface.co/allenai/longformer-large-4096/resolve/main/merges.txt", - "allenai/longformer-large-4096-finetuned-triviaqa": "https://huggingface.co/allenai/longformer-large-4096-finetuned-triviaqa/resolve/main/merges.txt", - "allenai/longformer-base-4096-extra.pos.embd.only": "https://huggingface.co/allenai/longformer-base-4096-extra.pos.embd.only/resolve/main/merges.txt", - "allenai/longformer-large-4096-extra.pos.embd.only": "https://huggingface.co/allenai/longformer-large-4096-extra.pos.embd.only/resolve/main/merges.txt", + "allenai/longformer-large-4096": ( + "https://huggingface.co/allenai/longformer-large-4096/resolve/main/merges.txt" + ), + "allenai/longformer-large-4096-finetuned-triviaqa": ( + "https://huggingface.co/allenai/longformer-large-4096-finetuned-triviaqa/resolve/main/merges.txt" + ), + "allenai/longformer-base-4096-extra.pos.embd.only": ( + "https://huggingface.co/allenai/longformer-base-4096-extra.pos.embd.only/resolve/main/merges.txt" + ), + "allenai/longformer-large-4096-extra.pos.embd.only": ( + "https://huggingface.co/allenai/longformer-large-4096-extra.pos.embd.only/resolve/main/merges.txt" + ), }, "tokenizer_file": { - "allenai/longformer-base-4096": "https://huggingface.co/allenai/longformer-base-4096/resolve/main/tokenizer.json", - "allenai/longformer-large-4096": "https://huggingface.co/allenai/longformer-large-4096/resolve/main/tokenizer.json", - "allenai/longformer-large-4096-finetuned-triviaqa": "https://huggingface.co/allenai/longformer-large-4096-finetuned-triviaqa/resolve/main/tokenizer.json", - "allenai/longformer-base-4096-extra.pos.embd.only": "https://huggingface.co/allenai/longformer-base-4096-extra.pos.embd.only/resolve/main/tokenizer.json", - "allenai/longformer-large-4096-extra.pos.embd.only": "https://huggingface.co/allenai/longformer-large-4096-extra.pos.embd.only/resolve/main/tokenizer.json", + "allenai/longformer-base-4096": ( + "https://huggingface.co/allenai/longformer-base-4096/resolve/main/tokenizer.json" + ), + 
"allenai/longformer-large-4096": ( + "https://huggingface.co/allenai/longformer-large-4096/resolve/main/tokenizer.json" + ), + "allenai/longformer-large-4096-finetuned-triviaqa": ( + "https://huggingface.co/allenai/longformer-large-4096-finetuned-triviaqa/resolve/main/tokenizer.json" + ), + "allenai/longformer-base-4096-extra.pos.embd.only": ( + "https://huggingface.co/allenai/longformer-base-4096-extra.pos.embd.only/resolve/main/tokenizer.json" + ), + "allenai/longformer-large-4096-extra.pos.embd.only": ( + "https://huggingface.co/allenai/longformer-large-4096-extra.pos.embd.only/resolve/main/tokenizer.json" + ), }, } diff --git a/src/transformers/models/luke/convert_luke_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/luke/convert_luke_original_pytorch_checkpoint_to_pytorch.py index 520ae61b43..d2b2323b28 100644 --- a/src/transformers/models/luke/convert_luke_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/luke/convert_luke_original_pytorch_checkpoint_to_pytorch.py @@ -77,13 +77,17 @@ def convert_luke_checkpoint(checkpoint_path, metadata_path, entity_vocab_path, p raise ValueError(f"Missing keys {', '.join(missing_keys)}. Expected only missing embeddings.position_ids") if not (all(key.startswith("entity_predictions") or key.startswith("lm_head") for key in unexpected_keys)): raise ValueError( - f"Unexpected keys {', '.join([key for key in unexpected_keys if not (key.startswith('entity_predictions') or key.startswith('lm_head'))])}" + "Unexpected keys" + f" {', '.join([key for key in unexpected_keys if not (key.startswith('entity_predictions') or key.startswith('lm_head'))])}" ) # Check outputs tokenizer = LukeTokenizer.from_pretrained(pytorch_dump_folder_path, task="entity_classification") - text = "Top seed Ana Ivanovic said on Thursday she could hardly believe her luck as a fortuitous netcord helped the new world number one avoid a humiliating second- round exit at Wimbledon ." + text = ( + "Top seed Ana Ivanovic said on Thursday she could hardly believe her luck as a fortuitous netcord helped the" + " new world number one avoid a humiliating second- round exit at Wimbledon ." 
+ ) span = (39, 42) encoding = tokenizer(text, entity_spans=[span], add_prefix_space=True, return_tensors="pt") @@ -116,7 +120,8 @@ def convert_luke_checkpoint(checkpoint_path, metadata_path, entity_vocab_path, p if not (outputs.entity_last_hidden_state.shape == expected_shape): raise ValueError( - f"Outputs.entity_last_hidden_state.shape is {outputs.entity_last_hidden_state.shape}, Expected shape is {expected_shape}" + f"Outputs.entity_last_hidden_state.shape is {outputs.entity_last_hidden_state.shape}, Expected shape is" + f" {expected_shape}" ) if not torch.allclose(outputs.entity_last_hidden_state[0, :3, :3], expected_slice, atol=1e-4): raise ValueError @@ -129,7 +134,7 @@ def convert_luke_checkpoint(checkpoint_path, metadata_path, entity_vocab_path, p def load_entity_vocab(entity_vocab_path): entity_vocab = {} with open(entity_vocab_path, "r", encoding="utf-8") as f: - for (index, line) in enumerate(f): + for index, line in enumerate(f): title, _ = line.rstrip().split("\t") entity_vocab[title] = index diff --git a/src/transformers/models/luke/modeling_luke.py b/src/transformers/models/luke/modeling_luke.py index 7388e2031b..f7c36ff93d 100644 --- a/src/transformers/models/luke/modeling_luke.py +++ b/src/transformers/models/luke/modeling_luke.py @@ -874,7 +874,8 @@ LUKE_INPUTS_DOCSTRING = r""" @add_start_docstrings( - "The bare LUKE model transformer outputting raw hidden-states for both word tokens and entities without any specific head on top.", + "The bare LUKE model transformer outputting raw hidden-states for both word tokens and entities without any" + " specific head on top.", LUKE_START_DOCSTRING, ) class LukeModel(LukePreTrainedModel): diff --git a/src/transformers/models/luke/tokenization_luke.py b/src/transformers/models/luke/tokenization_luke.py index e75fda42ca..afec3f2690 100644 --- a/src/transformers/models/luke/tokenization_luke.py +++ b/src/transformers/models/luke/tokenization_luke.py @@ -253,7 +253,8 @@ class LukeTokenizer(RobertaTokenizer): self.max_entity_length = 2 else: raise ValueError( - f"Task {task} not supported. Select task from ['entity_classification', 'entity_pair_classification', 'entity_span_classification'] only." + f"Task {task} not supported. Select task from ['entity_classification', 'entity_pair_classification'," + " 'entity_span_classification'] only."
) self.max_mention_length = max_mention_length @@ -598,7 +599,7 @@ class LukeTokenizer(RobertaTokenizer): raise ValueError("entity_spans should be given as a list") elif len(entity_spans) > 0 and not isinstance(entity_spans[0], tuple): raise ValueError( - "entity_spans should be given as a list of tuples " "containing the start and end character indices" + "entity_spans should be given as a list of tuples containing the start and end character indices" ) if entities is not None: @@ -1007,7 +1008,8 @@ class LukeTokenizer(RobertaTokenizer): if num_invalid_entities != 0: logger.warning( - f"{num_invalid_entities} entities are ignored because their entity spans are invalid due to the truncation of input tokens" + f"{num_invalid_entities} entities are ignored because their entity spans are invalid due to the" + " truncation of input tokens" ) if truncation_strategy != TruncationStrategy.DO_NOT_TRUNCATE and total_entity_len > max_entity_length: @@ -1032,7 +1034,7 @@ class LukeTokenizer(RobertaTokenizer): entity_position_ids = [] entity_start_positions = [] entity_end_positions = [] - for (token_spans, offset) in ( + for token_spans, offset in ( (valid_entity_token_spans, entity_token_offset), (valid_pair_entity_token_spans, pair_entity_token_offset), ): @@ -1181,7 +1183,7 @@ class LukeTokenizer(RobertaTokenizer): else: raise ValueError( f"type of {first_element} unknown: {type(first_element)}. " - f"Should be one of a python, numpy, pytorch or tensorflow object." + "Should be one of a python, numpy, pytorch or tensorflow object." ) for key, value in encoded_inputs.items(): diff --git a/src/transformers/models/lxmert/convert_lxmert_original_tf_checkpoint_to_pytorch.py b/src/transformers/models/lxmert/convert_lxmert_original_tf_checkpoint_to_pytorch.py index 7debd71af3..f8eb86f1d1 100755 --- a/src/transformers/models/lxmert/convert_lxmert_original_tf_checkpoint_to_pytorch.py +++ b/src/transformers/models/lxmert/convert_lxmert_original_tf_checkpoint_to_pytorch.py @@ -51,8 +51,7 @@ if __name__ == "__main__": default=None, type=str, required=True, - help="The config json file corresponding to the pre-trained model. \n" - "This specifies the model architecture.", + help="The config json file corresponding to the pre-trained model. \nThis specifies the model architecture.", ) parser.add_argument( "--pytorch_dump_path", default=None, type=str, required=True, help="Path to the output PyTorch model." 
diff --git a/src/transformers/models/lxmert/modeling_lxmert.py b/src/transformers/models/lxmert/modeling_lxmert.py index c9b2541251..823fcdb545 100644 --- a/src/transformers/models/lxmert/modeling_lxmert.py +++ b/src/transformers/models/lxmert/modeling_lxmert.py @@ -1193,7 +1193,8 @@ class LxmertForPreTraining(LxmertPreTrainedModel): if "masked_lm_labels" in kwargs: warnings.warn( - "The `masked_lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.", + "The `masked_lm_labels` argument is deprecated and will be removed in a future version, use `labels`" + " instead.", FutureWarning, ) labels = kwargs.pop("masked_lm_labels") diff --git a/src/transformers/models/lxmert/tokenization_lxmert_fast.py b/src/transformers/models/lxmert/tokenization_lxmert_fast.py index 9e88bc1581..8cfa20a9a2 100644 --- a/src/transformers/models/lxmert/tokenization_lxmert_fast.py +++ b/src/transformers/models/lxmert/tokenization_lxmert_fast.py @@ -24,7 +24,9 @@ PRETRAINED_VOCAB_FILES_MAP = { "unc-nlp/lxmert-base-uncased": "https://huggingface.co/unc-nlp/lxmert-base-uncased/resolve/main/vocab.txt", }, "tokenizer_file": { - "unc-nlp/lxmert-base-uncased": "https://huggingface.co/unc-nlp/lxmert-base-uncased/resolve/main/tokenizer.json", + "unc-nlp/lxmert-base-uncased": ( + "https://huggingface.co/unc-nlp/lxmert-base-uncased/resolve/main/tokenizer.json" + ), }, } diff --git a/src/transformers/models/m2m_100/modeling_m2m_100.py b/src/transformers/models/m2m_100/modeling_m2m_100.py index 309b666930..1dc7f6144c 100755 --- a/src/transformers/models/m2m_100/modeling_m2m_100.py +++ b/src/transformers/models/m2m_100/modeling_m2m_100.py @@ -288,7 +288,8 @@ class M2M100Attention(nn.Module): if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len): raise ValueError( - f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is {attn_weights.size()}" + f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is" + f" {attn_weights.size()}" ) if attention_mask is not None: @@ -304,7 +305,8 @@ class M2M100Attention(nn.Module): if layer_head_mask is not None: if layer_head_mask.size() != (self.num_heads,): raise ValueError( - f"Head mask for a single layer should be of size {(self.num_heads,)}, but is {layer_head_mask.size()}" + f"Head mask for a single layer should be of size {(self.num_heads,)}, but is" + f" {layer_head_mask.size()}" ) attn_weights = layer_head_mask.view(1, -1, 1, 1) * attn_weights.view(bsz, self.num_heads, tgt_len, src_len) attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len) @@ -325,7 +327,8 @@ class M2M100Attention(nn.Module): if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim): raise ValueError( - f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is {attn_output.size()}" + f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is" + f" {attn_output.size()}" ) attn_output = attn_output.view(bsz, self.num_heads, tgt_len, self.head_dim) @@ -793,7 +796,8 @@ class M2M100Encoder(M2M100PreTrainedModel): if head_mask is not None: if head_mask.size()[0] != len(self.layers): raise ValueError( - f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}." + f"The head_mask should be specified for {len(self.layers)} layers, but it is for" + f" {head_mask.size()[0]}." 
) deepspeed_zero3_is_enabled = is_deepspeed_zero3_enabled() @@ -1025,7 +1029,8 @@ class M2M100Decoder(M2M100PreTrainedModel): if attn_mask is not None: if attn_mask.size()[0] != len(self.layers): raise ValueError( - f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}." + f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for" + f" {head_mask.size()[0]}." ) deepspeed_zero3_is_enabled = is_deepspeed_zero3_enabled() @@ -1046,7 +1051,8 @@ class M2M100Decoder(M2M100PreTrainedModel): if use_cache: logger.warning( - "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..." + "`use_cache=True` is incompatible with gradient checkpointing. Setting" + " `use_cache=False`..." ) use_cache = False diff --git a/src/transformers/models/marian/convert_marian_to_pytorch.py b/src/transformers/models/marian/convert_marian_to_pytorch.py index bd8490cb2d..1fb5a34f06 100644 --- a/src/transformers/models/marian/convert_marian_to_pytorch.py +++ b/src/transformers/models/marian/convert_marian_to_pytorch.py @@ -140,17 +140,21 @@ GROUP_TO_OPUS_NAME = { "opus-mt-NORTH_EU-NORTH_EU": "de+nl+fy+af+da+fo+is+no+nb+nn+sv-de+nl+fy+af+da+fo+is+no+nb+nn+sv", "opus-mt-de-ZH": "de-cmn+cn+yue+ze_zh+zh_cn+zh_CN+zh_HK+zh_tw+zh_TW+zh_yue+zhs+zht+zh", "opus-mt-en_el_es_fi-en_el_es_fi": "en+el+es+fi-en+el+es+fi", - "opus-mt-en-ROMANCE": "en-fr+fr_BE+fr_CA+fr_FR+wa+frp+oc+ca+rm+lld+fur+lij+lmo+es+es_AR+es_CL+es_CO+es_CR+es_DO" - "+es_EC+es_ES+es_GT+es_HN+es_MX+es_NI+es_PA+es_PE+es_PR+es_SV+es_UY+es_VE+pt+pt_br+pt_BR" - "+pt_PT+gl+lad+an+mwl+it+it_IT+co+nap+scn+vec+sc+ro+la", + "opus-mt-en-ROMANCE": ( + "en-fr+fr_BE+fr_CA+fr_FR+wa+frp+oc+ca+rm+lld+fur+lij+lmo+es+es_AR+es_CL+es_CO+es_CR+es_DO" + "+es_EC+es_ES+es_GT+es_HN+es_MX+es_NI+es_PA+es_PE+es_PR+es_SV+es_UY+es_VE+pt+pt_br+pt_BR" + "+pt_PT+gl+lad+an+mwl+it+it_IT+co+nap+scn+vec+sc+ro+la" + ), "opus-mt-en-CELTIC": "en-ga+cy+br+gd+kw+gv", "opus-mt-es-NORWAY": "es-nb_NO+nb+nn_NO+nn+nog+no_nb+no", "opus-mt-fi_nb_no_nn_ru_sv_en-SAMI": "fi+nb+no+nn+ru+sv+en-se+sma+smj+smn+sms", "opus-mt-fi-ZH": "fi-cmn+cn+yue+ze_zh+zh_cn+zh_CN+zh_HK+zh_tw+zh_TW+zh_yue+zhs+zht+zh", "opus-mt-fi-NORWAY": "fi-nb_NO+nb+nn_NO+nn+nog+no_nb+no", - "opus-mt-ROMANCE-en": "fr+fr_BE+fr_CA+fr_FR+wa+frp+oc+ca+rm+lld+fur+lij+lmo+es+es_AR+es_CL+es_CO+es_CR+es_DO" - "+es_EC+es_ES+es_GT+es_HN+es_MX+es_NI+es_PA+es_PE+es_PR+es_SV+es_UY+es_VE+pt+pt_br+pt_BR" - "+pt_PT+gl+lad+an+mwl+it+it_IT+co+nap+scn+vec+sc+ro+la-en", + "opus-mt-ROMANCE-en": ( + "fr+fr_BE+fr_CA+fr_FR+wa+frp+oc+ca+rm+lld+fur+lij+lmo+es+es_AR+es_CL+es_CO+es_CR+es_DO" + "+es_EC+es_ES+es_GT+es_HN+es_MX+es_NI+es_PA+es_PE+es_PR+es_SV+es_UY+es_VE+pt+pt_br+pt_BR" + "+pt_PT+gl+lad+an+mwl+it+it_IT+co+nap+scn+vec+sc+ro+la-en" + ), "opus-mt-CELTIC-en": "ga+cy+br+gd+kw+gv-en", "opus-mt-sv-ZH": "sv-cmn+cn+yue+ze_zh+zh_cn+zh_CN+zh_HK+zh_tw+zh_TW+zh_yue+zhs+zht+zh", "opus-mt-sv-NORWAY": "sv-nb_NO+nb+nn_NO+nn+nog+no_nb+no", diff --git a/src/transformers/models/marian/modeling_marian.py b/src/transformers/models/marian/modeling_marian.py index 65a471d641..b8f82275a8 100755 --- a/src/transformers/models/marian/modeling_marian.py +++ b/src/transformers/models/marian/modeling_marian.py @@ -233,7 +233,8 @@ class MarianAttention(nn.Module): if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len): raise ValueError( - f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is {attn_weights.size()}" + f"Attention weights should 
be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is" + f" {attn_weights.size()}" ) if attention_mask is not None: @@ -249,7 +250,8 @@ class MarianAttention(nn.Module): if layer_head_mask is not None: if layer_head_mask.size() != (self.num_heads,): raise ValueError( - f"Head mask for a single layer should be of size {(self.num_heads,)}, but is {layer_head_mask.size()}" + f"Head mask for a single layer should be of size {(self.num_heads,)}, but is" + f" {layer_head_mask.size()}" ) attn_weights = layer_head_mask.view(1, -1, 1, 1) * attn_weights.view(bsz, self.num_heads, tgt_len, src_len) attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len) @@ -270,7 +272,8 @@ class MarianAttention(nn.Module): if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim): raise ValueError( - f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is {attn_output.size()}" + f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is" + f" {attn_output.size()}" ) attn_output = attn_output.view(bsz, self.num_heads, tgt_len, self.head_dim) @@ -993,9 +996,10 @@ class MarianDecoder(MarianPreTrainedModel): # check if head_mask/cross_attn_head_mask has a correct number of layers specified if desired for attn_mask, mask_name in zip([head_mask, cross_attn_head_mask], ["head_mask", "cross_attn_head_mask"]): if attn_mask is not None: - assert attn_mask.size()[0] == ( - len(self.layers) - ), f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}." + assert attn_mask.size()[0] == (len(self.layers)), ( + f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for" + f" {head_mask.size()[0]}." + ) for idx, decoder_layer in enumerate(self.layers): # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) if output_hidden_states: diff --git a/src/transformers/models/marian/modeling_tf_marian.py b/src/transformers/models/marian/modeling_tf_marian.py index 04a24ac9f9..d5f41abe13 100644 --- a/src/transformers/models/marian/modeling_tf_marian.py +++ b/src/transformers/models/marian/modeling_tf_marian.py @@ -267,7 +267,10 @@ class TFMarianAttention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(attn_weights), [bsz * self.num_heads, tgt_len, src_len], - message=f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is {shape_list(attn_weights)}", + message=( + f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is" + f" {shape_list(attn_weights)}" + ), ) if attention_mask is not None: @@ -277,7 +280,10 @@ class TFMarianAttention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(attention_mask), [bsz, 1, tgt_len, src_len], - message=f"Attention mask should be of size {(bsz, 1, tgt_len, src_len)}, but is {shape_list(attention_mask)}", + message=( + f"Attention mask should be of size {(bsz, 1, tgt_len, src_len)}, but is" + f" {shape_list(attention_mask)}" + ), ) attention_mask = tf.cast(attention_mask, dtype=attn_weights.dtype) @@ -293,7 +299,10 @@ class TFMarianAttention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(layer_head_mask), [self.num_heads], - message=f"Head mask for a single layer should be of size {(self.num_heads)}, but is {shape_list(layer_head_mask)}", + message=( + f"Head mask for a single layer should be of size {(self.num_heads)}, but is" + f" {shape_list(layer_head_mask)}" + ), ) attn_weights = tf.reshape(layer_head_mask, 
(1, -1, 1, 1)) * tf.reshape( @@ -310,7 +319,10 @@ class TFMarianAttention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(attn_output), [bsz * self.num_heads, tgt_len, self.head_dim], - message=f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is {shape_list(attn_output)}", + message=( + f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is" + f" {shape_list(attn_output)}" + ), ) attn_output = tf.transpose( @@ -784,7 +796,10 @@ class TFMarianEncoder(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(head_mask)[0], len(self.layers), - message=f"The head_mask should be specified for {len(self.layers)} layers, but it is for {shape_list(head_mask)[0]}.", + message=( + f"The head_mask should be specified for {len(self.layers)} layers, but it is for" + f" {shape_list(head_mask)[0]}." + ), ) # encoder layers @@ -983,7 +998,10 @@ class TFMarianDecoder(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(attn_mask)[0], len(self.layers), - message=f"The {attn_name} should be specified for {len(self.layers)} layers, but it is for {shape_list(attn_mask)[0]}.", + message=( + f"The {attn_name} should be specified for {len(self.layers)} layers, but it is for" + f" {shape_list(attn_mask)[0]}." + ), ) for idx, decoder_layer in enumerate(self.layers): diff --git a/src/transformers/models/marian/tokenization_marian.py b/src/transformers/models/marian/tokenization_marian.py index 3579d5dffa..62f145e7b7 100644 --- a/src/transformers/models/marian/tokenization_marian.py +++ b/src/transformers/models/marian/tokenization_marian.py @@ -47,7 +47,9 @@ PRETRAINED_VOCAB_FILES_MAP = { "Helsinki-NLP/opus-mt-en-de": "https://huggingface.co/Helsinki-NLP/opus-mt-en-de/resolve/main/vocab.json" }, "tokenizer_config_file": { - "Helsinki-NLP/opus-mt-en-de": "https://huggingface.co/Helsinki-NLP/opus-mt-en-de/resolve/main/tokenizer_config.json" + "Helsinki-NLP/opus-mt-en-de": ( + "https://huggingface.co/Helsinki-NLP/opus-mt-en-de/resolve/main/tokenizer_config.json" + ) }, } diff --git a/src/transformers/models/maskformer/configuration_maskformer.py b/src/transformers/models/maskformer/configuration_maskformer.py index 50ad6880ad..ab68de3f04 100644 --- a/src/transformers/models/maskformer/configuration_maskformer.py +++ b/src/transformers/models/maskformer/configuration_maskformer.py @@ -24,7 +24,9 @@ from ..swin import SwinConfig MASKFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "facebook/maskformer-swin-base-ade": "https://huggingface.co/facebook/maskformer-swin-base-ade/blob/main/config.json" + "facebook/maskformer-swin-base-ade": ( + "https://huggingface.co/facebook/maskformer-swin-base-ade/blob/main/config.json" + ) # See all MaskFormer models at https://huggingface.co/models?filter=maskformer } @@ -130,7 +132,8 @@ class MaskFormerConfig(PretrainedConfig): backbone_model_type = backbone_config.pop("model_type") if backbone_model_type not in self.backbones_supported: raise ValueError( - f"Backbone {backbone_model_type} not supported, please use one of {','.join(self.backbones_supported)}" + f"Backbone {backbone_model_type} not supported, please use one of" + f" {','.join(self.backbones_supported)}" ) backbone_config = AutoConfig.for_model(backbone_model_type, **backbone_config) @@ -141,7 +144,8 @@ class MaskFormerConfig(PretrainedConfig): decoder_type = decoder_config.pop("model_type") if decoder_type not in self.decoders_supported: raise ValueError( - f"Transformer Decoder {decoder_type} not supported, please use one of 
{','.join(self.decoders_supported)}" + f"Transformer Decoder {decoder_type} not supported, please use one of" + f" {','.join(self.decoders_supported)}" ) decoder_config = AutoConfig.for_model(decoder_type, **decoder_config) diff --git a/src/transformers/models/maskformer/convert_maskformer_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/maskformer/convert_maskformer_original_pytorch_checkpoint_to_pytorch.py index 045d2bc0f5..c08591e044 100644 --- a/src/transformers/models/maskformer/convert_maskformer_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/maskformer/convert_maskformer_original_pytorch_checkpoint_to_pytorch.py @@ -188,7 +188,7 @@ class OriginalMaskFormerCheckpointToOursConverter: self.config = config def pop_all(self, renamed_keys: List[Tuple[str, str]], dst_state_dict: StateDict, src_state_dict: StateDict): - for (src_key, dst_key) in renamed_keys: + for src_key, dst_key in renamed_keys: dst_state_dict[dst_key] = src_state_dict.pop(src_key) def replace_backbone(self, dst_state_dict: StateDict, src_state_dict: StateDict, config: MaskFormerConfig): @@ -643,12 +643,18 @@ if __name__ == "__main__": parser.add_argument( "--checkpoints_dir", type=Path, - help="A directory containing the model's checkpoints. The directory has to have the following structure: //.pkl", + help=( + "A directory containing the model's checkpoints. The directory has to have the following structure:" + " //.pkl" + ), ) parser.add_argument( "--configs_dir", type=Path, - help="A directory containing the model's configs, see detectron2 doc. The directory has to have the following structure: //.yaml", + help=( + "A directory containing the model's configs, see detectron2 doc. The directory has to have the following" + " structure: //.yaml" + ), ) parser.add_argument( "--pytorch_dump_folder_path", @@ -660,7 +666,10 @@ if __name__ == "__main__": "--maskformer_dir", required=True, type=Path, - help="A path to MaskFormer's original implementation directory. You can download from here: https://github.com/facebookresearch/MaskFormer", + help=( + "A path to MaskFormer's original implementation directory. You can download from here:" + " https://github.com/facebookresearch/MaskFormer" + ), ) args = parser.parse_args() diff --git a/src/transformers/models/maskformer/feature_extraction_maskformer.py b/src/transformers/models/maskformer/feature_extraction_maskformer.py index 5e466f2ddb..3a5fd49d80 100644 --- a/src/transformers/models/maskformer/feature_extraction_maskformer.py +++ b/src/transformers/models/maskformer/feature_extraction_maskformer.py @@ -253,8 +253,9 @@ class MaskFormerFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionM if not valid_segmentation_maps: raise ValueError( - "Segmentation maps must of type `PIL.Image.Image`, `np.ndarray` or `torch.Tensor` (single example)," - "`List[PIL.Image.Image]`, `List[np.ndarray]` or `List[torch.Tensor]` (batch of examples)." + "Segmentation maps must of type `PIL.Image.Image`, `np.ndarray` or `torch.Tensor` (single" + " example),`List[PIL.Image.Image]`, `List[np.ndarray]` or `List[torch.Tensor]` (batch of" + " examples)." ) is_batched = bool( @@ -591,7 +592,7 @@ class MaskFormerFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionM # mask probs has shape [BATCH, QUERIES, HEIGHT, WIDTH] # now, we need to iterate over the batch size to correctly process the segmentation we got from the queries using our thresholds. 
Even if the original predicted masks have the same shape across the batch, they won't after thresholding so batch-wise operations are impossible results: List[Dict[str, Tensor]] = [] - for (mask_probs, pred_scores, pred_labels) in zip(mask_probs, pred_scores, pred_labels): + for mask_probs, pred_scores, pred_labels in zip(mask_probs, pred_scores, pred_labels): mask_probs, pred_scores, pred_labels = self.remove_low_and_no_objects( mask_probs, pred_scores, pred_labels, object_mask_threshold, num_labels ) diff --git a/src/transformers/models/maskformer/modeling_maskformer.py b/src/transformers/models/maskformer/modeling_maskformer.py index 0d3538b04f..478d4d4103 100644 --- a/src/transformers/models/maskformer/modeling_maskformer.py +++ b/src/transformers/models/maskformer/modeling_maskformer.py @@ -664,7 +664,7 @@ class MaskFormerSwinSelfAttention(nn.Module): super().__init__() if dim % num_heads != 0: raise ValueError( - f"The hidden size ({dim}) is not a multiple of the number of attention " f"heads ({num_heads})" + f"The hidden size ({dim}) is not a multiple of the number of attention heads ({num_heads})" ) self.num_attention_heads = num_heads @@ -1194,7 +1194,8 @@ class DetrAttention(nn.Module): self.head_dim = embed_dim // num_heads if self.head_dim * num_heads != self.embed_dim: raise ValueError( - f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`: {num_heads})." + f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`:" + f" {num_heads})." ) self.scaling = self.head_dim**-0.5 @@ -1258,7 +1259,8 @@ class DetrAttention(nn.Module): if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len): raise ValueError( - f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is {attn_weights.size()}" + f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is" + f" {attn_weights.size()}" ) if attention_mask is not None: @@ -1287,7 +1289,8 @@ class DetrAttention(nn.Module): if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim): raise ValueError( - f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is {attn_output.size()}" + f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is" + f" {attn_output.size()}" ) attn_output = attn_output.view(bsz, self.num_heads, tgt_len, self.head_dim) diff --git a/src/transformers/models/mbart/modeling_mbart.py b/src/transformers/models/mbart/modeling_mbart.py index 78d094922b..72ee66a45b 100755 --- a/src/transformers/models/mbart/modeling_mbart.py +++ b/src/transformers/models/mbart/modeling_mbart.py @@ -236,7 +236,8 @@ class MBartAttention(nn.Module): if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len): raise ValueError( - f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is {attn_weights.size()}" + f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is" + f" {attn_weights.size()}" ) if attention_mask is not None: @@ -252,7 +253,8 @@ class MBartAttention(nn.Module): if layer_head_mask is not None: if layer_head_mask.size() != (self.num_heads,): raise ValueError( - f"Head mask for a single layer should be of size {(self.num_heads,)}, but is {layer_head_mask.size()}" + f"Head mask for a single layer should be of size {(self.num_heads,)}, but is" + f" {layer_head_mask.size()}" ) attn_weights = layer_head_mask.view(1, -1, 1, 1) * 
attn_weights.view(bsz, self.num_heads, tgt_len, src_len) attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len) @@ -273,7 +275,8 @@ class MBartAttention(nn.Module): if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim): raise ValueError( - f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is {attn_output.size()}" + f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is" + f" {attn_output.size()}" ) attn_output = attn_output.view(bsz, self.num_heads, tgt_len, self.head_dim) @@ -808,7 +811,8 @@ class MBartEncoder(MBartPreTrainedModel): if head_mask is not None: if head_mask.size()[0] != len(self.layers): raise ValueError( - f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}." + f"The head_mask should be specified for {len(self.layers)} layers, but it is for" + f" {head_mask.size()[0]}." ) for idx, encoder_layer in enumerate(self.layers): if output_hidden_states: @@ -1048,7 +1052,8 @@ class MBartDecoder(MBartPreTrainedModel): if attn_mask is not None: if attn_mask.size()[0] != len(self.layers): raise ValueError( - f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}." + f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for" + f" {head_mask.size()[0]}." ) for idx, decoder_layer in enumerate(self.layers): # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) diff --git a/src/transformers/models/mbart/modeling_tf_mbart.py b/src/transformers/models/mbart/modeling_tf_mbart.py index b7de8be6e6..fa19d711a3 100644 --- a/src/transformers/models/mbart/modeling_tf_mbart.py +++ b/src/transformers/models/mbart/modeling_tf_mbart.py @@ -229,7 +229,10 @@ class TFMBartAttention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(attn_weights), [bsz * self.num_heads, tgt_len, src_len], - message=f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is {shape_list(attn_weights)}", + message=( + f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is" + f" {shape_list(attn_weights)}" + ), ) if attention_mask is not None: @@ -239,7 +242,10 @@ class TFMBartAttention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(attention_mask), [bsz, 1, tgt_len, src_len], - message=f"Attention mask should be of size {(bsz, 1, tgt_len, src_len)}, but is {shape_list(attention_mask)}", + message=( + f"Attention mask should be of size {(bsz, 1, tgt_len, src_len)}, but is" + f" {shape_list(attention_mask)}" + ), ) attention_mask = tf.cast(attention_mask, dtype=attn_weights.dtype) @@ -255,7 +261,10 @@ class TFMBartAttention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(layer_head_mask), [self.num_heads], - message=f"Head mask for a single layer should be of size {(self.num_heads)}, but is {shape_list(layer_head_mask)}", + message=( + f"Head mask for a single layer should be of size {(self.num_heads)}, but is" + f" {shape_list(layer_head_mask)}" + ), ) attn_weights = tf.reshape(layer_head_mask, (1, -1, 1, 1)) * tf.reshape( @@ -272,7 +281,10 @@ class TFMBartAttention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(attn_output), [bsz * self.num_heads, tgt_len, self.head_dim], - message=f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is {shape_list(attn_output)}", + message=( + f"`attn_output` should be of size {(bsz, 
self.num_heads, tgt_len, self.head_dim)}, but is" + f" {shape_list(attn_output)}" + ), ) attn_output = tf.transpose( @@ -763,7 +775,10 @@ class TFMBartEncoder(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(head_mask)[0], len(self.layers), - message=f"The head_mask should be specified for {len(self.layers)} layers, but it is for {shape_list(head_mask)[0]}.", + message=( + f"The head_mask should be specified for {len(self.layers)} layers, but it is for" + f" {shape_list(head_mask)[0]}." + ), ) # encoder layers @@ -969,7 +984,10 @@ class TFMBartDecoder(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(attn_mask)[0], len(self.layers), - message=f"The {attn_mask_name} should be specified for {len(self.layers)} layers, but it is for {shape_list(attn_mask)[0]}.", + message=( + f"The {attn_mask_name} should be specified for {len(self.layers)} layers, but it is for" + f" {shape_list(attn_mask)[0]}." + ), ) for idx, decoder_layer in enumerate(self.layers): diff --git a/src/transformers/models/mbart/tokenization_mbart.py b/src/transformers/models/mbart/tokenization_mbart.py index d6ea6260ae..2517dfb584 100644 --- a/src/transformers/models/mbart/tokenization_mbart.py +++ b/src/transformers/models/mbart/tokenization_mbart.py @@ -32,8 +32,12 @@ VOCAB_FILES_NAMES = {"vocab_file": "sentencepiece.bpe.model"} PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "facebook/mbart-large-en-ro": "https://huggingface.co/facebook/mbart-large-en-ro/resolve/main/sentencepiece.bpe.model", - "facebook/mbart-large-cc25": "https://huggingface.co/facebook/mbart-large-cc25/resolve/main/sentencepiece.bpe.model", + "facebook/mbart-large-en-ro": ( + "https://huggingface.co/facebook/mbart-large-en-ro/resolve/main/sentencepiece.bpe.model" + ), + "facebook/mbart-large-cc25": ( + "https://huggingface.co/facebook/mbart-large-cc25/resolve/main/sentencepiece.bpe.model" + ), } } diff --git a/src/transformers/models/mbart/tokenization_mbart_fast.py b/src/transformers/models/mbart/tokenization_mbart_fast.py index a172d37913..52902e3a40 100644 --- a/src/transformers/models/mbart/tokenization_mbart_fast.py +++ b/src/transformers/models/mbart/tokenization_mbart_fast.py @@ -38,8 +38,12 @@ VOCAB_FILES_NAMES = {"vocab_file": "sentencepiece.bpe.model", "tokenizer_file": PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "facebook/mbart-large-en-ro": "https://huggingface.co/facebook/mbart-large-en-ro/resolve/main/sentencepiece.bpe.model", - "facebook/mbart-large-cc25": "https://huggingface.co/facebook/mbart-large-cc25/resolve/main/sentencepiece.bpe.model", + "facebook/mbart-large-en-ro": ( + "https://huggingface.co/facebook/mbart-large-en-ro/resolve/main/sentencepiece.bpe.model" + ), + "facebook/mbart-large-cc25": ( + "https://huggingface.co/facebook/mbart-large-cc25/resolve/main/sentencepiece.bpe.model" + ), }, "tokenizer_file": { "facebook/mbart-large-en-ro": "https://huggingface.co/facebook/mbart-large-en-ro/resolve/main/tokenizer.json", diff --git a/src/transformers/models/mbart50/tokenization_mbart50.py b/src/transformers/models/mbart50/tokenization_mbart50.py index c7e53c6149..145a546c18 100644 --- a/src/transformers/models/mbart50/tokenization_mbart50.py +++ b/src/transformers/models/mbart50/tokenization_mbart50.py @@ -32,7 +32,9 @@ VOCAB_FILES_NAMES = {"vocab_file": "sentencepiece.bpe.model"} PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "facebook/mbart-large-50-one-to-many-mmt": "https://huggingface.co/facebook/mbart-large-50-one-to-many-mmt/resolve/main/sentencepiece.bpe.model", + 
"facebook/mbart-large-50-one-to-many-mmt": ( + "https://huggingface.co/facebook/mbart-large-50-one-to-many-mmt/resolve/main/sentencepiece.bpe.model" + ), } } diff --git a/src/transformers/models/mbart50/tokenization_mbart50_fast.py b/src/transformers/models/mbart50/tokenization_mbart50_fast.py index 97e2584a0d..28fb726c47 100644 --- a/src/transformers/models/mbart50/tokenization_mbart50_fast.py +++ b/src/transformers/models/mbart50/tokenization_mbart50_fast.py @@ -37,10 +37,14 @@ VOCAB_FILES_NAMES = {"vocab_file": "sentencepiece.bpe.model", "tokenizer_file": PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "facebook/mbart-large-50-one-to-many-mmt": "https://huggingface.co/facebook/mbart-large-50-one-to-many-mmt/resolve/main/sentencepiece.bpe.model", + "facebook/mbart-large-50-one-to-many-mmt": ( + "https://huggingface.co/facebook/mbart-large-50-one-to-many-mmt/resolve/main/sentencepiece.bpe.model" + ), }, "tokenizer_file": { - "facebook/mbart-large-50-one-to-many-mmt": "https://huggingface.co/facebook/mbart-large-50-one-to-many-mmt/resolve/main/tokenizer.json", + "facebook/mbart-large-50-one-to-many-mmt": ( + "https://huggingface.co/facebook/mbart-large-50-one-to-many-mmt/resolve/main/tokenizer.json" + ), }, } diff --git a/src/transformers/models/megatron_bert/modeling_megatron_bert.py b/src/transformers/models/megatron_bert/modeling_megatron_bert.py index e914822736..371782c297 100755 --- a/src/transformers/models/megatron_bert/modeling_megatron_bert.py +++ b/src/transformers/models/megatron_bert/modeling_megatron_bert.py @@ -460,7 +460,8 @@ class MegatronBertLayer(nn.Module): if self.is_decoder and encoder_hidden_states is not None: if not hasattr(self, "crossattention"): raise AttributeError( - f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`" + f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers" + " by setting `config.add_cross_attention=True`" ) # cross_attn cached key/values tuple is at positions 3,4 of past_key_value tuple @@ -1426,7 +1427,8 @@ class MegatronBertForNextSentencePrediction(MegatronBertPreTrainedModel): if "next_sentence_label" in kwargs: warnings.warn( - "The `next_sentence_label` argument is deprecated and will be removed in a future version, use `labels` instead.", + "The `next_sentence_label` argument is deprecated and will be removed in a future version, use" + " `labels` instead.", FutureWarning, ) labels = kwargs.pop("next_sentence_label") diff --git a/src/transformers/models/mluke/convert_mluke_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/mluke/convert_mluke_original_pytorch_checkpoint_to_pytorch.py index c75a710cee..9d61c3bc8e 100644 --- a/src/transformers/models/mluke/convert_mluke_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/mluke/convert_mluke_original_pytorch_checkpoint_to_pytorch.py @@ -153,7 +153,8 @@ def convert_luke_checkpoint(checkpoint_path, metadata_path, entity_vocab_path, p if not (outputs.entity_last_hidden_state.shape == expected_shape): raise ValueError( - f"Outputs.entity_last_hidden_state.shape is {outputs.entity_last_hidden_state.shape}, Expected shape is {expected_shape}" + f"Outputs.entity_last_hidden_state.shape is {outputs.entity_last_hidden_state.shape}, Expected shape is" + f" {expected_shape}" ) if not torch.allclose(outputs.entity_last_hidden_state[0, :3, :3], expected_slice, atol=1e-4): raise ValueError diff --git 
a/src/transformers/models/mluke/tokenization_mluke.py b/src/transformers/models/mluke/tokenization_mluke.py index 24a6304fc1..161c6b8d0f 100644 --- a/src/transformers/models/mluke/tokenization_mluke.py +++ b/src/transformers/models/mluke/tokenization_mluke.py @@ -342,7 +342,8 @@ class MLukeTokenizer(PreTrainedTokenizer): self.max_entity_length = 2 else: raise ValueError( - f"Task {task} not supported. Select task from ['entity_classification', 'entity_pair_classification', 'entity_span_classification'] only." + f"Task {task} not supported. Select task from ['entity_classification', 'entity_pair_classification'," + " 'entity_span_classification'] only." ) self.max_mention_length = max_mention_length @@ -707,7 +708,7 @@ class MLukeTokenizer(PreTrainedTokenizer): raise ValueError("entity_spans should be given as a list") elif len(entity_spans) > 0 and not isinstance(entity_spans[0], tuple): raise ValueError( - "entity_spans should be given as a list of tuples " "containing the start and end character indices" + "entity_spans should be given as a list of tuples containing the start and end character indices" ) if entities is not None: @@ -1119,7 +1120,8 @@ class MLukeTokenizer(PreTrainedTokenizer): if num_invalid_entities != 0: logger.warning( - f"{num_invalid_entities} entities are ignored because their entity spans are invalid due to the truncation of input tokens" + f"{num_invalid_entities} entities are ignored because their entity spans are invalid due to the" + " truncation of input tokens" ) if truncation_strategy != TruncationStrategy.DO_NOT_TRUNCATE and total_entity_len > max_entity_length: @@ -1144,7 +1146,7 @@ class MLukeTokenizer(PreTrainedTokenizer): entity_position_ids = [] entity_start_positions = [] entity_end_positions = [] - for (token_spans, offset) in ( + for token_spans, offset in ( (valid_entity_token_spans, entity_token_offset), (valid_pair_entity_token_spans, pair_entity_token_offset), ): @@ -1294,7 +1296,7 @@ class MLukeTokenizer(PreTrainedTokenizer): else: raise ValueError( f"type of {first_element} unknown: {type(first_element)}. " - f"Should be one of a python, numpy, pytorch or tensorflow object." + "Should be one of a python, numpy, pytorch or tensorflow object." ) for key, value in encoded_inputs.items(): diff --git a/src/transformers/models/mobilebert/convert_mobilebert_original_tf_checkpoint_to_pytorch.py b/src/transformers/models/mobilebert/convert_mobilebert_original_tf_checkpoint_to_pytorch.py index 5c03331eb3..022a9d036c 100644 --- a/src/transformers/models/mobilebert/convert_mobilebert_original_tf_checkpoint_to_pytorch.py +++ b/src/transformers/models/mobilebert/convert_mobilebert_original_tf_checkpoint_to_pytorch.py @@ -46,8 +46,10 @@ if __name__ == "__main__": default=None, type=str, required=True, - help="The config json file corresponding to the pre-trained MobileBERT model. \n" - "This specifies the model architecture.", + help=( + "The config json file corresponding to the pre-trained MobileBERT model. \n" + "This specifies the model architecture." + ), ) parser.add_argument( "--pytorch_dump_path", default=None, type=str, required=True, help="Path to the output PyTorch model." 
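The hunks in this patch repeat two `black --preview` rewrites: a long message is split into adjacent string literals wrapped in parentheses, with the separating space moved to the front of the next fragment, and `assert condition, message` statements keep the parentheses around the message only. A minimal standalone sketch of why both forms are safe (all names and values below are illustrative, not taken from this diff):

# Adjacent string literals are concatenated at compile time, so the wrapped
# form builds exactly the same string as the single-line original.
num_heads, got = 16, (8,)  # illustrative values
wrapped = (
    f"Head mask for a single layer should be of size {(num_heads,)}, but is"
    f" {got}"
)
flat = f"Head mask for a single layer should be of size {(num_heads,)}, but is {got}"
assert wrapped == flat

# For asserts, only the message may be parenthesized: wrapping the whole
# `condition, message` pair would build a 2-tuple, which is always truthy,
# so the assertion could never fail.
num_layers, mask_len = 12, 12  # illustrative values
assert mask_len == num_layers, (
    f"The head_mask should be specified for {num_layers} layers, but it is for"
    f" {mask_len}."
)

# The one pitfall of adjacent literals: if neither fragment carries the
# separating space, words fuse silently, e.g. "longerthan".
fused = "Sequences longer" "than this will be truncated."

The same motif explains why the later prophetnet and rag hunks move the parentheses from the asserted condition onto the message rather than the other way around.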
diff --git a/src/transformers/models/mobilebert/modeling_mobilebert.py b/src/transformers/models/mobilebert/modeling_mobilebert.py index 6d2b2d3ce2..4e4b0d963b 100644 --- a/src/transformers/models/mobilebert/modeling_mobilebert.py +++ b/src/transformers/models/mobilebert/modeling_mobilebert.py @@ -1188,7 +1188,8 @@ class MobileBertForNextSentencePrediction(MobileBertPreTrainedModel): if "next_sentence_label" in kwargs: warnings.warn( - "The `next_sentence_label` argument is deprecated and will be removed in a future version, use `labels` instead.", + "The `next_sentence_label` argument is deprecated and will be removed in a future version, use" + " `labels` instead.", FutureWarning, ) labels = kwargs.pop("next_sentence_label") diff --git a/src/transformers/models/mpnet/tokenization_mpnet.py b/src/transformers/models/mpnet/tokenization_mpnet.py index f092e6a311..713a528d55 100644 --- a/src/transformers/models/mpnet/tokenization_mpnet.py +++ b/src/transformers/models/mpnet/tokenization_mpnet.py @@ -175,8 +175,8 @@ class MPNetTokenizer(PreTrainedTokenizer): if not os.path.isfile(vocab_file): raise ValueError( - f"Can't find a vocabulary file at path '{vocab_file}'. To load the vocabulary from a Google pretrained " - "model use `tokenizer = AutoTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`" + f"Can't find a vocabulary file at path '{vocab_file}'. To load the vocabulary from a Google pretrained" + " model use `tokenizer = AutoTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`" ) self.vocab = load_vocab(vocab_file) self.ids_to_tokens = collections.OrderedDict([(ids, tok) for tok, ids in self.vocab.items()]) diff --git a/src/transformers/models/openai/convert_openai_original_tf_checkpoint_to_pytorch.py b/src/transformers/models/openai/convert_openai_original_tf_checkpoint_to_pytorch.py index b57f2dd033..1b101aea0c 100755 --- a/src/transformers/models/openai/convert_openai_original_tf_checkpoint_to_pytorch.py +++ b/src/transformers/models/openai/convert_openai_original_tf_checkpoint_to_pytorch.py @@ -64,8 +64,10 @@ if __name__ == "__main__": "--openai_config_file", default="", type=str, - help="An optional config json file corresponding to the pre-trained OpenAI model. \n" - "This specifies the model architecture.", + help=( + "An optional config json file corresponding to the pre-trained OpenAI model. \n" + "This specifies the model architecture." + ), ) args = parser.parse_args() convert_openai_checkpoint_to_pytorch( diff --git a/src/transformers/models/openai/modeling_openai.py b/src/transformers/models/openai/modeling_openai.py index 2262db9aa8..f513678137 100644 --- a/src/transformers/models/openai/modeling_openai.py +++ b/src/transformers/models/openai/modeling_openai.py @@ -81,12 +81,14 @@ def load_tf_weights_in_openai_gpt(model, config, openai_checkpoint_folder_path): # Check that the token and position embeddings weight dimensions map those of the init parameters. 
if model.tokens_embed.weight.shape != init_params[1].shape: raise ValueError( - f"tokens_embed.weight.shape: {model.tokens_embed.weight.shape} does not match init_param[1].shape: {init_params[1].shape}" + f"tokens_embed.weight.shape: {model.tokens_embed.weight.shape} does not match init_param[1].shape:" + f" {init_params[1].shape}" ) if model.positions_embed.weight.shape != init_params[0].shape: raise ValueError( - f"positions_embed.weight.shape: {model.positions_embed.weight.shape} does not match init_param[0].shape: {init_params[0].shape}" + f"positions_embed.weight.shape: {model.positions_embed.weight.shape} does not match init_param[0].shape:" + f" {init_params[0].shape}" ) model.tokens_embed.weight.data = torch.from_numpy(init_params[1]) @@ -812,7 +814,7 @@ class OpenAIGPTForSequenceClassification(OpenAIGPTPreTrainedModel): sequence_lengths = -1 logger.warning( f"{self.__class__.__name__} will not detect padding tokens in `inputs_embeds`. Results may be " - f"unexpected if using padding tokens in conjunction with `inputs_embeds.`" + "unexpected if using padding tokens in conjunction with `inputs_embeds.`" ) pooled_logits = logits[range(batch_size), sequence_lengths] diff --git a/src/transformers/models/openai/modeling_tf_openai.py b/src/transformers/models/openai/modeling_tf_openai.py index 24a7935eb0..528494836a 100644 --- a/src/transformers/models/openai/modeling_tf_openai.py +++ b/src/transformers/models/openai/modeling_tf_openai.py @@ -851,7 +851,7 @@ class TFOpenAIGPTForSequenceClassification(TFOpenAIGPTPreTrainedModel, TFSequenc sequence_lengths = -1 logger.warning( f"{self.__class__.__name__} will not detect padding tokens in `inputs_embeds`. Results may be " - f"unexpected if using padding tokens in conjunction with `inputs_embeds.`" + "unexpected if using padding tokens in conjunction with `inputs_embeds.`" ) loss = None diff --git a/src/transformers/models/opt/convert_opt_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/opt/convert_opt_original_pytorch_checkpoint_to_pytorch.py index 97a434eb46..5992dc7e9a 100644 --- a/src/transformers/models/opt/convert_opt_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/opt/convert_opt_original_pytorch_checkpoint_to_pytorch.py @@ -82,7 +82,10 @@ if __name__ == "__main__": parser.add_argument( "--fairseq_path", type=str, - help="path to fairseq checkpoint in correct format. You can find all checkpoints in the correct format here: https://huggingface.co/models?other=opt_metasq", + help=( + "path to fairseq checkpoint in correct format. 
You can find all checkpoints in the correct format here:" + " https://huggingface.co/models?other=opt_metasq" + ), ) parser.add_argument("--pytorch_dump_folder_path", default=None, type=str, help="Path to the output PyTorch model.") parser.add_argument("--hf_config", default=None, type=str, help="Define HF config.") diff --git a/src/transformers/models/opt/modeling_opt.py b/src/transformers/models/opt/modeling_opt.py index f0e9770c6c..7cb171425c 100644 --- a/src/transformers/models/opt/modeling_opt.py +++ b/src/transformers/models/opt/modeling_opt.py @@ -227,7 +227,8 @@ class OPTAttention(nn.Module): if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len): raise ValueError( - f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is {attn_weights.size()}" + f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is" + f" {attn_weights.size()}" ) if attention_mask is not None: @@ -243,7 +244,8 @@ class OPTAttention(nn.Module): if layer_head_mask is not None: if layer_head_mask.size() != (self.num_heads,): raise ValueError( - f"Head mask for a single layer should be of size {(self.num_heads,)}, but is {layer_head_mask.size()}" + f"Head mask for a single layer should be of size {(self.num_heads,)}, but is" + f" {layer_head_mask.size()}" ) attn_weights = layer_head_mask.view(1, -1, 1, 1) * attn_weights.view(bsz, self.num_heads, tgt_len, src_len) attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len) @@ -264,7 +266,8 @@ class OPTAttention(nn.Module): if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim): raise ValueError( - f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is {attn_output.size()}" + f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is" + f" {attn_output.size()}" ) attn_output = attn_output.view(bsz, self.num_heads, tgt_len, self.head_dim) @@ -679,7 +682,8 @@ class OPTDecoder(OPTPreTrainedModel): if attn_mask is not None: if attn_mask.size()[0] != (len(self.layers)): raise ValueError( - f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}." + f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for" + f" {head_mask.size()[0]}." 
) for idx, decoder_layer in enumerate(self.layers): diff --git a/src/transformers/models/pegasus/modeling_pegasus.py b/src/transformers/models/pegasus/modeling_pegasus.py index 2f79fa93fe..99ff97b269 100755 --- a/src/transformers/models/pegasus/modeling_pegasus.py +++ b/src/transformers/models/pegasus/modeling_pegasus.py @@ -233,7 +233,8 @@ class PegasusAttention(nn.Module): if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len): raise ValueError( - f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is {attn_weights.size()}" + f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is" + f" {attn_weights.size()}" ) if attention_mask is not None: @@ -249,7 +250,8 @@ class PegasusAttention(nn.Module): if layer_head_mask is not None: if layer_head_mask.size() != (self.num_heads,): raise ValueError( - f"Head mask for a single layer should be of size {(self.num_heads,)}, but is {layer_head_mask.size()}" + f"Head mask for a single layer should be of size {(self.num_heads,)}, but is" + f" {layer_head_mask.size()}" ) attn_weights = layer_head_mask.view(1, -1, 1, 1) * attn_weights.view(bsz, self.num_heads, tgt_len, src_len) attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len) @@ -270,7 +272,8 @@ class PegasusAttention(nn.Module): if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim): raise ValueError( - f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is {attn_output.size()}" + f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is" + f" {attn_output.size()}" ) attn_output = attn_output.view(bsz, self.num_heads, tgt_len, self.head_dim) @@ -775,7 +778,8 @@ class PegasusEncoder(PegasusPreTrainedModel): if head_mask is not None: if head_mask.size()[0] != len(self.layers): raise ValueError( - f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}." + f"The head_mask should be specified for {len(self.layers)} layers, but it is for" + f" {head_mask.size()[0]}." ) for idx, encoder_layer in enumerate(self.layers): if output_hidden_states: @@ -1043,7 +1047,8 @@ class PegasusDecoder(PegasusPreTrainedModel): if attn_mask is not None: if attn_mask.size()[0] != len(self.layers): raise ValueError( - f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}." + f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for" + f" {head_mask.size()[0]}." 
) for idx, decoder_layer in enumerate(self.layers): # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) diff --git a/src/transformers/models/pegasus/modeling_tf_pegasus.py b/src/transformers/models/pegasus/modeling_tf_pegasus.py index 2a1b7994b6..2c5696f94d 100644 --- a/src/transformers/models/pegasus/modeling_tf_pegasus.py +++ b/src/transformers/models/pegasus/modeling_tf_pegasus.py @@ -268,7 +268,10 @@ class TFPegasusAttention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(attn_weights), [bsz * self.num_heads, tgt_len, src_len], - message=f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is {shape_list(attn_weights)}", + message=( + f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is" + f" {shape_list(attn_weights)}" + ), ) if attention_mask is not None: @@ -278,7 +281,10 @@ class TFPegasusAttention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(attention_mask), [bsz, 1, tgt_len, src_len], - message=f"Attention mask should be of size {(bsz, 1, tgt_len, src_len)}, but is {shape_list(attention_mask)}", + message=( + f"Attention mask should be of size {(bsz, 1, tgt_len, src_len)}, but is" + f" {shape_list(attention_mask)}" + ), ) attention_mask = tf.cast(attention_mask, dtype=attn_weights.dtype) @@ -294,7 +300,10 @@ class TFPegasusAttention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(layer_head_mask), [self.num_heads], - message=f"Head mask for a single layer should be of size {(self.num_heads)}, but is {shape_list(layer_head_mask)}", + message=( + f"Head mask for a single layer should be of size {(self.num_heads)}, but is" + f" {shape_list(layer_head_mask)}" + ), ) attn_weights = tf.reshape(layer_head_mask, (1, -1, 1, 1)) * tf.reshape( @@ -311,7 +320,10 @@ class TFPegasusAttention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(attn_output), [bsz * self.num_heads, tgt_len, self.head_dim], - message=f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is {shape_list(attn_output)}", + message=( + f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is" + f" {shape_list(attn_output)}" + ), ) attn_output = tf.transpose( @@ -787,7 +799,10 @@ class TFPegasusEncoder(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(head_mask)[0], len(self.layers), - message=f"The head_mask should be specified for {len(self.layers)} layers, but it is for {shape_list(head_mask)[0]}.", + message=( + f"The head_mask should be specified for {len(self.layers)} layers, but it is for" + f" {shape_list(head_mask)[0]}." + ), ) # encoder layers @@ -989,7 +1004,10 @@ class TFPegasusDecoder(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(attn_mask)[0], len(self.layers), - message=f"The {attn_mask_name} should be specified for {len(self.layers)} layers, but it is for {shape_list(attn_mask)[0]}.", + message=( + f"The {attn_mask_name} should be specified for {len(self.layers)} layers, but it is for" + f" {shape_list(attn_mask)[0]}." 
+ ), ) for idx, decoder_layer in enumerate(self.layers): diff --git a/src/transformers/models/pegasus/tokenization_pegasus.py b/src/transformers/models/pegasus/tokenization_pegasus.py index a6a9167e66..b4d1cdc198 100644 --- a/src/transformers/models/pegasus/tokenization_pegasus.py +++ b/src/transformers/models/pegasus/tokenization_pegasus.py @@ -119,7 +119,8 @@ class PegasusTokenizer(PreTrainedTokenizer): if additional_special_tokens is not None: if not isinstance(additional_special_tokens, list): raise TypeError( - f"additional_special_tokens should be of type {type(list)}, but is {type(additional_special_tokens)}" + f"additional_special_tokens should be of type {type(list)}, but is" + f" {type(additional_special_tokens)}" ) additional_special_tokens_extended = ( @@ -134,7 +135,8 @@ class PegasusTokenizer(PreTrainedTokenizer): if len(set(additional_special_tokens_extended)) != len(additional_special_tokens_extended): raise ValueError( - f"Please make sure that the provided additional_special_tokens do not contain an incorrectly shifted list of tokens. Found {additional_special_tokens_extended}." + "Please make sure that the provided additional_special_tokens do not contain an incorrectly" + f" shifted list of tokens. Found {additional_special_tokens_extended}." ) additional_special_tokens = additional_special_tokens_extended else: diff --git a/src/transformers/models/pegasus/tokenization_pegasus_fast.py b/src/transformers/models/pegasus/tokenization_pegasus_fast.py index 14399988f0..22c6018385 100644 --- a/src/transformers/models/pegasus/tokenization_pegasus_fast.py +++ b/src/transformers/models/pegasus/tokenization_pegasus_fast.py @@ -115,7 +115,8 @@ class PegasusTokenizerFast(PreTrainedTokenizerFast): if additional_special_tokens is not None: if not isinstance(additional_special_tokens, list): raise TypeError( - f"additional_special_tokens should be of type {type(list)}, but is {type(additional_special_tokens)}" + f"additional_special_tokens should be of type {type(list)}, but is" + f" {type(additional_special_tokens)}" ) additional_special_tokens_extended = ( @@ -130,7 +131,8 @@ class PegasusTokenizerFast(PreTrainedTokenizerFast): if len(set(additional_special_tokens_extended)) != len(additional_special_tokens_extended): raise ValueError( - f"Please make sure that the provided additional_special_tokens do not contain an incorrectly shifted list of tokens. Found {additional_special_tokens_extended}." + "Please make sure that the provided additional_special_tokens do not contain an incorrectly" + f" shifted list of tokens. Found {additional_special_tokens_extended}." 
) additional_special_tokens = additional_special_tokens_extended else: @@ -158,7 +160,8 @@ class PegasusTokenizerFast(PreTrainedTokenizerFast): if all_special_ids != set(range(len(self.additional_special_tokens) + 3)): raise ValueError( - f"There should be 3 special tokens: mask_token, pad_token, and eos_token + {len(self.additional_special_tokens)} additional_special_tokens, but got {all_special_ids}" + "There should be 3 special tokens: mask_token, pad_token, and eos_token +" + f" {len(self.additional_special_tokens)} additional_special_tokens, but got {all_special_ids}" ) return [1 if x in all_special_ids else 0 for x in seq] diff --git a/src/transformers/models/perceiver/modeling_perceiver.py b/src/transformers/models/perceiver/modeling_perceiver.py index 6dc1563b47..d5b66bd80b 100755 --- a/src/transformers/models/perceiver/modeling_perceiver.py +++ b/src/transformers/models/perceiver/modeling_perceiver.py @@ -864,8 +864,8 @@ class PerceiverModel(PerceiverPreTrainedModel): inputs_without_pos = None if inputs.size()[-1] != self.config.d_model: raise ValueError( - f"Last dimension of the inputs: {inputs.size()[-1]} doesn't correspond to config.d_model: {self.config.d_model}. " - "Make sure to set config.d_model appropriately." + f"Last dimension of the inputs: {inputs.size()[-1]} doesn't correspond to config.d_model:" + f" {self.config.d_model}. Make sure to set config.d_model appropriately." ) batch_size, seq_length, _ = inputs.size() diff --git a/src/transformers/models/plbart/modeling_plbart.py b/src/transformers/models/plbart/modeling_plbart.py index 97e3ec680c..8f341e6399 100755 --- a/src/transformers/models/plbart/modeling_plbart.py +++ b/src/transformers/models/plbart/modeling_plbart.py @@ -233,7 +233,8 @@ class PLBartAttention(nn.Module): if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len): raise ValueError( - f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is {attn_weights.size()}" + f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is" + f" {attn_weights.size()}" ) if attention_mask is not None: @@ -249,7 +250,8 @@ class PLBartAttention(nn.Module): if layer_head_mask is not None: if layer_head_mask.size() != (self.num_heads,): raise ValueError( - f"Head mask for a single layer should be of size {(self.num_heads,)}, but is {layer_head_mask.size()}" + f"Head mask for a single layer should be of size {(self.num_heads,)}, but is" + f" {layer_head_mask.size()}" ) attn_weights = layer_head_mask.view(1, -1, 1, 1) * attn_weights.view(bsz, self.num_heads, tgt_len, src_len) attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len) @@ -270,7 +272,8 @@ class PLBartAttention(nn.Module): if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim): raise ValueError( - f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is {attn_output.size()}" + f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is" + f" {attn_output.size()}" ) attn_output = attn_output.view(bsz, self.num_heads, tgt_len, self.head_dim) @@ -784,7 +787,8 @@ class PLBartEncoder(PLBartPreTrainedModel): if head_mask is not None: if head_mask.size()[0] != (len(self.layers)): raise ValueError( - f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}." + f"The head_mask should be specified for {len(self.layers)} layers, but it is for" + f" {head_mask.size()[0]}." 
) for idx, encoder_layer in enumerate(self.layers): @@ -1022,7 +1026,8 @@ class PLBartDecoder(PLBartPreTrainedModel): if attn_mask is not None: if attn_mask.size()[0] != (len(self.layers)): raise ValueError( - f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}." + f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for" + f" {head_mask.size()[0]}." ) for idx, decoder_layer in enumerate(self.layers): diff --git a/src/transformers/models/plbart/tokenization_plbart.py b/src/transformers/models/plbart/tokenization_plbart.py index 4c302e8b62..4a3ee1cdcd 100644 --- a/src/transformers/models/plbart/tokenization_plbart.py +++ b/src/transformers/models/plbart/tokenization_plbart.py @@ -33,19 +33,41 @@ VOCAB_FILES_NAMES = {"vocab_file": "sentencepiece.bpe.model", "tokenizer_file": PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { "uclanlp/plbart-base": "https://huggingface.co/uclanlp/plbart-base/resolve/main/sentencepiece.bpe.model", - "uclanlp/plbart-c-cpp-defect-detection": "https://huggingface.co/uclanlp/plbart-c-cpp-defect-detection/resolve/main/sentencepiece.bpe.model", + "uclanlp/plbart-c-cpp-defect-detection": ( + "https://huggingface.co/uclanlp/plbart-c-cpp-defect-detection/resolve/main/sentencepiece.bpe.model" + ), "uclanlp/plbart-cs-java": "https://huggingface.co/uclanlp/plbart-cs-java/resolve/main/sentencepiece.bpe.model", - "uclanlp/plbart-en_XX-java": "https://huggingface.co/uclanlp/plbart-en_XX-java/resolve/main/sentencepiece.bpe.model", - "uclanlp/plbart-go-en_XX": "https://huggingface.co/uclanlp/plbart-go-en_XX/resolve/main/sentencepiece.bpe.model", - "uclanlp/plbart-java-clone-detection": "https://huggingface.co/uclanlp/plbart-java-clone-detection/resolve/main/sentencepiece.bpe.model", + "uclanlp/plbart-en_XX-java": ( + "https://huggingface.co/uclanlp/plbart-en_XX-java/resolve/main/sentencepiece.bpe.model" + ), + "uclanlp/plbart-go-en_XX": ( + "https://huggingface.co/uclanlp/plbart-go-en_XX/resolve/main/sentencepiece.bpe.model" + ), + "uclanlp/plbart-java-clone-detection": ( + "https://huggingface.co/uclanlp/plbart-java-clone-detection/resolve/main/sentencepiece.bpe.model" + ), "uclanlp/plbart-java-cs": "https://huggingface.co/uclanlp/plbart-java-cs/resolve/main/sentencepiece.bpe.model", - "uclanlp/plbart-java-en_XX": "https://huggingface.co/uclanlp/plbart-java-en_XX/resolve/main/sentencepiece.bpe.model", - "uclanlp/plbart-javascript-en_XX": "https://huggingface.co/uclanlp/plbart-javascript-en_XX/resolve/main/sentencepiece.bpe.model", - "uclanlp/plbart-php-en_XX": "https://huggingface.co/uclanlp/plbart-php-en_XX/resolve/main/sentencepiece.bpe.model", - "uclanlp/plbart-python-en_XX": "https://huggingface.co/uclanlp/plbart-python-en_XX/resolve/main/sentencepiece.bpe.model", - "uclanlp/plbart-refine-java-medium": "https://huggingface.co/uclanlp/plbart-refine-java-medium/resolve/main/sentencepiece.bpe.model", - "uclanlp/plbart-refine-java-small": "https://huggingface.co/uclanlp/plbart-refine-java-small/resolve/main/sentencepiece.bpe.model", - "uclanlp/plbart-ruby-en_XX": "https://huggingface.co/uclanlp/plbart-ruby-en_XX/resolve/main/sentencepiece.bpe.model", + "uclanlp/plbart-java-en_XX": ( + "https://huggingface.co/uclanlp/plbart-java-en_XX/resolve/main/sentencepiece.bpe.model" + ), + "uclanlp/plbart-javascript-en_XX": ( + "https://huggingface.co/uclanlp/plbart-javascript-en_XX/resolve/main/sentencepiece.bpe.model" + ), + "uclanlp/plbart-php-en_XX": ( + 
"https://huggingface.co/uclanlp/plbart-php-en_XX/resolve/main/sentencepiece.bpe.model" + ), + "uclanlp/plbart-python-en_XX": ( + "https://huggingface.co/uclanlp/plbart-python-en_XX/resolve/main/sentencepiece.bpe.model" + ), + "uclanlp/plbart-refine-java-medium": ( + "https://huggingface.co/uclanlp/plbart-refine-java-medium/resolve/main/sentencepiece.bpe.model" + ), + "uclanlp/plbart-refine-java-small": ( + "https://huggingface.co/uclanlp/plbart-refine-java-small/resolve/main/sentencepiece.bpe.model" + ), + "uclanlp/plbart-ruby-en_XX": ( + "https://huggingface.co/uclanlp/plbart-ruby-en_XX/resolve/main/sentencepiece.bpe.model" + ), } } diff --git a/src/transformers/models/prophetnet/configuration_prophetnet.py b/src/transformers/models/prophetnet/configuration_prophetnet.py index 9a6574c84d..9c9b0beb5f 100644 --- a/src/transformers/models/prophetnet/configuration_prophetnet.py +++ b/src/transformers/models/prophetnet/configuration_prophetnet.py @@ -22,7 +22,9 @@ from ...utils import logging logger = logging.get_logger(__name__) PROPHETNET_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "microsoft/prophetnet-large-uncased": "https://huggingface.co/microsoft/prophetnet-large-uncased/resolve/main/config.json", + "microsoft/prophetnet-large-uncased": ( + "https://huggingface.co/microsoft/prophetnet-large-uncased/resolve/main/config.json" + ), } @@ -174,5 +176,6 @@ class ProphetNetConfig(PretrainedConfig): @num_hidden_layers.setter def num_hidden_layers(self, value): raise NotImplementedError( - "This model does not support the setting of `num_hidden_layers`. Please set `num_encoder_layers` and `num_decoder_layers`." + "This model does not support the setting of `num_hidden_layers`. Please set `num_encoder_layers` and" + " `num_decoder_layers`." ) diff --git a/src/transformers/models/prophetnet/modeling_prophetnet.py b/src/transformers/models/prophetnet/modeling_prophetnet.py index c869d6373b..2c8b4e3177 100644 --- a/src/transformers/models/prophetnet/modeling_prophetnet.py +++ b/src/transformers/models/prophetnet/modeling_prophetnet.py @@ -326,7 +326,8 @@ class ProphetNetSeq2SeqLMOutput(ModelOutput): @property def decoder_cross_attentions(self): warnings.warn( - "`decoder_cross_attentions` is deprecated and will be removed soon. Please use `cross_attentions` instead.", + "`decoder_cross_attentions` is deprecated and will be removed soon. Please use `cross_attentions`" + " instead.", FutureWarning, ) return self.cross_attentions @@ -411,7 +412,8 @@ class ProphetNetSeq2SeqModelOutput(ModelOutput): @property def decoder_cross_attentions(self): warnings.warn( - "`decoder_cross_attentions` is deprecated and will be removed soon. Please use `cross_attentions` instead.", + "`decoder_cross_attentions` is deprecated and will be removed soon. Please use `cross_attentions`" + " instead.", FutureWarning, ) return self.cross_attentions @@ -562,9 +564,10 @@ class ProphetNetPreTrainedModel(PreTrainedModel): decoder_start_token_id = self.config.decoder_start_token_id pad_token_id = self.config.pad_token_id - assert ( - decoder_start_token_id is not None - ), "self.model.config.decoder_start_token_id has to be defined. In ProphetNet it is usually set to the pad_token_id. See ProphetNet docs for more information" + assert decoder_start_token_id is not None, ( + "self.model.config.decoder_start_token_id has to be defined. In ProphetNet it is usually set to the" + " pad_token_id. 
See ProphetNet docs for more information" + ) # shift inputs to the right shifted_input_ids = input_ids.new_zeros(input_ids.shape) @@ -639,9 +642,10 @@ class ProphetNetAttention(nn.Module): self.num_attn_heads = num_attn_heads self.head_dim = hidden_size // num_attn_heads - assert ( - self.head_dim * num_attn_heads == hidden_size - ), "`config.hidden_size` must be divisible by `config.num_encoder_attention_heads` and `config.num_decoder_attention_heads`" + assert self.head_dim * num_attn_heads == hidden_size, ( + "`config.hidden_size` must be divisible by `config.num_encoder_attention_heads` and" + " `config.num_decoder_attention_heads`" + ) self.key_proj = nn.Linear(hidden_size, hidden_size) self.value_proj = nn.Linear(hidden_size, hidden_size) @@ -708,7 +712,10 @@ class ProphetNetAttention(nn.Module): batch_size * self.num_attn_heads, tgt_len, src_len, - ), f"`attn_weights` should be of size {batch_size * self.num_attn_heads, tgt_len, src_len}, but is of size {attn_weights.shape}" + ), ( + f"`attn_weights` should be of size {batch_size * self.num_attn_heads, tgt_len, src_len}, but is of size" + f" {attn_weights.shape}" + ) # This is part of a workaround to get around fork/join parallelism not supporting Optional types. if attention_mask is not None and attention_mask.dim() == 0: @@ -717,7 +724,10 @@ class ProphetNetAttention(nn.Module): self.num_attn_heads * batch_size, 1, src_len, - ), f"`attention_mask` should be `None` or of shape attention_mask.size() == {batch_size * self.num_attn_heads, 1, src_len}, but is {attention_mask.shape}" + ), ( + "`attention_mask` should be `None` or of shape attention_mask.size() ==" + f" {batch_size * self.num_attn_heads, 1, src_len}, but is {attention_mask.shape}" + ) if attention_mask is not None: # don't attend to padding symbols attn_weights = attn_weights + attention_mask @@ -735,9 +745,10 @@ class ProphetNetAttention(nn.Module): attn_weights = nn.functional.softmax(attn_weights, dim=-1) if layer_head_mask is not None: - assert layer_head_mask.size() == ( - self.num_attn_heads, - ), f"Head mask for a single layer should be of size {(self.num_attn_heads,)}, but is {layer_head_mask.size()}" + assert layer_head_mask.size() == (self.num_attn_heads,), ( + f"Head mask for a single layer should be of size {(self.num_attn_heads,)}, but is" + f" {layer_head_mask.size()}" + ) attn_weights = layer_head_mask.view(1, -1, 1, 1) * attn_weights.view( batch_size, self.num_attn_heads, tgt_len, src_len ) @@ -757,7 +768,10 @@ class ProphetNetAttention(nn.Module): batch_size * self.num_attn_heads, tgt_len, self.head_dim, - ), f"`attn_output` should be of shape {batch_size * self.num_attn_heads, tgt_len, self.head_dim}, but is of shape {attn_output.size()}" + ), ( + f"`attn_output` should be of shape {batch_size * self.num_attn_heads, tgt_len, self.head_dim}, but is of" + f" shape {attn_output.size()}" + ) attn_output = ( attn_output.view(batch_size, self.num_attn_heads, tgt_len, self.head_dim) @@ -847,7 +861,10 @@ class ProphetNetNgramSelfAttention(nn.Module): batch_size, ngram_sequence_length, hidden_size, - ], f"`hidden_states` should be of shape {batch_size, ngram_sequence_length, hidden_size}, but is of shape {hidden_states.shape}" + ], ( + f"`hidden_states` should be of shape {batch_size, ngram_sequence_length, hidden_size}, but is of shape" + f" {hidden_states.shape}" + ) # project query_states = self.query_proj(hidden_states) @@ -916,9 +933,10 @@ class ProphetNetNgramSelfAttention(nn.Module): ).type_as(main_attn_weights) if layer_head_mask is not None: - assert 
layer_head_mask.size() == ( - self.num_attn_heads, - ), f"Head mask for a single layer should be of size {(self.num_attn_heads,)}, but is {layer_head_mask.size()}" + assert layer_head_mask.size() == (self.num_attn_heads,), ( + f"Head mask for a single layer should be of size {(self.num_attn_heads,)}, but is" + f" {layer_head_mask.size()}" + ) main_attn_probs = layer_head_mask.view(1, -1, 1, 1) * main_attn_probs.view( batch_size, self.num_attn_heads, -1, sequence_length ) @@ -979,9 +997,10 @@ class ProphetNetNgramSelfAttention(nn.Module): ).type_as(predict_attn_weights) if layer_head_mask is not None: - assert layer_head_mask.size() == ( - self.num_attn_heads, - ), f"Head mask for a single layer should be of size {(self.num_attn_heads,)}, but is {layer_head_mask.size()}" + assert layer_head_mask.size() == (self.num_attn_heads,), ( + f"Head mask for a single layer should be of size {(self.num_attn_heads,)}, but is" + f" {layer_head_mask.size()}" + ) predict_attn_probs = layer_head_mask.view(1, 1, -1, 1, 1) * predict_attn_probs.view( self.ngram, batch_size, self.num_attn_heads, sequence_length, 2 * sequence_length ) @@ -1559,9 +1578,10 @@ class ProphetNetDecoder(ProphetNetPreTrainedModel): # check if head_mask/cross_attn_head_mask has a correct number of layers specified if desired for attn_mask, mask_name in zip([head_mask, cross_attn_head_mask], ["head_mask", "cross_attn_head_mask"]): if attn_mask is not None: - assert attn_mask.size()[0] == ( - len(self.layers) - ), f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}." + assert attn_mask.size()[0] == (len(self.layers)), ( + f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for" + f" {head_mask.size()[0]}." + ) for idx, decoder_layer in enumerate(self.layers): if output_hidden_states: # grad cannot be kept because tensor is sliced @@ -2081,7 +2101,8 @@ class ProphetNetForConditionalGeneration(ProphetNetPreTrainedModel): @add_start_docstrings( - "The standalone decoder part of the ProphetNetModel with a lm head on top. The model can be used for causal language modeling.", + "The standalone decoder part of the ProphetNetModel with a lm head on top. The model can be used for causal" + " language modeling.", PROPHETNET_START_DOCSTRING, ) class ProphetNetForCausalLM(ProphetNetPreTrainedModel): diff --git a/src/transformers/models/prophetnet/tokenization_prophetnet.py b/src/transformers/models/prophetnet/tokenization_prophetnet.py index 5bc3951b79..06f432da2e 100644 --- a/src/transformers/models/prophetnet/tokenization_prophetnet.py +++ b/src/transformers/models/prophetnet/tokenization_prophetnet.py @@ -28,7 +28,9 @@ VOCAB_FILES_NAMES = {"vocab_file": "prophetnet.tokenizer"} PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "microsoft/prophetnet-large-uncased": "https://huggingface.co/microsoft/prophetnet-large-uncased/resolve/main/prophetnet.tokenizer", + "microsoft/prophetnet-large-uncased": ( + "https://huggingface.co/microsoft/prophetnet-large-uncased/resolve/main/prophetnet.tokenizer" + ), } } @@ -139,8 +141,8 @@ class ProphetNetTokenizer(PreTrainedTokenizer): if not os.path.isfile(vocab_file): raise ValueError( - f"Can't find a vocabulary file at path '{vocab_file}'. To load the vocabulary from a Google pretrained " - "model use `tokenizer = AutoTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`" + f"Can't find a vocabulary file at path '{vocab_file}'. 
To load the vocabulary from a Google pretrained" + " model use `tokenizer = AutoTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`" ) self.vocab = load_vocab(vocab_file) self.ids_to_tokens = collections.OrderedDict([(ids, tok) for tok, ids in self.vocab.items()]) diff --git a/src/transformers/models/qdqbert/modeling_qdqbert.py b/src/transformers/models/qdqbert/modeling_qdqbert.py index e7be1b4518..0e90dba4fd 100755 --- a/src/transformers/models/qdqbert/modeling_qdqbert.py +++ b/src/transformers/models/qdqbert/modeling_qdqbert.py @@ -62,8 +62,9 @@ if is_pytorch_quantization_available(): from pytorch_quantization.nn.modules.tensor_quantizer import TensorQuantizer except OSError: logger.error( - "QDQBERT model are not usable since `pytorch_quantization` can't be loaded. " - "Please try to reinstall it following the instructions here: https://github.com/NVIDIA/TensorRT/tree/master/tools/pytorch-quantization." + "QDQBERT models are not usable since `pytorch_quantization` can't be loaded. Please try to reinstall it" + " following the instructions here:" + " https://github.com/NVIDIA/TensorRT/tree/master/tools/pytorch-quantization." ) _CHECKPOINT_FOR_DOC = "bert-base-uncased" @@ -507,7 +508,8 @@ class QDQBertLayer(nn.Module): if self.is_decoder and encoder_hidden_states is not None: if not hasattr(self, "crossattention"): raise ValueError( - f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`" + f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers" + " by setting `config.add_cross_attention=True`" ) # cross_attn cached key/values tuple is at positions 3,4 of past_key_value tuple @@ -1331,7 +1333,8 @@ class QDQBertForNextSentencePrediction(QDQBertPreTrainedModel): if "next_sentence_label" in kwargs: warnings.warn( - "The `next_sentence_label` argument is deprecated and will be removed in a future version, use `labels` instead.", + "The `next_sentence_label` argument is deprecated and will be removed in a future version, use" + " `labels` instead.", FutureWarning, ) labels = kwargs.pop("next_sentence_label") diff --git a/src/transformers/models/rag/modeling_rag.py b/src/transformers/models/rag/modeling_rag.py index 205e825cbc..1d6a62b201 100644 --- a/src/transformers/models/rag/modeling_rag.py +++ b/src/transformers/models/rag/modeling_rag.py @@ -336,9 +336,10 @@ class RagPreTrainedModel(PreTrainedModel): # by the value of the flag `is_generator` that we need to set correctly.
question_encoder = kwargs_question_encoder.pop("model", None) if question_encoder is None: - assert ( - question_encoder_pretrained_model_name_or_path is not None - ), "If `model` is not defined as an argument, a `question_encoder_pretrained_model_name_or_path` has to be defined" + assert question_encoder_pretrained_model_name_or_path is not None, ( + "If `model` is not defined as an argument, a `question_encoder_pretrained_model_name_or_path` has to" + " be defined" + ) from ..auto.modeling_auto import AutoModel if "config" not in kwargs_question_encoder: @@ -357,9 +358,10 @@ class RagPreTrainedModel(PreTrainedModel): generator = kwargs_generator.pop("model", None) if generator is None: - assert ( - generator_pretrained_model_name_or_path is not None - ), "If `generator_model` is not defined as an argument, a `generator_pretrained_model_name_or_path` has to be defined" + assert generator_pretrained_model_name_or_path is not None, ( + "If `generator_model` is not defined as an argument, a `generator_pretrained_model_name_or_path` has" + " to be defined" + ) from ..auto.modeling_auto import AutoModelForSeq2SeqLM if "config" not in kwargs_generator: @@ -654,23 +656,27 @@ class RagModel(RagPreTrainedModel): question_encoder_last_hidden_state.unsqueeze(1), retrieved_doc_embeds.transpose(1, 2) ).squeeze(1) else: - assert ( - context_input_ids is not None - ), "Make sure that `context_input_ids` are passed, if no `retriever` is set. Alternatively, you can set a retriever using the `set_retriever(...)` function." - assert ( - context_attention_mask is not None - ), "Make sure that `context_attention_mask` are passed, if no `retriever` is set. Alternatively, you can set a retriever using the `set_retriever(...)` function." - assert ( - doc_scores is not None - ), "Make sure that `doc_scores` are passed, if no `retriever` is set. Alternatively, you can set a retriever using the `set_retriever(...)` function." + assert context_input_ids is not None, ( + "Make sure that `context_input_ids` are passed, if no `retriever` is set. Alternatively, you can" + " set a retriever using the `set_retriever(...)` function." + ) + assert context_attention_mask is not None, ( + "Make sure that `context_attention_mask` are passed, if no `retriever` is set. Alternatively, you" + " can set a retriever using the `set_retriever(...)` function." + ) + assert doc_scores is not None, ( + "Make sure that `doc_scores` are passed, if no `retriever` is set. Alternatively, you can set a" + " retriever using the `set_retriever(...)` function." + ) assert ( doc_scores is not None ), "Make sure that `doc_scores` are passed when passing `encoder_outputs` to the forward function." - assert ( - doc_scores.shape[1] % n_docs - ) == 0, f" The first dimension of `context_input_ids` should be a multiple of `n_docs`={n_docs}, but is {context_input_ids.shape[0]}." + assert (doc_scores.shape[1] % n_docs) == 0, ( + f" The first dimension of `context_input_ids` should be a multiple of `n_docs`={n_docs}, but is" + f" {context_input_ids.shape[0]}." 
+ ) # Decoder input without context documents if decoder_input_ids is not None: @@ -1022,12 +1028,14 @@ class RagSequenceForGeneration(RagPreTrainedModel): new_input_ids = input_ids[index : index + 1].repeat(num_candidates, 1) outputs = self(new_input_ids, labels=output_sequences, exclude_bos_score=True) else: # input_ids is None, need context_input_ids/mask and doc_scores - assert ( - context_attention_mask is not None - ), "Make sure that `context_attention_mask` are passed, if no `input_ids` is set. Alternatively, you can set a retriever using the `set_retriever(...)` function." - assert ( - doc_scores is not None - ), "Make sure that `doc_scores` are passed, if no `input_ids` is set. Alternatively, you can set a retriever using the `set_retriever(...)` function." + assert context_attention_mask is not None, ( + "Make sure that `context_attention_mask` are passed, if no `input_ids` is set. Alternatively, you" + " can set a retriever using the `set_retriever(...)` function." + ) + assert doc_scores is not None, ( + "Make sure that `doc_scores` are passed, if no `input_ids` is set. Alternatively, you can set a" + " retriever using the `set_retriever(...)` function." + ) individual_input_ids = generator_input_ids.repeat( num_candidates, 1 @@ -1567,9 +1575,10 @@ class RagTokenForGeneration(RagPreTrainedModel): 1 ) - assert ( - context_input_ids.shape[0] % n_docs - ) == 0, f" The first dimension of `context_input_ids` should be a multiple of `n_docs`={n_docs}, but is {context_input_ids.shape[0]}." + assert (context_input_ids.shape[0] % n_docs) == 0, ( + f" The first dimension of `context_input_ids` should be a multiple of `n_docs`={n_docs}, but is" + f" {context_input_ids.shape[0]}." + ) # batch_size batch_size = context_input_ids.shape[0] // n_docs diff --git a/src/transformers/models/rag/modeling_tf_rag.py b/src/transformers/models/rag/modeling_tf_rag.py index 30f50a29ff..3d0ad31db8 100644 --- a/src/transformers/models/rag/modeling_tf_rag.py +++ b/src/transformers/models/rag/modeling_tf_rag.py @@ -321,9 +321,10 @@ class TFRagPreTrainedModel(TFPreTrainedModel): # by the value of the flag `is_generator` that we need to set correctly. question_encoder = kwargs_question_encoder.pop("model", None) if question_encoder is None: - assert ( - question_encoder_pretrained_model_name_or_path is not None - ), "If `model` is not defined as an argument, a `question_encoder_pretrained_model_name_or_path` has to be defined" + assert question_encoder_pretrained_model_name_or_path is not None, ( + "If `model` is not defined as an argument, a `question_encoder_pretrained_model_name_or_path` has to" + " be defined" + ) from ..auto.modeling_tf_auto import TFAutoModel @@ -343,9 +344,10 @@ class TFRagPreTrainedModel(TFPreTrainedModel): generator = kwargs_generator.pop("generator", None) if generator is None: - assert ( - generator_pretrained_model_name_or_path is not None - ), "If `generator_model` is not defined as an argument, a `generator_pretrained_model_name_or_path` has to be defined" + assert generator_pretrained_model_name_or_path is not None, ( + "If `generator_model` is not defined as an argument, a `generator_pretrained_model_name_or_path` has" + " to be defined" + ) from ..auto.modeling_tf_auto import TFAutoModelForSeq2SeqLM @@ -632,23 +634,27 @@ class TFRagModel(TFRagPreTrainedModel): ) else: - assert ( - context_input_ids is not None - ), "Make sure that `context_input_ids` are passed, if no `retriever` is set. Alternatively, you can set a retriever using the `set_retriever(...)` function." 
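# Editorial sketch, with stand-in values, of the assert layout this patch
# standardizes on: the parentheses wrap only the *message*. Writing
# `assert (condition, "message")` instead would test a two-element tuple,
# which is always truthy, so the assert could never fire.
doc_scores = None
try:
    assert doc_scores is not None, (
        "Make sure that `doc_scores` are passed, if no `retriever` is set. Alternatively, you can set a"
        " retriever using the `set_retriever(...)` function."
    )
except AssertionError as err:
    print(err)  # the two fragments surface as one message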
- assert ( - context_attention_mask is not None - ), "Make sure that `context_attention_mask` are passed, if no `retriever` is set. Alternatively, you can set a retriever using the `set_retriever(...)` function." - assert ( - doc_scores is not None - ), "Make sure that `doc_scores` are passed, if no `retriever` is set. Alternatively, you can set a retriever using the `set_retriever(...)` function." + assert context_input_ids is not None, ( + "Make sure that `context_input_ids` are passed, if no `retriever` is set. Alternatively, you can" + " set a retriever using the `set_retriever(...)` function." + ) + assert context_attention_mask is not None, ( + "Make sure that `context_attention_mask` are passed, if no `retriever` is set. Alternatively, you" + " can set a retriever using the `set_retriever(...)` function." + ) + assert doc_scores is not None, ( + "Make sure that `doc_scores` are passed, if no `retriever` is set. Alternatively, you can set a" + " retriever using the `set_retriever(...)` function." + ) assert ( doc_scores is not None ), "Make sure that `doc_scores` are passed when passing `encoder_outputs` to the forward function." - assert ( - doc_scores.shape[1] % n_docs - ) == 0, f" The first dimension of `context_input_ids` should be a multiple of `n_docs`={n_docs}, but is {context_input_ids.shape[0]}." + assert (doc_scores.shape[1] % n_docs) == 0, ( + f" The first dimension of `context_input_ids` should be a multiple of `n_docs`={n_docs}, but is" + f" {context_input_ids.shape[0]}." + ) # Decoder input without context documents if decoder_input_ids is not None: @@ -1149,9 +1155,10 @@ class TFRagTokenForGeneration(TFRagPreTrainedModel, TFCausalLanguageModelingLoss ) doc_scores = tf.squeeze(doc_scores, axis=1) - assert ( - context_input_ids.shape[0] % n_docs - ) == 0, f" The first dimension of `context_input_ids` should be a multiple of `n_docs`={n_docs}, but is {context_input_ids.shape[0]}." + assert (context_input_ids.shape[0] % n_docs) == 0, ( + f" The first dimension of `context_input_ids` should be a multiple of `n_docs`={n_docs}, but is" + f" {context_input_ids.shape[0]}." + ) batch_size = context_input_ids.shape[0] // n_docs @@ -1286,9 +1293,10 @@ class TFRagTokenForGeneration(TFRagPreTrainedModel, TFCausalLanguageModelingLoss if start_token_id is None: start_token_id = self.generator.config.decoder_start_token_id - assert ( - start_token_id is not None - ), "self.generator.config.decoder_start_token_id has to be defined. In Rag we commonly use Bart as generator, see Bart docs for more information" + assert start_token_id is not None, ( + "self.generator.config.decoder_start_token_id has to be defined. In Rag we commonly use Bart as" + " generator, see Bart docs for more information" + ) pad_token_id = self.generator.config.pad_token_id assert pad_token_id is not None, "self.model.config.pad_token_id has to be defined." @@ -1745,12 +1753,14 @@ class TFRagSequenceForGeneration(TFRagPreTrainedModel, TFCausalLanguageModelingL new_input_ids = tf.tile(input_ids[index : index + 1], (num_candidates, 1)) outputs = self(new_input_ids, labels=output_sequences, exclude_bos_score=True) else: # input_ids is None, need context_input_ids/mask and doc_scores - assert ( - context_attention_mask is not None - ), "Make sure that `context_attention_mask` are passed, if no `input_ids` is set. Alternatively, you can set a retriever using the `set_retriever(...)` function." - assert ( - doc_scores is not None - ), "Make sure that `doc_scores` are passed, if no `input_ids` is set. 
Alternatively, you can set a retriever using the `set_retriever(...)` function." + assert context_attention_mask is not None, ( + "Make sure that `context_attention_mask` are passed, if no `input_ids` is set. Alternatively, you" + " can set a retriever using the `set_retriever(...)` function." + ) + assert doc_scores is not None, ( + "Make sure that `doc_scores` are passed, if no `input_ids` is set. Alternatively, you can set a" + " retriever using the `set_retriever(...)` function." + ) individual_input_ids = tf.tile( generator_input_ids, (num_candidates, 1) diff --git a/src/transformers/models/realm/configuration_realm.py b/src/transformers/models/realm/configuration_realm.py index d3383bd897..8d816a736e 100644 --- a/src/transformers/models/realm/configuration_realm.py +++ b/src/transformers/models/realm/configuration_realm.py @@ -21,10 +21,18 @@ from ...utils import logging logger = logging.get_logger(__name__) REALM_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "google/realm-cc-news-pretrained-embedder": "https://huggingface.co/google/realm-cc-news-pretrained-embedder/resolve/main/config.json", - "google/realm-cc-news-pretrained-encoder": "https://huggingface.co/google/realm-cc-news-pretrained-encoder/resolve/main/config.json", - "google/realm-cc-news-pretrained-scorer": "https://huggingface.co/google/realm-cc-news-pretrained-scorer/resolve/main/config.json", - "google/realm-cc-news-pretrained-openqa": "https://huggingface.co/google/realm-cc-news-pretrained-openqa/aresolve/main/config.json", + "google/realm-cc-news-pretrained-embedder": ( + "https://huggingface.co/google/realm-cc-news-pretrained-embedder/resolve/main/config.json" + ), + "google/realm-cc-news-pretrained-encoder": ( + "https://huggingface.co/google/realm-cc-news-pretrained-encoder/resolve/main/config.json" + ), + "google/realm-cc-news-pretrained-scorer": ( + "https://huggingface.co/google/realm-cc-news-pretrained-scorer/resolve/main/config.json" + ), + "google/realm-cc-news-pretrained-openqa": ( + "https://huggingface.co/google/realm-cc-news-pretrained-openqa/aresolve/main/config.json" + ), "google/realm-orqa-nq-openqa": "https://huggingface.co/google/realm-orqa-nq-openqa/resolve/main/config.json", "google/realm-orqa-nq-reader": "https://huggingface.co/google/realm-orqa-nq-reader/resolve/main/config.json", "google/realm-orqa-wq-openqa": "https://huggingface.co/google/realm-orqa-wq-openqa/resolve/main/config.json", diff --git a/src/transformers/models/realm/modeling_realm.py b/src/transformers/models/realm/modeling_realm.py index c467dcd30a..e6de31a4cb 100644 --- a/src/transformers/models/realm/modeling_realm.py +++ b/src/transformers/models/realm/modeling_realm.py @@ -502,7 +502,8 @@ class RealmLayer(nn.Module): if self.is_decoder and encoder_hidden_states is not None: if not hasattr(self, "crossattention"): raise ValueError( - f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`" + f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers" + " by setting `config.add_cross_attention=True`" ) # cross_attn cached key/values tuple is at positions 3,4 of past_key_value tuple @@ -1366,7 +1367,8 @@ class RealmScorer(RealmPreTrainedModel): @add_start_docstrings( - "The knowledge-augmented encoder of REALM outputting masked language model logits and marginal log-likelihood loss.", + "The knowledge-augmented encoder of REALM outputting masked language model logits and marginal log-likelihood" + " loss.", 
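# Editorial sketch of the pattern just above: a long string passed as a call
# argument is split into adjacent fragments without extra parentheses, since
# the call's own parentheses already permit the line break. The decorator
# below is a simplified stand-in for transformers' `add_start_docstrings`,
# written here only for illustration.
def add_start_docstrings(*docstr):
    def wrapper(obj):
        obj.__doc__ = "".join(docstr)
        return obj
    return wrapper

@add_start_docstrings(
    "The knowledge-augmented encoder of REALM outputting masked language model logits and marginal log-likelihood"
    " loss."
)
class Encoder:
    pass

# The fragments join into a single docstring.
assert Encoder.__doc__.endswith("marginal log-likelihood loss.")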
REALM_START_DOCSTRING, ) class RealmKnowledgeAugEncoder(RealmPreTrainedModel): diff --git a/src/transformers/models/realm/tokenization_realm.py b/src/transformers/models/realm/tokenization_realm.py index 426b5d775c..63295826d4 100644 --- a/src/transformers/models/realm/tokenization_realm.py +++ b/src/transformers/models/realm/tokenization_realm.py @@ -30,10 +30,18 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"} PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "google/realm-cc-news-pretrained-embedder": "https://huggingface.co/google/realm-cc-news-pretrained-embedder/resolve/main/vocab.txt", - "google/realm-cc-news-pretrained-encoder": "https://huggingface.co/google/realm-cc-news-pretrained-encoder/resolve/main/vocab.txt", - "google/realm-cc-news-pretrained-scorer": "https://huggingface.co/google/realm-cc-news-pretrained-scorer/resolve/main/vocab.txt", - "google/realm-cc-news-pretrained-openqa": "https://huggingface.co/google/realm-cc-news-pretrained-openqa/aresolve/main/vocab.txt", + "google/realm-cc-news-pretrained-embedder": ( + "https://huggingface.co/google/realm-cc-news-pretrained-embedder/resolve/main/vocab.txt" + ), + "google/realm-cc-news-pretrained-encoder": ( + "https://huggingface.co/google/realm-cc-news-pretrained-encoder/resolve/main/vocab.txt" + ), + "google/realm-cc-news-pretrained-scorer": ( + "https://huggingface.co/google/realm-cc-news-pretrained-scorer/resolve/main/vocab.txt" + ), + "google/realm-cc-news-pretrained-openqa": ( + "https://huggingface.co/google/realm-cc-news-pretrained-openqa/aresolve/main/vocab.txt" + ), "google/realm-orqa-nq-openqa": "https://huggingface.co/google/realm-orqa-nq-openqa/resolve/main/vocab.txt", "google/realm-orqa-nq-reader": "https://huggingface.co/google/realm-orqa-nq-reader/resolve/main/vocab.txt", "google/realm-orqa-wq-openqa": "https://huggingface.co/google/realm-orqa-wq-openqa/resolve/main/vocab.txt", @@ -165,8 +173,8 @@ class RealmTokenizer(PreTrainedTokenizer): if not os.path.isfile(vocab_file): raise ValueError( - f"Can't find a vocabulary file at path '{vocab_file}'. To load the vocabulary from a Google pretrained " - "model use `tokenizer = RealmTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`" + f"Can't find a vocabulary file at path '{vocab_file}'.
To load the vocabulary from a Google pretrained" + " model use `tokenizer = RealmTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`" ) self.vocab = load_vocab(vocab_file) self.ids_to_tokens = collections.OrderedDict([(ids, tok) for tok, ids in self.vocab.items()]) diff --git a/src/transformers/models/realm/tokenization_realm_fast.py b/src/transformers/models/realm/tokenization_realm_fast.py index 87580baa22..f61fa8418e 100644 --- a/src/transformers/models/realm/tokenization_realm_fast.py +++ b/src/transformers/models/realm/tokenization_realm_fast.py @@ -31,24 +31,48 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt", "tokenizer_file": "tokenizer.jso PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "google/realm-cc-news-pretrained-embedder": "https://huggingface.co/google/realm-cc-news-pretrained-embedder/resolve/main/vocab.txt", - "google/realm-cc-news-pretrained-encoder": "https://huggingface.co/google/realm-cc-news-pretrained-encoder/resolve/main/vocab.txt", - "google/realm-cc-news-pretrained-scorer": "https://huggingface.co/google/realm-cc-news-pretrained-scorer/resolve/main/vocab.txt", - "google/realm-cc-news-pretrained-openqa": "https://huggingface.co/google/realm-cc-news-pretrained-openqa/aresolve/main/vocab.txt", + "google/realm-cc-news-pretrained-embedder": ( + "https://huggingface.co/google/realm-cc-news-pretrained-embedder/resolve/main/vocab.txt" + ), + "google/realm-cc-news-pretrained-encoder": ( + "https://huggingface.co/google/realm-cc-news-pretrained-encoder/resolve/main/vocab.txt" + ), + "google/realm-cc-news-pretrained-scorer": ( + "https://huggingface.co/google/realm-cc-news-pretrained-scorer/resolve/main/vocab.txt" + ), + "google/realm-cc-news-pretrained-openqa": ( + "https://huggingface.co/google/realm-cc-news-pretrained-openqa/aresolve/main/vocab.txt" + ), "google/realm-orqa-nq-openqa": "https://huggingface.co/google/realm-orqa-nq-openqa/resolve/main/vocab.txt", "google/realm-orqa-nq-reader": "https://huggingface.co/google/realm-orqa-nq-reader/resolve/main/vocab.txt", "google/realm-orqa-wq-openqa": "https://huggingface.co/google/realm-orqa-wq-openqa/resolve/main/vocab.txt", "google/realm-orqa-wq-reader": "https://huggingface.co/google/realm-orqa-wq-reader/resolve/main/vocab.txt", }, "tokenizer_file": { - "google/realm-cc-news-pretrained-embedder": "https://huggingface.co/google/realm-cc-news-pretrained-embedder/resolve/main/tokenizer.jsont", - "google/realm-cc-news-pretrained-encoder": "https://huggingface.co/google/realm-cc-news-pretrained-encoder/resolve/main/tokenizer.json", - "google/realm-cc-news-pretrained-scorer": "https://huggingface.co/google/realm-cc-news-pretrained-scorer/resolve/main/tokenizer.json", - "google/realm-cc-news-pretrained-openqa": "https://huggingface.co/google/realm-cc-news-pretrained-openqa/aresolve/main/tokenizer.json", - "google/realm-orqa-nq-openqa": "https://huggingface.co/google/realm-orqa-nq-openqa/resolve/main/tokenizer.json", - "google/realm-orqa-nq-reader": "https://huggingface.co/google/realm-orqa-nq-reader/resolve/main/tokenizer.json", - "google/realm-orqa-wq-openqa": "https://huggingface.co/google/realm-orqa-wq-openqa/resolve/main/tokenizer.json", - "google/realm-orqa-wq-reader": "https://huggingface.co/google/realm-orqa-wq-reader/resolve/main/tokenizer.json", + "google/realm-cc-news-pretrained-embedder": ( + "https://huggingface.co/google/realm-cc-news-pretrained-embedder/resolve/main/tokenizer.jsont" + ), + "google/realm-cc-news-pretrained-encoder": ( + 
"https://huggingface.co/google/realm-cc-news-pretrained-encoder/resolve/main/tokenizer.json" + ), + "google/realm-cc-news-pretrained-scorer": ( + "https://huggingface.co/google/realm-cc-news-pretrained-scorer/resolve/main/tokenizer.json" + ), + "google/realm-cc-news-pretrained-openqa": ( + "https://huggingface.co/google/realm-cc-news-pretrained-openqa/aresolve/main/tokenizer.json" + ), + "google/realm-orqa-nq-openqa": ( + "https://huggingface.co/google/realm-orqa-nq-openqa/resolve/main/tokenizer.json" + ), + "google/realm-orqa-nq-reader": ( + "https://huggingface.co/google/realm-orqa-nq-reader/resolve/main/tokenizer.json" + ), + "google/realm-orqa-wq-openqa": ( + "https://huggingface.co/google/realm-orqa-wq-openqa/resolve/main/tokenizer.json" + ), + "google/realm-orqa-wq-reader": ( + "https://huggingface.co/google/realm-orqa-wq-reader/resolve/main/tokenizer.json" + ), }, } diff --git a/src/transformers/models/reformer/configuration_reformer.py b/src/transformers/models/reformer/configuration_reformer.py index d481b3b137..ea2a1abd08 100755 --- a/src/transformers/models/reformer/configuration_reformer.py +++ b/src/transformers/models/reformer/configuration_reformer.py @@ -22,7 +22,9 @@ from ...utils import logging logger = logging.get_logger(__name__) REFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "google/reformer-crime-and-punishment": "https://huggingface.co/google/reformer-crime-and-punishment/resolve/main/config.json", + "google/reformer-crime-and-punishment": ( + "https://huggingface.co/google/reformer-crime-and-punishment/resolve/main/config.json" + ), "google/reformer-enwik8": "https://huggingface.co/google/reformer-enwik8/resolve/main/config.json", } diff --git a/src/transformers/models/reformer/convert_reformer_trax_checkpoint_to_pytorch.py b/src/transformers/models/reformer/convert_reformer_trax_checkpoint_to_pytorch.py index 2e2e3f3a60..f25e166ef9 100755 --- a/src/transformers/models/reformer/convert_reformer_trax_checkpoint_to_pytorch.py +++ b/src/transformers/models/reformer/convert_reformer_trax_checkpoint_to_pytorch.py @@ -210,8 +210,10 @@ if __name__ == "__main__": default=None, type=str, required=True, - help="The config json file corresponding to the pre-trained Reformer model. \n" - "This specifies the model architecture.", + help=( + "The config json file corresponding to the pre-trained Reformer model. \n" + "This specifies the model architecture." + ), ) parser.add_argument( "--pytorch_dump_path", default=None, type=str, required=True, help="Path to the output PyTorch model." diff --git a/src/transformers/models/reformer/modeling_reformer.py b/src/transformers/models/reformer/modeling_reformer.py index 089481f854..8430f3a62c 100755 --- a/src/transformers/models/reformer/modeling_reformer.py +++ b/src/transformers/models/reformer/modeling_reformer.py @@ -380,9 +380,10 @@ class LSHSelfAttention(nn.Module, EfficientAttentionMixin): # check if cache shall be used and that hidden states are already cached if do_cached_attention: - assert ( - sequence_length == 1 - ), f"At the moment, auto-regressive language generation is only possible one word at a time. Make sure that input sequence length {sequence_length} equals 1, when `past_buckets_states` is passed." + assert sequence_length == 1, ( + "At the moment, auto-regressive language generation is only possible one word at a time. Make sure" + f" that input sequence length {sequence_length} equals 1, when `past_buckets_states` is passed." 
+ ) past_buckets = past_buckets_states[0] past_states = past_buckets_states[1] @@ -505,9 +506,10 @@ class LSHSelfAttention(nn.Module, EfficientAttentionMixin): ) if self.chunk_length is None: - assert ( - self.num_chunks_before == 0 and self.num_chunks_after == 0 - ), "If `config.chunk_length` is `None`, make sure `config.num_chunks_after` and `config.num_chunks_before` are set to 0." + assert self.num_chunks_before == 0 and self.num_chunks_after == 0, ( + "If `config.chunk_length` is `None`, make sure `config.num_chunks_after` and" + " `config.num_chunks_before` are set to 0." + ) elif do_cached_attention and past_buckets is not None: # use max sequence length sorted_bucket_idx_per_hash = sorted_bucket_idx @@ -577,7 +579,10 @@ class LSHSelfAttention(nn.Module, EfficientAttentionMixin): self.num_attention_heads, sequence_length, self.attention_head_size, - ), "out_vectors have be of shape `[batch_size, config.num_attention_heads, sequence_length, config.attention_head_size]`." + ), ( + "out_vectors have be of shape `[batch_size, config.num_attention_heads, sequence_length," + " config.attention_head_size]`." + ) out_vectors = self._merge_hidden_size_dims(out_vectors, self.num_attention_heads, self.attention_head_size) @@ -891,7 +896,10 @@ class LSHSelfAttention(nn.Module, EfficientAttentionMixin): self.num_attention_heads, num_hashes, sequence_length, - ), f"bucket_idx should have shape {(batch_size, self.num_attention_heads, num_hashes, sequence_length)}, but has shape {bucket_idx.shape}." + ), ( + f"bucket_idx should have shape {(batch_size, self.num_attention_heads, num_hashes, sequence_length)}, but" + f" has shape {bucket_idx.shape}." + ) # find indices of new bucket indices relevant_bucket_idx = (bucket_idx == (bucket_idx.shape[-1] - 1)).nonzero() @@ -925,12 +933,20 @@ class LSHSelfAttention(nn.Module, EfficientAttentionMixin): assert ( relevant_hidden_states.shape[2] == (self.num_chunks_before + self.num_chunks_after + 1) * self.chunk_length * num_hashes - ), f"There should be {(self.num_chunks_before + self.num_chunks_after + 1) * self.chunk_length * num_hashes} `hidden_states`, there are {relevant_hidden_states.shape[2]} `hidden_states`." + ), ( + "There should be" + f" {(self.num_chunks_before + self.num_chunks_after + 1) * self.chunk_length * num_hashes} `hidden_states`," + f" there are {relevant_hidden_states.shape[2]} `hidden_states`." + ) assert ( relevant_bucket_idx_chunk.shape[-1] == (self.num_chunks_before + self.num_chunks_after + 1) * self.chunk_length - ), f"There should be {(self.num_chunks_before + self.num_chunks_after + 1) * self.chunk_length} `hidden_states`, there are {relevant_bucket_idx_chunk.shape[-1]} `bucket_idx`." + ), ( + "There should be" + f" {(self.num_chunks_before + self.num_chunks_after + 1) * self.chunk_length} `hidden_states`, there are" + f" {relevant_bucket_idx_chunk.shape[-1]} `bucket_idx`." + ) return relevant_hidden_states, relevant_bucket_idx_chunk, query_buckets @@ -1054,9 +1070,10 @@ class LocalSelfAttention(nn.Module, EfficientAttentionMixin): # check if cache shall be used and that hidden states are already cached if use_cache and past_buckets_states[1] is not None: - assert ( - past_buckets_states[0] is None - ), "LocalSelfAttention should not make use of `buckets`. There seems to be an error when caching hidden_states_and_buckets." + assert past_buckets_states[0] is None, ( + "LocalSelfAttention should not make use of `buckets`. There seems to be an error when caching" + " hidden_states_and_buckets." 
+ ) key_value_hidden_states = self._retrieve_relevant_hidden_states( past_buckets_states[1], self.chunk_length, self.num_chunks_before ) @@ -1092,9 +1109,10 @@ class LocalSelfAttention(nn.Module, EfficientAttentionMixin): ), f"last dim of query_key_vectors is {value_vectors.shape[-1]} but should be {self.attention_head_size}." if self.chunk_length is None: - assert ( - self.num_chunks_before == 0 and self.num_chunks_after == 0 - ), "If `config.chunk_length` is `None`, make sure `config.num_chunks_after` and `config.num_chunks_before` are set to 0." + assert self.num_chunks_before == 0 and self.num_chunks_after == 0, ( + "If `config.chunk_length` is `None`, make sure `config.num_chunks_after` and" + " `config.num_chunks_before` are set to 0." + ) # normalize key vectors key_vectors = key_vectors / torch.sqrt( @@ -1514,9 +1532,10 @@ class ReformerLayer(nn.Module): # Implementation of RevNet (see Fig. 6 in https://towardsdatascience.com/illustrating-the-reformer-393575ac6ba0) # This code is heavily inspired by https://github.com/lucidrains/reformer-pytorch/blob/master/reformer_pytorch/reversible.py - assert ( - self.training - ), "If you want to train `ReformerModel` and its variations, make sure to use `model.train()` to put the model into training mode." + assert self.training, ( + "If you want to train `ReformerModel` and its variations, make sure to use `model.train()` to put the" + " model into training mode." + ) with torch.enable_grad(): next_attn_output.requires_grad = True @@ -1957,7 +1976,7 @@ REFORMER_INPUTS_DOCSTRING = r""" @add_start_docstrings( - "The bare Reformer Model transformer outputting raw hidden-states" "without any specific head on top.", + "The bare Reformer Model transformer outputting raw hidden-stateswithout any specific head on top.", REFORMER_START_DOCSTRING, ) class ReformerModel(ReformerPreTrainedModel): @@ -2176,12 +2195,14 @@ class ReformerModelWithLMHead(ReformerPreTrainedModel): def __init__(self, config): super().__init__(config) assert config.is_decoder, "If you want to use `ReformerModelWithLMHead` make sure that `is_decoder=True`." - assert ( - "local" not in self.config.attn_layers or config.local_num_chunks_after == 0 - ), f"If causal mask is enabled, make sure that `config.local_num_chunks_after` is set to 0 and not {config.local_num_chunks_after}." - assert ( - "lsh" not in self.config.attn_layers or config.lsh_num_chunks_after == 0 - ), f"If causal mask is enabled, make sure that `config.lsh_num_chunks_after` is set to 1 and not {config.lsh_num_chunks_after}." + assert "local" not in self.config.attn_layers or config.local_num_chunks_after == 0, ( + "If causal mask is enabled, make sure that `config.local_num_chunks_after` is set to 0 and not" + f" {config.local_num_chunks_after}." + ) + assert "lsh" not in self.config.attn_layers or config.lsh_num_chunks_after == 0, ( + "If causal mask is enabled, make sure that `config.lsh_num_chunks_after` is set to 1 and not" + f" {config.lsh_num_chunks_after}." + ) self.reformer = ReformerModel(config) self.lm_head = ReformerOnlyLMHead(config) @@ -2296,9 +2317,10 @@ class ReformerModelWithLMHead(ReformerPreTrainedModel): class ReformerForMaskedLM(ReformerPreTrainedModel): def __init__(self, config): super().__init__(config) - assert ( - not config.is_decoder - ), "If you want to use `ReformerForMaskedLM` make sure `config.is_decoder=False` for bi-directional self-attention." 
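# Editorial sketch of the f-string splitting visible throughout this hunk:
# when a long f-string is broken into adjacent fragments, only the fragments
# that actually contain replacement fields keep the `f` prefix. Stand-in value:
local_num_chunks_after = 1
msg = (
    "If causal mask is enabled, make sure that `config.local_num_chunks_after` is set to 0 and not"
    f" {local_num_chunks_after}."
)
assert msg.endswith("set to 0 and not 1.")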
+ assert not config.is_decoder, ( + "If you want to use `ReformerForMaskedLM` make sure `config.is_decoder=False` for bi-directional" + " self-attention." + ) self.reformer = ReformerModel(config) self.lm_head = ReformerOnlyLMHead(config) diff --git a/src/transformers/models/reformer/tokenization_reformer.py b/src/transformers/models/reformer/tokenization_reformer.py index 8c75dda15e..d5d73f3e45 100644 --- a/src/transformers/models/reformer/tokenization_reformer.py +++ b/src/transformers/models/reformer/tokenization_reformer.py @@ -34,7 +34,9 @@ VOCAB_FILES_NAMES = {"vocab_file": "spiece.model"} PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "google/reformer-crime-and-punishment": "https://huggingface.co/google/reformer-crime-and-punishment/resolve/main/spiece.model" + "google/reformer-crime-and-punishment": ( + "https://huggingface.co/google/reformer-crime-and-punishment/resolve/main/spiece.model" + ) } } diff --git a/src/transformers/models/reformer/tokenization_reformer_fast.py b/src/transformers/models/reformer/tokenization_reformer_fast.py index e6a8483791..e9c6a61993 100644 --- a/src/transformers/models/reformer/tokenization_reformer_fast.py +++ b/src/transformers/models/reformer/tokenization_reformer_fast.py @@ -38,10 +38,14 @@ VOCAB_FILES_NAMES = {"vocab_file": "spiece.model", "tokenizer_file": "tokenizer. PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "google/reformer-crime-and-punishment": "https://huggingface.co/google/reformer-crime-and-punishment/resolve/main/spiece.model" + "google/reformer-crime-and-punishment": ( + "https://huggingface.co/google/reformer-crime-and-punishment/resolve/main/spiece.model" + ) }, "tokenizer_file": { - "google/reformer-crime-and-punishment": "https://huggingface.co/google/reformer-crime-and-punishment/resolve/main/tokenizer.json" + "google/reformer-crime-and-punishment": ( + "https://huggingface.co/google/reformer-crime-and-punishment/resolve/main/tokenizer.json" + ) }, } diff --git a/src/transformers/models/regnet/convert_regnet_seer_10b_to_pytorch.py b/src/transformers/models/regnet/convert_regnet_seer_10b_to_pytorch.py index 8024ef6792..a43967d009 100644 --- a/src/transformers/models/regnet/convert_regnet_seer_10b_to_pytorch.py +++ b/src/transformers/models/regnet/convert_regnet_seer_10b_to_pytorch.py @@ -277,7 +277,10 @@ if __name__ == "__main__": "--model_name", default=None, type=str, - help="The name of the model you wish to convert, it must be one of the supported regnet* architecture, currently: regnetx-*, regnety-*. If `None`, all of them will the converted.", + help=( + "The name of the model you wish to convert, it must be one of the supported regnet* architecture," + " currently: regnetx-*, regnety-*. If `None`, all of them will the converted." + ), ) parser.add_argument( "--pytorch_dump_folder_path", diff --git a/src/transformers/models/regnet/convert_regnet_to_pytorch.py b/src/transformers/models/regnet/convert_regnet_to_pytorch.py index 96e4ab700a..9bb0ba0f05 100644 --- a/src/transformers/models/regnet/convert_regnet_to_pytorch.py +++ b/src/transformers/models/regnet/convert_regnet_to_pytorch.py @@ -84,7 +84,8 @@ class ModuleTransfer: if len(dest_traced) != len(src_traced) and self.raise_if_mismatch: raise Exception( - f"Numbers of operations are different. Source module has {len(src_traced)} operations while destination module has {len(dest_traced)}." + f"Numbers of operations are different. Source module has {len(src_traced)} operations while" + f" destination module has {len(dest_traced)}." 
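# Editorial sketch, with stand-in lists, of the ModuleTransfer guard above:
# the traced source and destination modules must expose the same number of
# operations before weights can be copied pairwise.
src_traced = ["conv", "bn", "relu"]
dest_traced = ["conv", "bn"]
try:
    if len(dest_traced) != len(src_traced):
        raise Exception(
            f"Numbers of operations are different. Source module has {len(src_traced)} operations while"
            f" destination module has {len(dest_traced)}."
        )
except Exception as err:
    print(err)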
) for dest_m, src_m in zip(dest_traced, src_traced): @@ -431,7 +432,10 @@ if __name__ == "__main__": "--model_name", default=None, type=str, - help="The name of the model you wish to convert, it must be one of the supported regnet* architecture, currently: regnetx-*, regnety-*. If `None`, all of them will the converted.", + help=( + "The name of the model you wish to convert, it must be one of the supported regnet* architecture," + " currently: regnetx-*, regnety-*. If `None`, all of them will the converted." + ), ) parser.add_argument( "--pytorch_dump_folder_path", diff --git a/src/transformers/models/rembert/convert_rembert_tf_checkpoint_to_pytorch.py b/src/transformers/models/rembert/convert_rembert_tf_checkpoint_to_pytorch.py index 2a3c497d37..4c3d53e789 100755 --- a/src/transformers/models/rembert/convert_rembert_tf_checkpoint_to_pytorch.py +++ b/src/transformers/models/rembert/convert_rembert_tf_checkpoint_to_pytorch.py @@ -51,8 +51,10 @@ if __name__ == "__main__": default=None, type=str, required=True, - help="The config json file corresponding to the pre-trained RemBERT model. \n" - "This specifies the model architecture.", + help=( + "The config json file corresponding to the pre-trained RemBERT model. \n" + "This specifies the model architecture." + ), ) parser.add_argument( "--pytorch_dump_path", default=None, type=str, required=True, help="Path to the output PyTorch model." diff --git a/src/transformers/models/rembert/modeling_rembert.py b/src/transformers/models/rembert/modeling_rembert.py index c7b8da35a2..08fd7d3e2d 100755 --- a/src/transformers/models/rembert/modeling_rembert.py +++ b/src/transformers/models/rembert/modeling_rembert.py @@ -460,7 +460,8 @@ class RemBertLayer(nn.Module): if self.is_decoder and encoder_hidden_states is not None: if not hasattr(self, "crossattention"): raise ValueError( - f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`" + f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers" + " by setting `config.add_cross_attention=True`" ) # cross_attn cached key/values tuple is at positions 3,4 of past_key_value tuple diff --git a/src/transformers/models/rembert/modeling_tf_rembert.py b/src/transformers/models/rembert/modeling_tf_rembert.py index c039f26350..92d4604b6c 100644 --- a/src/transformers/models/rembert/modeling_tf_rembert.py +++ b/src/transformers/models/rembert/modeling_tf_rembert.py @@ -414,8 +414,8 @@ class TFRemBertLayer(tf.keras.layers.Layer): if self.is_decoder and encoder_hidden_states is not None: if not hasattr(self, "crossattention"): raise ValueError( - f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers " - "by setting `config.add_cross_attention=True`" + f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers" + " by setting `config.add_cross_attention=True`" ) # cross_attn cached key/values tuple is at positions 3,4 of past_key_value tuple diff --git a/src/transformers/models/resnet/convert_resnet_to_pytorch.py b/src/transformers/models/resnet/convert_resnet_to_pytorch.py index 60973ecdec..55a865ed59 100644 --- a/src/transformers/models/resnet/convert_resnet_to_pytorch.py +++ b/src/transformers/models/resnet/convert_resnet_to_pytorch.py @@ -81,7 +81,8 @@ class ModuleTransfer: if len(dest_traced) != len(src_traced): raise Exception( - f"Numbers of operations are different. 
Source module has {len(src_traced)} operations while destination module has {len(dest_traced)}." + f"Numbers of operations are different. Source module has {len(src_traced)} operations while" + f" destination module has {len(dest_traced)}." ) for dest_m, src_m in zip(dest_traced, src_traced): @@ -173,7 +174,10 @@ if __name__ == "__main__": "--model_name", default=None, type=str, - help="The name of the model you wish to convert, it must be one of the supported resnet* architecture, currently: resnet18,26,34,50,101,152. If `None`, all of them will the converted.", + help=( + "The name of the model you wish to convert, it must be one of the supported resnet* architecture," + " currently: resnet18,26,34,50,101,152. If `None`, all of them will the converted." + ), ) parser.add_argument( "--pytorch_dump_folder_path", diff --git a/src/transformers/models/retribert/configuration_retribert.py b/src/transformers/models/retribert/configuration_retribert.py index 1e4feb2a69..23172cf40e 100644 --- a/src/transformers/models/retribert/configuration_retribert.py +++ b/src/transformers/models/retribert/configuration_retribert.py @@ -22,7 +22,9 @@ logger = logging.get_logger(__name__) # TODO: upload to AWS RETRIBERT_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "yjernite/retribert-base-uncased": "https://huggingface.co/yjernite/retribert-base-uncased/resolve/main/config.json", + "yjernite/retribert-base-uncased": ( + "https://huggingface.co/yjernite/retribert-base-uncased/resolve/main/config.json" + ), } diff --git a/src/transformers/models/retribert/tokenization_retribert.py b/src/transformers/models/retribert/tokenization_retribert.py index be9a40913f..b61c063440 100644 --- a/src/transformers/models/retribert/tokenization_retribert.py +++ b/src/transformers/models/retribert/tokenization_retribert.py @@ -24,7 +24,9 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"} PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "yjernite/retribert-base-uncased": "https://huggingface.co/yjernite/retribert-base-uncased/resolve/main/vocab.txt", + "yjernite/retribert-base-uncased": ( + "https://huggingface.co/yjernite/retribert-base-uncased/resolve/main/vocab.txt" + ), } } diff --git a/src/transformers/models/retribert/tokenization_retribert_fast.py b/src/transformers/models/retribert/tokenization_retribert_fast.py index 43cc383721..3451d1224a 100644 --- a/src/transformers/models/retribert/tokenization_retribert_fast.py +++ b/src/transformers/models/retribert/tokenization_retribert_fast.py @@ -25,10 +25,14 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt", "tokenizer_file": "tokenizer.jso PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "yjernite/retribert-base-uncased": "https://huggingface.co/yjernite/retribert-base-uncased/resolve/main/vocab.txt", + "yjernite/retribert-base-uncased": ( + "https://huggingface.co/yjernite/retribert-base-uncased/resolve/main/vocab.txt" + ), }, "tokenizer_file": { - "yjernite/retribert-base-uncased": "https://huggingface.co/yjernite/retribert-base-uncased/resolve/main/tokenizer.json", + "yjernite/retribert-base-uncased": ( + "https://huggingface.co/yjernite/retribert-base-uncased/resolve/main/tokenizer.json" + ), }, } diff --git a/src/transformers/models/roberta/modeling_flax_roberta.py b/src/transformers/models/roberta/modeling_flax_roberta.py index 4a34fa77bc..84bf15da6d 100644 --- a/src/transformers/models/roberta/modeling_flax_roberta.py +++ b/src/transformers/models/roberta/modeling_flax_roberta.py @@ -183,8 +183,8 @@ class FlaxRobertaSelfAttention(nn.Module): self.head_dim = 
self.config.hidden_size // self.config.num_attention_heads if self.config.hidden_size % self.config.num_attention_heads != 0: raise ValueError( - "`config.hidden_size`: {self.config.hidden_size} has to be a multiple of `config.num_attention_heads`\ - : {self.config.num_attention_heads}" + "`config.hidden_size`: {self.config.hidden_size} has to be a multiple of `config.num_attention_heads` " + " : {self.config.num_attention_heads}" ) self.query = nn.Dense( @@ -538,8 +538,8 @@ class FlaxRobertaLayerCollection(nn.Module): if head_mask is not None: if head_mask.shape[0] != (len(self.layers)): raise ValueError( - f"The head_mask should be specified for {len(self.layers)} layers, but it is for \ - {head_mask.shape[0]}." + f"The head_mask should be specified for {len(self.layers)} layers, but it is for " + f" {head_mask.shape[0]}." ) for i, layer in enumerate(self.layers): diff --git a/src/transformers/models/roberta/modeling_roberta.py b/src/transformers/models/roberta/modeling_roberta.py index 3b5f6a9a6b..0b57b1031e 100644 --- a/src/transformers/models/roberta/modeling_roberta.py +++ b/src/transformers/models/roberta/modeling_roberta.py @@ -426,7 +426,8 @@ class RobertaLayer(nn.Module): if self.is_decoder and encoder_hidden_states is not None: if not hasattr(self, "crossattention"): raise ValueError( - f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`" + f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers" + " by setting `config.add_cross_attention=True`" ) # cross_attn cached key/values tuple is at positions 3,4 of past_key_value tuple diff --git a/src/transformers/models/roberta/modeling_tf_roberta.py b/src/transformers/models/roberta/modeling_tf_roberta.py index 7c39b7334a..a320664bce 100644 --- a/src/transformers/models/roberta/modeling_tf_roberta.py +++ b/src/transformers/models/roberta/modeling_tf_roberta.py @@ -463,8 +463,8 @@ class TFRobertaLayer(tf.keras.layers.Layer): if self.is_decoder and encoder_hidden_states is not None: if not hasattr(self, "crossattention"): raise ValueError( - f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers " - "by setting `config.add_cross_attention=True`" + f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers" + " by setting `config.add_cross_attention=True`" ) # cross_attn cached key/values tuple is at positions 3,4 of past_key_value tuple diff --git a/src/transformers/models/roberta/tokenization_roberta.py b/src/transformers/models/roberta/tokenization_roberta.py index 0d87615c15..face0c38be 100644 --- a/src/transformers/models/roberta/tokenization_roberta.py +++ b/src/transformers/models/roberta/tokenization_roberta.py @@ -39,7 +39,9 @@ PRETRAINED_VOCAB_FILES_MAP = { "roberta-large-mnli": "https://huggingface.co/roberta-large-mnli/resolve/main/vocab.json", "distilroberta-base": "https://huggingface.co/distilroberta-base/resolve/main/vocab.json", "roberta-base-openai-detector": "https://huggingface.co/roberta-base-openai-detector/resolve/main/vocab.json", - "roberta-large-openai-detector": "https://huggingface.co/roberta-large-openai-detector/resolve/main/vocab.json", + "roberta-large-openai-detector": ( + "https://huggingface.co/roberta-large-openai-detector/resolve/main/vocab.json" + ), }, "merges_file": { "roberta-base": "https://huggingface.co/roberta-base/resolve/main/merges.txt", @@ -47,7 +49,9 @@ 
PRETRAINED_VOCAB_FILES_MAP = { "roberta-large-mnli": "https://huggingface.co/roberta-large-mnli/resolve/main/merges.txt", "distilroberta-base": "https://huggingface.co/distilroberta-base/resolve/main/merges.txt", "roberta-base-openai-detector": "https://huggingface.co/roberta-base-openai-detector/resolve/main/merges.txt", - "roberta-large-openai-detector": "https://huggingface.co/roberta-large-openai-detector/resolve/main/merges.txt", + "roberta-large-openai-detector": ( + "https://huggingface.co/roberta-large-openai-detector/resolve/main/merges.txt" + ), }, } diff --git a/src/transformers/models/roberta/tokenization_roberta_fast.py b/src/transformers/models/roberta/tokenization_roberta_fast.py index 7b774f69f1..cb055430b1 100644 --- a/src/transformers/models/roberta/tokenization_roberta_fast.py +++ b/src/transformers/models/roberta/tokenization_roberta_fast.py @@ -35,7 +35,9 @@ PRETRAINED_VOCAB_FILES_MAP = { "roberta-large-mnli": "https://huggingface.co/roberta-large-mnli/resolve/main/vocab.json", "distilroberta-base": "https://huggingface.co/distilroberta-base/resolve/main/vocab.json", "roberta-base-openai-detector": "https://huggingface.co/roberta-base-openai-detector/resolve/main/vocab.json", - "roberta-large-openai-detector": "https://huggingface.co/roberta-large-openai-detector/resolve/main/vocab.json", + "roberta-large-openai-detector": ( + "https://huggingface.co/roberta-large-openai-detector/resolve/main/vocab.json" + ), }, "merges_file": { "roberta-base": "https://huggingface.co/roberta-base/resolve/main/merges.txt", @@ -43,15 +45,21 @@ PRETRAINED_VOCAB_FILES_MAP = { "roberta-large-mnli": "https://huggingface.co/roberta-large-mnli/resolve/main/merges.txt", "distilroberta-base": "https://huggingface.co/distilroberta-base/resolve/main/merges.txt", "roberta-base-openai-detector": "https://huggingface.co/roberta-base-openai-detector/resolve/main/merges.txt", - "roberta-large-openai-detector": "https://huggingface.co/roberta-large-openai-detector/resolve/main/merges.txt", + "roberta-large-openai-detector": ( + "https://huggingface.co/roberta-large-openai-detector/resolve/main/merges.txt" + ), }, "tokenizer_file": { "roberta-base": "https://huggingface.co/roberta-base/resolve/main/tokenizer.json", "roberta-large": "https://huggingface.co/roberta-large/resolve/main/tokenizer.json", "roberta-large-mnli": "https://huggingface.co/roberta-large-mnli/resolve/main/tokenizer.json", "distilroberta-base": "https://huggingface.co/distilroberta-base/resolve/main/tokenizer.json", - "roberta-base-openai-detector": "https://huggingface.co/roberta-base-openai-detector/resolve/main/tokenizer.json", - "roberta-large-openai-detector": "https://huggingface.co/roberta-large-openai-detector/resolve/main/tokenizer.json", + "roberta-base-openai-detector": ( + "https://huggingface.co/roberta-base-openai-detector/resolve/main/tokenizer.json" + ), + "roberta-large-openai-detector": ( + "https://huggingface.co/roberta-large-openai-detector/resolve/main/tokenizer.json" + ), }, } diff --git a/src/transformers/models/roformer/configuration_roformer.py b/src/transformers/models/roformer/configuration_roformer.py index 2c5de2bbbe..ea547ca52d 100644 --- a/src/transformers/models/roformer/configuration_roformer.py +++ b/src/transformers/models/roformer/configuration_roformer.py @@ -27,10 +27,18 @@ logger = logging.get_logger(__name__) ROFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP = { "junnyu/roformer_chinese_small": "https://huggingface.co/junnyu/roformer_chinese_small/resolve/main/config.json", "junnyu/roformer_chinese_base": 
"https://huggingface.co/junnyu/roformer_chinese_base/resolve/main/config.json", - "junnyu/roformer_chinese_char_small": "https://huggingface.co/junnyu/roformer_chinese_char_small/resolve/main/config.json", - "junnyu/roformer_chinese_char_base": "https://huggingface.co/junnyu/roformer_chinese_char_base/resolve/main/config.json", - "junnyu/roformer_small_discriminator": "https://huggingface.co/junnyu/roformer_small_discriminator/resolve/main/config.json", - "junnyu/roformer_small_generator": "https://huggingface.co/junnyu/roformer_small_generator/resolve/main/config.json", + "junnyu/roformer_chinese_char_small": ( + "https://huggingface.co/junnyu/roformer_chinese_char_small/resolve/main/config.json" + ), + "junnyu/roformer_chinese_char_base": ( + "https://huggingface.co/junnyu/roformer_chinese_char_base/resolve/main/config.json" + ), + "junnyu/roformer_small_discriminator": ( + "https://huggingface.co/junnyu/roformer_small_discriminator/resolve/main/config.json" + ), + "junnyu/roformer_small_generator": ( + "https://huggingface.co/junnyu/roformer_small_generator/resolve/main/config.json" + ), # See all RoFormer models at https://huggingface.co/models?filter=roformer } diff --git a/src/transformers/models/roformer/convert_roformer_original_tf_checkpoint_to_pytorch.py b/src/transformers/models/roformer/convert_roformer_original_tf_checkpoint_to_pytorch.py index 33edf59f6b..0ab8b671d0 100755 --- a/src/transformers/models/roformer/convert_roformer_original_tf_checkpoint_to_pytorch.py +++ b/src/transformers/models/roformer/convert_roformer_original_tf_checkpoint_to_pytorch.py @@ -51,8 +51,10 @@ if __name__ == "__main__": default=None, type=str, required=True, - help="The config json file corresponding to the pre-trained BERT model. \n" - "This specifies the model architecture.", + help=( + "The config json file corresponding to the pre-trained BERT model. \n" + "This specifies the model architecture." + ), ) parser.add_argument( "--pytorch_dump_path", default=None, type=str, required=True, help="Path to the output PyTorch model." diff --git a/src/transformers/models/roformer/modeling_flax_roformer.py b/src/transformers/models/roformer/modeling_flax_roformer.py index 37dd729666..011f161048 100644 --- a/src/transformers/models/roformer/modeling_flax_roformer.py +++ b/src/transformers/models/roformer/modeling_flax_roformer.py @@ -180,8 +180,8 @@ class FlaxRoFormerSelfAttention(nn.Module): def setup(self) -> None: if self.config.hidden_size % self.config.num_attention_heads != 0: raise ValueError( - "`config.hidden_size`: {self.config.hidden_size} has to be a multiple of `config.num_attention_heads`\ - : {self.config.num_attention_heads}" + "`config.hidden_size`: {self.config.hidden_size} has to be a multiple of `config.num_attention_heads` " + " : {self.config.num_attention_heads}" ) self.query = nn.Dense( @@ -456,8 +456,8 @@ class FlaxRoFormerLayerCollection(nn.Module): if head_mask is not None: if head_mask.shape[0] != (len(self.layers)): raise ValueError( - f"The head_mask should be specified for {len(self.layers)} layers, but it is for \ - {head_mask.shape[0]}." + f"The head_mask should be specified for {len(self.layers)} layers, but it is for " + f" {head_mask.shape[0]}." 
) for i, layer in enumerate(self.layers): diff --git a/src/transformers/models/roformer/tokenization_roformer.py b/src/transformers/models/roformer/tokenization_roformer.py index e5e3728c03..ac1efc72d0 100644 --- a/src/transformers/models/roformer/tokenization_roformer.py +++ b/src/transformers/models/roformer/tokenization_roformer.py @@ -31,10 +31,18 @@ PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { "junnyu/roformer_chinese_small": "https://huggingface.co/junnyu/roformer_chinese_small/resolve/main/vocab.txt", "junnyu/roformer_chinese_base": "https://huggingface.co/junnyu/roformer_chinese_base/resolve/main/vocab.txt", - "junnyu/roformer_chinese_char_small": "https://huggingface.co/junnyu/roformer_chinese_char_small/resolve/main/vocab.txt", - "junnyu/roformer_chinese_char_base": "https://huggingface.co/junnyu/roformer_chinese_char_base/resolve/main/vocab.txt", - "junnyu/roformer_small_discriminator": "https://huggingface.co/junnyu/roformer_small_discriminator/resolve/main/vocab.txt", - "junnyu/roformer_small_generator": "https://huggingface.co/junnyu/roformer_small_generator/resolve/main/vocab.txt", + "junnyu/roformer_chinese_char_small": ( + "https://huggingface.co/junnyu/roformer_chinese_char_small/resolve/main/vocab.txt" + ), + "junnyu/roformer_chinese_char_base": ( + "https://huggingface.co/junnyu/roformer_chinese_char_base/resolve/main/vocab.txt" + ), + "junnyu/roformer_small_discriminator": ( + "https://huggingface.co/junnyu/roformer_small_discriminator/resolve/main/vocab.txt" + ), + "junnyu/roformer_small_generator": ( + "https://huggingface.co/junnyu/roformer_small_generator/resolve/main/vocab.txt" + ), } } @@ -144,8 +152,8 @@ class RoFormerTokenizer(PreTrainedTokenizer): if not os.path.isfile(vocab_file): raise ValueError( - f"Can't find a vocabulary file at path '{vocab_file}'. To load the vocabulary from a Google pretrained " - "model use `tokenizer = AutoTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`" + f"Can't find a vocabulary file at path '{vocab_file}'. 
To load the vocabulary from a Google pretrained " - "model use `tokenizer = AutoTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`" + f"Can't find a vocabulary file at path '{vocab_file}'. To load the vocabulary from a Google pretrained" + " model use `tokenizer = AutoTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`" ) self.vocab = load_vocab(vocab_file) self.ids_to_tokens = collections.OrderedDict([(ids, tok) for tok, ids in self.vocab.items()]) diff --git a/src/transformers/models/roformer/tokenization_roformer_fast.py b/src/transformers/models/roformer/tokenization_roformer_fast.py index 59644df746..7b2cab5688 100644 --- a/src/transformers/models/roformer/tokenization_roformer_fast.py +++ b/src/transformers/models/roformer/tokenization_roformer_fast.py @@ -33,10 +33,18 @@ PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { "junnyu/roformer_chinese_small": "https://huggingface.co/junnyu/roformer_chinese_small/resolve/main/vocab.txt", "junnyu/roformer_chinese_base": "https://huggingface.co/junnyu/roformer_chinese_base/resolve/main/vocab.txt", - "junnyu/roformer_chinese_char_small": "https://huggingface.co/junnyu/roformer_chinese_char_small/resolve/main/vocab.txt", - "junnyu/roformer_chinese_char_base": "https://huggingface.co/junnyu/roformer_chinese_char_base/resolve/main/vocab.txt", - "junnyu/roformer_small_discriminator": "https://huggingface.co/junnyu/roformer_small_discriminator/resolve/main/vocab.txt", - "junnyu/roformer_small_generator": "https://huggingface.co/junnyu/roformer_small_generator/resolve/main/vocab.txt", + "junnyu/roformer_chinese_char_small": ( + "https://huggingface.co/junnyu/roformer_chinese_char_small/resolve/main/vocab.txt" + ), + "junnyu/roformer_chinese_char_base": ( + "https://huggingface.co/junnyu/roformer_chinese_char_base/resolve/main/vocab.txt" + ), + "junnyu/roformer_small_discriminator": ( + "https://huggingface.co/junnyu/roformer_small_discriminator/resolve/main/vocab.txt" + ), + "junnyu/roformer_small_generator": ( + "https://huggingface.co/junnyu/roformer_small_generator/resolve/main/vocab.txt" + ), } } diff --git a/src/transformers/models/segformer/configuration_segformer.py b/src/transformers/models/segformer/configuration_segformer.py index fa54c62c22..faec5d6c4c 100644 --- a/src/transformers/models/segformer/configuration_segformer.py +++ b/src/transformers/models/segformer/configuration_segformer.py @@ -23,7 +23,9 @@ from ...utils import logging logger = logging.get_logger(__name__) SEGFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "nvidia/segformer-b0-finetuned-ade-512-512": "https://huggingface.co/nvidia/segformer-b0-finetuned-ade-512-512/resolve/main/config.json", + "nvidia/segformer-b0-finetuned-ade-512-512": ( + "https://huggingface.co/nvidia/segformer-b0-finetuned-ade-512-512/resolve/main/config.json" + ), # See all SegFormer models at https://huggingface.co/models?filter=segformer } @@ -122,8 +124,8 @@ class SegformerConfig(PretrainedConfig): if "reshape_last_stage" in kwargs and kwargs["reshape_last_stage"] is False: warnings.warn( - "Reshape_last_stage is set to False in this config. This argument is deprecated and will soon be removed, " - "as the behaviour will default to that of reshape_last_stage = True.", + "Reshape_last_stage is set to False in this config.
This argument is deprecated and will soon be" + " removed, as the behaviour will default to that of reshape_last_stage = True.", FutureWarning, ) diff --git a/src/transformers/models/segformer/feature_extraction_segformer.py b/src/transformers/models/segformer/feature_extraction_segformer.py index c706c559af..0a9ae01ef1 100644 --- a/src/transformers/models/segformer/feature_extraction_segformer.py +++ b/src/transformers/models/segformer/feature_extraction_segformer.py @@ -158,8 +158,9 @@ class SegformerFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMi if not valid_segmentation_maps: raise ValueError( - "Segmentation maps must of type `PIL.Image.Image`, `np.ndarray` or `torch.Tensor` (single example)," - "`List[PIL.Image.Image]`, `List[np.ndarray]` or `List[torch.Tensor]` (batch of examples)." + "Segmentation maps must of type `PIL.Image.Image`, `np.ndarray` or `torch.Tensor` (single" + " example),`List[PIL.Image.Image]`, `List[np.ndarray]` or `List[torch.Tensor]` (batch of" + " examples)." ) is_batched = bool( diff --git a/src/transformers/models/sew/convert_sew_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/sew/convert_sew_original_pytorch_checkpoint_to_pytorch.py index 6449288810..58c0338a85 100644 --- a/src/transformers/models/sew/convert_sew_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/sew/convert_sew_original_pytorch_checkpoint_to_pytorch.py @@ -67,9 +67,10 @@ def set_recursively(hf_pointer, key, value, full_name, weight_type): else: hf_shape = hf_pointer.shape - assert ( - hf_shape == value.shape - ), f"Shape of hf {key + '.' + weight_type if weight_type is not None else ''} is {hf_shape}, but should be {value.shape} for {full_name}" + assert hf_shape == value.shape, ( + f"Shape of hf {key + '.' + weight_type if weight_type is not None else ''} is {hf_shape}, but should be" + f" {value.shape} for {full_name}" + ) if weight_type == "weight": hf_pointer.weight.data = value @@ -137,28 +138,32 @@ def load_conv_layer(full_name, value, feature_extractor, unused_weights, use_gro if type_id == 0: if "bias" in name: - assert ( - value.shape == feature_extractor.conv_layers[layer_id].conv.bias.data.shape - ), f"{full_name} has size {value.shape}, but {feature_extractor.conv_layers[layer_id].conv.bias.data.shape} was found." + assert value.shape == feature_extractor.conv_layers[layer_id].conv.bias.data.shape, ( + f"{full_name} has size {value.shape}, but" + f" {feature_extractor.conv_layers[layer_id].conv.bias.data.shape} was found." + ) feature_extractor.conv_layers[layer_id].conv.bias.data = value logger.info(f"Feat extract conv layer {layer_id} was initialized from {full_name}.") elif "weight" in name: - assert ( - value.shape == feature_extractor.conv_layers[layer_id].conv.weight.data.shape - ), f"{full_name} has size {value.shape}, but {feature_extractor.conv_layers[layer_id].conv.weight.data.shape} was found." + assert value.shape == feature_extractor.conv_layers[layer_id].conv.weight.data.shape, ( + f"{full_name} has size {value.shape}, but" + f" {feature_extractor.conv_layers[layer_id].conv.weight.data.shape} was found." 
+ ) feature_extractor.conv_layers[layer_id].conv.weight.data = value logger.info(f"Feat extract conv layer {layer_id} was initialized from {full_name}.") elif (type_id == 2 and not use_group_norm) or (type_id == 2 and layer_id == 0 and use_group_norm): if "bias" in name: - assert ( - value.shape == feature_extractor.conv_layers[layer_id].layer_norm.bias.data.shape - ), f"{full_name} has size {value.shape}, but {feature_extractor[layer_id].layer_norm.bias.data.shape} was found." + assert value.shape == feature_extractor.conv_layers[layer_id].layer_norm.bias.data.shape, ( + f"{full_name} has size {value.shape}, but {feature_extractor[layer_id].layer_norm.bias.data.shape} was" + " found." + ) feature_extractor.conv_layers[layer_id].layer_norm.bias.data = value logger.info(f"Feat extract layer norm weight of layer {layer_id} was initialized from {full_name}.") elif "weight" in name: - assert ( - value.shape == feature_extractor.conv_layers[layer_id].layer_norm.weight.data.shape - ), f"{full_name} has size {value.shape}, but {feature_extractor[layer_id].layer_norm.weight.data.shape} was found." + assert value.shape == feature_extractor.conv_layers[layer_id].layer_norm.weight.data.shape, ( + f"{full_name} has size {value.shape}, but" + f" {feature_extractor[layer_id].layer_norm.weight.data.shape} was found." + ) feature_extractor.conv_layers[layer_id].layer_norm.weight.data = value logger.info(f"Feat extract layer norm weight of layer {layer_id} was initialized from {full_name}.") else: diff --git a/src/transformers/models/sew/modeling_sew.py b/src/transformers/models/sew/modeling_sew.py index ac2a6293cb..1ead293261 100644 --- a/src/transformers/models/sew/modeling_sew.py +++ b/src/transformers/models/sew/modeling_sew.py @@ -489,7 +489,8 @@ class SEWAttention(nn.Module): if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len): raise ValueError( - f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is {attn_weights.size()}" + f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is" + f" {attn_weights.size()}" ) if attention_mask is not None: @@ -505,7 +506,8 @@ class SEWAttention(nn.Module): if layer_head_mask is not None: if layer_head_mask.size() != (self.num_heads,): raise ValueError( - f"Head mask for a single layer should be of size {(self.num_heads,)}, but is {layer_head_mask.size()}" + f"Head mask for a single layer should be of size {(self.num_heads,)}, but is" + f" {layer_head_mask.size()}" ) attn_weights = layer_head_mask.view(1, -1, 1, 1) * attn_weights.view(bsz, self.num_heads, tgt_len, src_len) attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len) @@ -526,7 +528,8 @@ class SEWAttention(nn.Module): if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim): raise ValueError( - f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is {attn_output.size()}" + f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is" + f" {attn_output.size()}" ) attn_output = attn_output.view(bsz, self.num_heads, tgt_len, self.head_dim) diff --git a/src/transformers/models/sew_d/convert_sew_d_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/sew_d/convert_sew_d_original_pytorch_checkpoint_to_pytorch.py index e6529eea04..942add470b 100644 --- a/src/transformers/models/sew_d/convert_sew_d_original_pytorch_checkpoint_to_pytorch.py +++ 
b/src/transformers/models/sew_d/convert_sew_d_original_pytorch_checkpoint_to_pytorch.py @@ -69,9 +69,10 @@ def set_recursively(hf_pointer, key, value, full_name, weight_type): else: hf_shape = hf_pointer.shape - assert ( - hf_shape == value.shape - ), f"Shape of hf {key + '.' + weight_type if weight_type is not None else ''} is {hf_shape}, but should be {value.shape} for {full_name}" + assert hf_shape == value.shape, ( + f"Shape of hf {key + '.' + weight_type if weight_type is not None else ''} is {hf_shape}, but should be" + f" {value.shape} for {full_name}" + ) if weight_type == "weight": hf_pointer.weight.data = value @@ -141,28 +142,32 @@ def load_conv_layer(full_name, value, feature_extractor, unused_weights, use_gro if type_id == 0: if "bias" in name: - assert ( - value.shape == feature_extractor.conv_layers[layer_id].conv.bias.data.shape - ), f"{full_name} has size {value.shape}, but {feature_extractor.conv_layers[layer_id].conv.bias.data.shape} was found." + assert value.shape == feature_extractor.conv_layers[layer_id].conv.bias.data.shape, ( + f"{full_name} has size {value.shape}, but" + f" {feature_extractor.conv_layers[layer_id].conv.bias.data.shape} was found." + ) feature_extractor.conv_layers[layer_id].conv.bias.data = value logger.info(f"Feat extract conv layer {layer_id} was initialized from {full_name}.") elif "weight" in name: - assert ( - value.shape == feature_extractor.conv_layers[layer_id].conv.weight.data.shape - ), f"{full_name} has size {value.shape}, but {feature_extractor.conv_layers[layer_id].conv.weight.data.shape} was found." + assert value.shape == feature_extractor.conv_layers[layer_id].conv.weight.data.shape, ( + f"{full_name} has size {value.shape}, but" + f" {feature_extractor.conv_layers[layer_id].conv.weight.data.shape} was found." + ) feature_extractor.conv_layers[layer_id].conv.weight.data = value logger.info(f"Feat extract conv layer {layer_id} was initialized from {full_name}.") elif (type_id == 2 and not use_group_norm) or (type_id == 2 and layer_id == 0 and use_group_norm): if "bias" in name: - assert ( - value.shape == feature_extractor.conv_layers[layer_id].layer_norm.bias.data.shape - ), f"{full_name} has size {value.shape}, but {feature_extractor[layer_id].layer_norm.bias.data.shape} was found." + assert value.shape == feature_extractor.conv_layers[layer_id].layer_norm.bias.data.shape, ( + f"{full_name} has size {value.shape}, but {feature_extractor[layer_id].layer_norm.bias.data.shape} was" + " found." + ) feature_extractor.conv_layers[layer_id].layer_norm.bias.data = value logger.info(f"Feat extract layer norm weight of layer {layer_id} was initialized from {full_name}.") elif "weight" in name: - assert ( - value.shape == feature_extractor.conv_layers[layer_id].layer_norm.weight.data.shape - ), f"{full_name} has size {value.shape}, but {feature_extractor[layer_id].layer_norm.weight.data.shape} was found." + assert value.shape == feature_extractor.conv_layers[layer_id].layer_norm.weight.data.shape, ( + f"{full_name} has size {value.shape}, but" + f" {feature_extractor[layer_id].layer_norm.weight.data.shape} was found." 
+ ) feature_extractor.conv_layers[layer_id].layer_norm.weight.data = value logger.info(f"Feat extract layer norm weight of layer {layer_id} was initialized from {full_name}.") else: diff --git a/src/transformers/models/speech_encoder_decoder/configuration_speech_encoder_decoder.py b/src/transformers/models/speech_encoder_decoder/configuration_speech_encoder_decoder.py index ca3e4966aa..8b648f8e21 100644 --- a/src/transformers/models/speech_encoder_decoder/configuration_speech_encoder_decoder.py +++ b/src/transformers/models/speech_encoder_decoder/configuration_speech_encoder_decoder.py @@ -77,7 +77,8 @@ class SpeechEncoderDecoderConfig(PretrainedConfig): super().__init__(**kwargs) if "encoder" not in kwargs or "decoder" not in kwargs: raise ValueError( - f"A configuraton of type {self.model_type} cannot be instantiated because not both `encoder` and `decoder` sub-configurations are passed, but only {kwargs}" + f"A configuraton of type {self.model_type} cannot be instantiated because not both `encoder` and" + f" `decoder` sub-configurations are passed, but only {kwargs}" ) encoder_config = kwargs.pop("encoder") diff --git a/src/transformers/models/speech_encoder_decoder/convert_mbart_wav2vec2_seq2seq_original_to_pytorch.py b/src/transformers/models/speech_encoder_decoder/convert_mbart_wav2vec2_seq2seq_original_to_pytorch.py index 3c25ab706f..8680f96e50 100644 --- a/src/transformers/models/speech_encoder_decoder/convert_mbart_wav2vec2_seq2seq_original_to_pytorch.py +++ b/src/transformers/models/speech_encoder_decoder/convert_mbart_wav2vec2_seq2seq_original_to_pytorch.py @@ -75,9 +75,10 @@ def set_recursively(hf_pointer, key, value, full_name, weight_type): else: hf_shape = hf_pointer.shape - assert ( - hf_shape == value.shape - ), f"Shape of hf {key + '.' + weight_type if weight_type is not None else ''} is {hf_shape}, but should be {value.shape} for {full_name}" + assert hf_shape == value.shape, ( + f"Shape of hf {key + '.' + weight_type if weight_type is not None else ''} is {hf_shape}, but should be" + f" {value.shape} for {full_name}" + ) if weight_type == "weight": hf_pointer.weight.data = value @@ -147,28 +148,32 @@ def load_conv_layer(full_name, value, feature_extractor, unused_weights, use_gro if type_id == 0: if "bias" in name: - assert ( - value.shape == feature_extractor.conv_layers[layer_id].conv.bias.data.shape - ), f"{full_name} has size {value.shape}, but {feature_extractor.conv_layers[layer_id].conv.bias.data.shape} was found." + assert value.shape == feature_extractor.conv_layers[layer_id].conv.bias.data.shape, ( + f"{full_name} has size {value.shape}, but" + f" {feature_extractor.conv_layers[layer_id].conv.bias.data.shape} was found." + ) feature_extractor.conv_layers[layer_id].conv.bias.data = value logger.info(f"Feat extract conv layer {layer_id} was initialized from {full_name}.") elif "weight" in name: - assert ( - value.shape == feature_extractor.conv_layers[layer_id].conv.weight.data.shape - ), f"{full_name} has size {value.shape}, but {feature_extractor.conv_layers[layer_id].conv.weight.data.shape} was found." + assert value.shape == feature_extractor.conv_layers[layer_id].conv.weight.data.shape, ( + f"{full_name} has size {value.shape}, but" + f" {feature_extractor.conv_layers[layer_id].conv.weight.data.shape} was found." 
+ ) feature_extractor.conv_layers[layer_id].conv.weight.data = value logger.info(f"Feat extract conv layer {layer_id} was initialized from {full_name}.") elif (type_id == 2 and not use_group_norm) or (type_id == 2 and layer_id == 0 and use_group_norm): if "bias" in name: - assert ( - value.shape == feature_extractor.conv_layers[layer_id].layer_norm.bias.data.shape - ), f"{full_name} has size {value.shape}, but {feature_extractor[layer_id].layer_norm.bias.data.shape} was found." + assert value.shape == feature_extractor.conv_layers[layer_id].layer_norm.bias.data.shape, ( + f"{full_name} has size {value.shape}, but {feature_extractor[layer_id].layer_norm.bias.data.shape} was" + " found." + ) feature_extractor.conv_layers[layer_id].layer_norm.bias.data = value logger.info(f"Feat extract layer norm weight of layer {layer_id} was initialized from {full_name}.") elif "weight" in name: - assert ( - value.shape == feature_extractor.conv_layers[layer_id].layer_norm.weight.data.shape - ), f"{full_name} has size {value.shape}, but {feature_extractor[layer_id].layer_norm.weight.data.shape} was found." + assert value.shape == feature_extractor.conv_layers[layer_id].layer_norm.weight.data.shape, ( + f"{full_name} has size {value.shape}, but" + f" {feature_extractor[layer_id].layer_norm.weight.data.shape} was found." + ) feature_extractor.conv_layers[layer_id].layer_norm.weight.data = value logger.info(f"Feat extract layer norm weight of layer {layer_id} was initialized from {full_name}.") else: diff --git a/src/transformers/models/speech_encoder_decoder/convert_speech_to_text_wav2vec2_seq2seq_original_to_pytorch.py b/src/transformers/models/speech_encoder_decoder/convert_speech_to_text_wav2vec2_seq2seq_original_to_pytorch.py index 40433bba13..0a4bc48dea 100644 --- a/src/transformers/models/speech_encoder_decoder/convert_speech_to_text_wav2vec2_seq2seq_original_to_pytorch.py +++ b/src/transformers/models/speech_encoder_decoder/convert_speech_to_text_wav2vec2_seq2seq_original_to_pytorch.py @@ -77,9 +77,10 @@ def set_recursively(hf_pointer, key, value, full_name, weight_type): else: hf_shape = hf_pointer.shape - assert ( - hf_shape == value.shape - ), f"Shape of hf {key + '.' + weight_type if weight_type is not None else ''} is {hf_shape}, but should be {value.shape} for {full_name}" + assert hf_shape == value.shape, ( + f"Shape of hf {key + '.' + weight_type if weight_type is not None else ''} is {hf_shape}, but should be" + f" {value.shape} for {full_name}" + ) if weight_type == "weight": hf_pointer.weight.data = value @@ -153,28 +154,32 @@ def load_conv_layer(full_name, value, feature_extractor, unused_weights, use_gro if type_id == 0: if "bias" in name: - assert ( - value.shape == feature_extractor.conv_layers[layer_id].conv.bias.data.shape - ), f"{full_name} has size {value.shape}, but {feature_extractor.conv_layers[layer_id].conv.bias.data.shape} was found." + assert value.shape == feature_extractor.conv_layers[layer_id].conv.bias.data.shape, ( + f"{full_name} has size {value.shape}, but" + f" {feature_extractor.conv_layers[layer_id].conv.bias.data.shape} was found." + ) feature_extractor.conv_layers[layer_id].conv.bias.data = value logger.info(f"Feat extract conv layer {layer_id} was initialized from {full_name}.") elif "weight" in name: - assert ( - value.shape == feature_extractor.conv_layers[layer_id].conv.weight.data.shape - ), f"{full_name} has size {value.shape}, but {feature_extractor.conv_layers[layer_id].conv.weight.data.shape} was found." 
+ assert value.shape == feature_extractor.conv_layers[layer_id].conv.weight.data.shape, ( + f"{full_name} has size {value.shape}, but" + f" {feature_extractor.conv_layers[layer_id].conv.weight.data.shape} was found." + ) feature_extractor.conv_layers[layer_id].conv.weight.data = value logger.info(f"Feat extract conv layer {layer_id} was initialized from {full_name}.") elif (type_id == 2 and not use_group_norm) or (type_id == 2 and layer_id == 0 and use_group_norm): if "bias" in name: - assert ( - value.shape == feature_extractor.conv_layers[layer_id].layer_norm.bias.data.shape - ), f"{full_name} has size {value.shape}, but {feature_extractor[layer_id].layer_norm.bias.data.shape} was found." + assert value.shape == feature_extractor.conv_layers[layer_id].layer_norm.bias.data.shape, ( + f"{full_name} has size {value.shape}, but {feature_extractor[layer_id].layer_norm.bias.data.shape} was" + " found." + ) feature_extractor.conv_layers[layer_id].layer_norm.bias.data = value logger.info(f"Feat extract layer norm weight of layer {layer_id} was initialized from {full_name}.") elif "weight" in name: - assert ( - value.shape == feature_extractor.conv_layers[layer_id].layer_norm.weight.data.shape - ), f"{full_name} has size {value.shape}, but {feature_extractor[layer_id].layer_norm.weight.data.shape} was found." + assert value.shape == feature_extractor.conv_layers[layer_id].layer_norm.weight.data.shape, ( + f"{full_name} has size {value.shape}, but" + f" {feature_extractor[layer_id].layer_norm.weight.data.shape} was found." + ) feature_extractor.conv_layers[layer_id].layer_norm.weight.data = value logger.info(f"Feat extract layer norm weight of layer {layer_id} was initialized from {full_name}.") else: diff --git a/src/transformers/models/speech_encoder_decoder/modeling_flax_speech_encoder_decoder.py b/src/transformers/models/speech_encoder_decoder/modeling_flax_speech_encoder_decoder.py index 0326fee63e..cd304fa0c0 100644 --- a/src/transformers/models/speech_encoder_decoder/modeling_flax_speech_encoder_decoder.py +++ b/src/transformers/models/speech_encoder_decoder/modeling_flax_speech_encoder_decoder.py @@ -357,10 +357,10 @@ class FlaxSpeechEncoderDecoderModel(FlaxPreTrainedModel): # Raise ValueError or option to project enc to dec hidden_size (eg EncAdapterLayer) if config.decoder.cross_attention_hidden_size != config.encoder.hidden_size: raise ValueError( - "If `cross_attention_hidden_size` is specified in the decoder's configuration, " - "it has to be equal to the encoder's `hidden_size`. " - f"Got {config.decoder.cross_attention_hidden_size} for `config.decoder.cross_attention_hidden_size` " - f"and {config.encoder.hidden_size} for `config.encoder.hidden_size`." + "If `cross_attention_hidden_size` is specified in the decoder's configuration, it has to be equal" + f" to the encoder's `hidden_size`. Got {config.decoder.cross_attention_hidden_size} for" + f" `config.decoder.cross_attention_hidden_size` and {config.encoder.hidden_size} for" + " `config.encoder.hidden_size`." ) # make sure input & output embeddings are not tied @@ -389,7 +389,8 @@ class FlaxSpeechEncoderDecoderModel(FlaxPreTrainedModel): decoder_batch_size, decoder_sequence_length = decoder_input_ids.shape if not decoder_batch_size == batch_size: raise ValueError( - f"The inputs of encoder and decoder should have the same batch size, but got {batch_size} for encoder and {decoder_batch_size} for decoder." 
+ f"The inputs of encoder and decoder should have the same batch size, but got {batch_size} for encoder" + f" and {decoder_batch_size} for decoder." ) decoder_position_ids = jnp.broadcast_to( jnp.arange(decoder_sequence_length)[None, :], (decoder_batch_size, decoder_sequence_length) @@ -713,7 +714,8 @@ class FlaxSpeechEncoderDecoderModel(FlaxPreTrainedModel): # prepare decoder inputs if decoder_input_ids is None: raise ValueError( - "`decoder_input_ids` cannot be `None`. For sequence to sequence training, `decoder_position_ids` must be specified as an input argument." + "`decoder_input_ids` cannot be `None`. For sequence to sequence training, `decoder_position_ids` must" + " be specified as an input argument." ) if decoder_attention_mask is None: decoder_attention_mask = jnp.ones_like(decoder_input_ids) @@ -895,10 +897,9 @@ class FlaxSpeechEncoderDecoderModel(FlaxPreTrainedModel): ) if decoder_config.is_decoder is False or decoder_config.add_cross_attention is False: logger.info( - f"Initializing {decoder_pretrained_model_name_or_path} as a decoder model. " - f"Cross attention layers are added to {decoder_pretrained_model_name_or_path} " - f"and randomly initialized if {decoder_pretrained_model_name_or_path}'s architecture allows for " - "cross attention layers." + f"Initializing {decoder_pretrained_model_name_or_path} as a decoder model. Cross attention" + f" layers are added to {decoder_pretrained_model_name_or_path} and randomly initialized if" + f" {decoder_pretrained_model_name_or_path}'s architecture allows for cross attention layers." ) decoder_config.is_decoder = True decoder_config.add_cross_attention = True diff --git a/src/transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py b/src/transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py index 1dbba59f9e..8b717641bb 100644 --- a/src/transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py +++ b/src/transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py @@ -199,10 +199,10 @@ class SpeechEncoderDecoderModel(PreTrainedModel): if config.decoder.cross_attention_hidden_size is not None: if config.decoder.cross_attention_hidden_size != config.encoder.hidden_size: raise ValueError( - "If `cross_attention_hidden_size` is specified in the decoder's configuration, " - "it has to be equal to the encoder's `hidden_size`. " - f"Got {config.decoder.cross_attention_hidden_size} for `config.decoder.cross_attention_hidden_size` " - f"and {config.encoder.hidden_size} for `config.encoder.hidden_size`." + "If `cross_attention_hidden_size` is specified in the decoder's configuration, it has to be equal" + f" to the encoder's `hidden_size`. Got {config.decoder.cross_attention_hidden_size} for" + f" `config.decoder.cross_attention_hidden_size` and {config.encoder.hidden_size} for" + " `config.encoder.hidden_size`." 
) # initialize with config @@ -221,11 +221,13 @@ class SpeechEncoderDecoderModel(PreTrainedModel): if self.encoder.config.to_dict() != self.config.encoder.to_dict(): logger.warning( - f"Config of the encoder: {self.encoder.__class__} is overwritten by shared encoder config: {self.config.encoder}" + f"Config of the encoder: {self.encoder.__class__} is overwritten by shared encoder config:" + f" {self.config.encoder}" ) if self.decoder.config.to_dict() != self.config.decoder.to_dict(): logger.warning( - f"Config of the decoder: {self.decoder.__class__} is overwritten by shared decoder config: {self.config.decoder}" + f"Config of the decoder: {self.decoder.__class__} is overwritten by shared decoder config:" + f" {self.config.decoder}" ) # make sure that the individual model's config refers to the shared config @@ -410,10 +412,9 @@ class SpeechEncoderDecoderModel(PreTrainedModel): if decoder_config.is_decoder is False or decoder_config.add_cross_attention is False: logger.info( - f"Initializing {decoder_pretrained_model_name_or_path} as a decoder model. " - f"Cross attention layers are added to {decoder_pretrained_model_name_or_path} " - f"and randomly initialized if {decoder_pretrained_model_name_or_path}'s architecture allows for " - "cross attention layers." + f"Initializing {decoder_pretrained_model_name_or_path} as a decoder model. Cross attention" + f" layers are added to {decoder_pretrained_model_name_or_path} and randomly initialized if" + f" {decoder_pretrained_model_name_or_path}'s architecture allows for cross attention layers." ) decoder_config.is_decoder = True decoder_config.add_cross_attention = True @@ -599,8 +600,8 @@ class SpeechEncoderDecoderModel(PreTrainedModel): def resize_token_embeddings(self, *args, **kwargs): raise NotImplementedError( - "Resizing the embedding layers via the SpeechEncoderDecoderModel directly is not supported. " - "Please use the respective methods of the wrapped decoder object (model.decoder.resize_token_embeddings(...))" + "Resizing the embedding layers via the SpeechEncoderDecoderModel directly is not supported. 
Please use the" + " respective methods of the wrapped decoder object (model.decoder.resize_token_embeddings(...))" ) def _reorder_cache(self, past, beam_idx): diff --git a/src/transformers/models/speech_to_text/configuration_speech_to_text.py b/src/transformers/models/speech_to_text/configuration_speech_to_text.py index f08bbf51e1..f12be50b53 100644 --- a/src/transformers/models/speech_to_text/configuration_speech_to_text.py +++ b/src/transformers/models/speech_to_text/configuration_speech_to_text.py @@ -21,7 +21,9 @@ from ...utils import logging logger = logging.get_logger(__name__) SPEECH_TO_TEXT_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "facebook/s2t-small-librispeech-asr": "https://huggingface.co/facebook/s2t-small-librispeech-asr/resolve/main/config.json", + "facebook/s2t-small-librispeech-asr": ( + "https://huggingface.co/facebook/s2t-small-librispeech-asr/resolve/main/config.json" + ), # See all Speech2Text models at https://huggingface.co/models?filter=speech_to_text } diff --git a/src/transformers/models/speech_to_text/convert_s2t_fairseq_to_tfms.py b/src/transformers/models/speech_to_text/convert_s2t_fairseq_to_tfms.py index df8bc48536..6c1cd993fe 100644 --- a/src/transformers/models/speech_to_text/convert_s2t_fairseq_to_tfms.py +++ b/src/transformers/models/speech_to_text/convert_s2t_fairseq_to_tfms.py @@ -102,7 +102,8 @@ def convert_fairseq_s2t_checkpoint_to_tfms(checkpoint_path, pytorch_dump_folder_ ] ): raise ValueError( - f"Only `encoder.embed_positions.weights` and `decoder.embed_positions.weights` are allowed to be missing, but all the following weights are missing {missing}" + "Only `encoder.embed_positions.weights` and `decoder.embed_positions.weights` are allowed to be missing," + f" but all the following weights are missing {missing}" ) if tie_embeds: diff --git a/src/transformers/models/speech_to_text/feature_extraction_speech_to_text.py b/src/transformers/models/speech_to_text/feature_extraction_speech_to_text.py index e6ff52f183..4294c48c71 100644 --- a/src/transformers/models/speech_to_text/feature_extraction_speech_to_text.py +++ b/src/transformers/models/speech_to_text/feature_extraction_speech_to_text.py @@ -190,8 +190,9 @@ class Speech2TextFeatureExtractor(SequenceFeatureExtractor): if sampling_rate is not None: if sampling_rate != self.sampling_rate: raise ValueError( - f"The model corresponding to this feature extractor: {self} was trained using a sampling rate of {self.sampling_rate}. " - f"Please make sure that the provided `raw_speech` input was sampled with {self.sampling_rate} and not {sampling_rate}." + f"The model corresponding to this feature extractor: {self} was trained using a sampling rate of" + f" {self.sampling_rate}. Please make sure that the provided `raw_speech` input was sampled with" + f" {self.sampling_rate} and not {sampling_rate}." 
) else: logger.warning( diff --git a/src/transformers/models/speech_to_text/modeling_speech_to_text.py b/src/transformers/models/speech_to_text/modeling_speech_to_text.py index 8f3062e6c7..a358b13c1f 100755 --- a/src/transformers/models/speech_to_text/modeling_speech_to_text.py +++ b/src/transformers/models/speech_to_text/modeling_speech_to_text.py @@ -292,7 +292,8 @@ class Speech2TextAttention(nn.Module): if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len): raise ValueError( - f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is {attn_weights.size()}" + f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is" + f" {attn_weights.size()}" ) if attention_mask is not None: @@ -308,7 +309,8 @@ class Speech2TextAttention(nn.Module): if layer_head_mask is not None: if layer_head_mask.size() != (self.num_heads,): raise ValueError( - f"Head mask for a single layer should be of size {(self.num_heads,)}, but is {layer_head_mask.size()}" + f"Head mask for a single layer should be of size {(self.num_heads,)}, but is" + f" {layer_head_mask.size()}" ) attn_weights = layer_head_mask.view(1, -1, 1, 1) * attn_weights.view(bsz, self.num_heads, tgt_len, src_len) attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len) @@ -329,7 +331,8 @@ class Speech2TextAttention(nn.Module): if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim): raise ValueError( - f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is {attn_output.size()}" + f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is" + f" {attn_output.size()}" ) attn_output = attn_output.view(bsz, self.num_heads, tgt_len, self.head_dim) @@ -1024,9 +1027,10 @@ class Speech2TextDecoder(Speech2TextPreTrainedModel): # check if head_mask/cross_attn_head_mask has a correct number of layers specified if desired for attn_mask, mask_name in zip([head_mask, cross_attn_head_mask], ["head_mask", "cross_attn_head_mask"]): if attn_mask is not None: - assert attn_mask.size()[0] == ( - len(self.layers) - ), f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}." + assert attn_mask.size()[0] == (len(self.layers)), ( + f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for" + f" {head_mask.size()[0]}." + ) for idx, decoder_layer in enumerate(self.layers): # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) if output_hidden_states: @@ -1041,7 +1045,8 @@ class Speech2TextDecoder(Speech2TextPreTrainedModel): if use_cache: logger.warning( - "`use_cache = True` is incompatible with gradient checkpointing. Setting `use_cache = False`..." + "`use_cache = True` is incompatible with gradient checkpointing. Setting `use_cache =" + " False`..." 
) use_cache = False diff --git a/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py b/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py index c78d19056b..f61ddd7fed 100755 --- a/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py +++ b/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py @@ -331,7 +331,10 @@ class TFSpeech2TextAttention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(attn_weights), [bsz * self.num_heads, tgt_len, src_len], - message=f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is {shape_list(attn_weights)}", + message=( + f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is" + f" {shape_list(attn_weights)}" + ), ) if attention_mask is not None: @@ -341,7 +344,10 @@ class TFSpeech2TextAttention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(attention_mask), [bsz, 1, tgt_len, src_len], - message=f"Attention mask should be of size {(bsz, 1, tgt_len, src_len)}, but is {shape_list(attention_mask)}", + message=( + f"Attention mask should be of size {(bsz, 1, tgt_len, src_len)}, but is" + f" {shape_list(attention_mask)}" + ), ) attention_mask = tf.cast(attention_mask, dtype=attn_weights.dtype) @@ -357,7 +363,10 @@ class TFSpeech2TextAttention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(layer_head_mask), [self.num_heads], - message=f"Head mask for a single layer should be of size {(self.num_heads)}, but is {shape_list(layer_head_mask)}", + message=( + f"Head mask for a single layer should be of size {(self.num_heads)}, but is" + f" {shape_list(layer_head_mask)}" + ), ) attn_weights = tf.reshape(layer_head_mask, (1, -1, 1, 1)) * tf.reshape( @@ -374,7 +383,10 @@ class TFSpeech2TextAttention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(attn_output), [bsz * self.num_heads, tgt_len, self.head_dim], - message=f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is {shape_list(attn_output)}", + message=( + f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is" + f" {shape_list(attn_output)}" + ), ) attn_output = tf.transpose( @@ -856,7 +868,10 @@ class TFSpeech2TextEncoder(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(head_mask)[0], len(self.layers), - message=f"The head_mask should be specified for {len(self.layers)} layers, but it is for {shape_list(head_mask)[0]}.", + message=( + f"The head_mask should be specified for {len(self.layers)} layers, but it is for" + f" {shape_list(head_mask)[0]}." + ), ) for idx, encoder_layer in enumerate(self.layers): @@ -1065,7 +1080,10 @@ class TFSpeech2TextDecoder(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(attn_mask)[0], len(self.layers), - message=f"The {attn_mask_name} should be specified for {len(self.layers)} layers, but it is for {shape_list(attn_mask)[0]}.", + message=( + f"The {attn_mask_name} should be specified for {len(self.layers)} layers, but it is for" + f" {shape_list(attn_mask)[0]}." 
+ ), ) for idx, decoder_layer in enumerate(self.layers): diff --git a/src/transformers/models/speech_to_text/tokenization_speech_to_text.py b/src/transformers/models/speech_to_text/tokenization_speech_to_text.py index 7d77c945ce..e1bc681499 100644 --- a/src/transformers/models/speech_to_text/tokenization_speech_to_text.py +++ b/src/transformers/models/speech_to_text/tokenization_speech_to_text.py @@ -36,10 +36,14 @@ VOCAB_FILES_NAMES = { PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "facebook/s2t-small-librispeech-asr": "https://huggingface.co/facebook/s2t-small-librispeech-asr/resolve/main/vocab.json", + "facebook/s2t-small-librispeech-asr": ( + "https://huggingface.co/facebook/s2t-small-librispeech-asr/resolve/main/vocab.json" + ), }, "spm_file": { - "facebook/s2t-small-librispeech-asr": "https://huggingface.co/facebook/s2t-small-librispeech-asr/resolve/main/sentencepiece.bpe.model" + "facebook/s2t-small-librispeech-asr": ( + "https://huggingface.co/facebook/s2t-small-librispeech-asr/resolve/main/sentencepiece.bpe.model" + ) }, } diff --git a/src/transformers/models/speech_to_text_2/configuration_speech_to_text_2.py b/src/transformers/models/speech_to_text_2/configuration_speech_to_text_2.py index d27bad73c7..c1b3cf7e4c 100644 --- a/src/transformers/models/speech_to_text_2/configuration_speech_to_text_2.py +++ b/src/transformers/models/speech_to_text_2/configuration_speech_to_text_2.py @@ -21,7 +21,9 @@ from ...utils import logging logger = logging.get_logger(__name__) SPEECH_TO_TEXT_2_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "facebook/s2t-wav2vec2-large-en-de": "https://huggingface.co/facebook/s2t-wav2vec2-large-en-de/resolve/main/config.json", + "facebook/s2t-wav2vec2-large-en-de": ( + "https://huggingface.co/facebook/s2t-wav2vec2-large-en-de/resolve/main/config.json" + ), # See all Speech2Text models at https://huggingface.co/models?filter=speech2text2 } diff --git a/src/transformers/models/speech_to_text_2/modeling_speech_to_text_2.py b/src/transformers/models/speech_to_text_2/modeling_speech_to_text_2.py index dccbd2adf4..5c0ea65fcc 100755 --- a/src/transformers/models/speech_to_text_2/modeling_speech_to_text_2.py +++ b/src/transformers/models/speech_to_text_2/modeling_speech_to_text_2.py @@ -238,7 +238,8 @@ class Speech2Text2Attention(nn.Module): if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len): raise ValueError( - f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is {attn_weights.size()}" + f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is" + f" {attn_weights.size()}" ) if attention_mask is not None: @@ -254,7 +255,8 @@ class Speech2Text2Attention(nn.Module): if layer_head_mask is not None: if layer_head_mask.size() != (self.num_heads,): raise ValueError( - f"Head mask for a single layer should be of size {(self.num_heads,)}, but is {layer_head_mask.size()}" + f"Head mask for a single layer should be of size {(self.num_heads,)}, but is" + f" {layer_head_mask.size()}" ) attn_weights = layer_head_mask.view(1, -1, 1, 1) * attn_weights.view(bsz, self.num_heads, tgt_len, src_len) attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len) @@ -275,7 +277,8 @@ class Speech2Text2Attention(nn.Module): if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim): raise ValueError( - f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is {attn_output.size()}" + f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, 
but is" + f" {attn_output.size()}" ) attn_output = attn_output.view(bsz, self.num_heads, tgt_len, self.head_dim) @@ -633,7 +636,8 @@ class Speech2Text2Decoder(Speech2Text2PreTrainedModel): if attn_mask is not None: if attn_mask.size()[0] != (len(self.layers)): raise ValueError( - f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}." + f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for" + f" {head_mask.size()[0]}." ) for idx, decoder_layer in enumerate(self.layers): # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) @@ -649,7 +653,8 @@ class Speech2Text2Decoder(Speech2Text2PreTrainedModel): if use_cache: logger.warning( - "`use_cache = True` is incompatible with gradient checkpointing. Setting `use_cache = False`..." + "`use_cache = True` is incompatible with gradient checkpointing. Setting `use_cache =" + " False`..." ) use_cache = False @@ -735,7 +740,8 @@ class Speech2Text2DecoderWrapper(Speech2Text2PreTrainedModel): @add_start_docstrings( - "The Speech2Text2 Decoder with a language modeling head. Can be used as the decoder part of [`EncoderDecoderModel`] and [`SpeechEncoderDecoder`].", + "The Speech2Text2 Decoder with a language modeling head. Can be used as the decoder part of" + " [`EncoderDecoderModel`] and [`SpeechEncoderDecoder`].", SPEECH_TO_TEXT_2_START_DOCSTRING, ) class Speech2Text2ForCausalLM(Speech2Text2PreTrainedModel): diff --git a/src/transformers/models/speech_to_text_2/tokenization_speech_to_text_2.py b/src/transformers/models/speech_to_text_2/tokenization_speech_to_text_2.py index 51d5c31ec9..6f69f7f90e 100644 --- a/src/transformers/models/speech_to_text_2/tokenization_speech_to_text_2.py +++ b/src/transformers/models/speech_to_text_2/tokenization_speech_to_text_2.py @@ -33,13 +33,19 @@ VOCAB_FILES_NAMES = { PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "facebook/s2t-wav2vec2-large-en-de": "https://huggingface.co/facebook/s2t-wav2vec2-large-en-de/resolve/main/vocab.json", + "facebook/s2t-wav2vec2-large-en-de": ( + "https://huggingface.co/facebook/s2t-wav2vec2-large-en-de/resolve/main/vocab.json" + ), }, "tokenizer_config_file": { - "facebook/s2t-wav2vec2-large-en-de": "https://huggingface.co/facebook/s2t-wav2vec2-large-en-de/resolve/main/tokenizer_config.json", + "facebook/s2t-wav2vec2-large-en-de": ( + "https://huggingface.co/facebook/s2t-wav2vec2-large-en-de/resolve/main/tokenizer_config.json" + ), }, "merges_file": { - "facebook/s2t-wav2vec2-large-en-de": "https://huggingface.co/facebook/s2t-wav2vec2-large-en-de/resolve/main/merges.txt", + "facebook/s2t-wav2vec2-large-en-de": ( + "https://huggingface.co/facebook/s2t-wav2vec2-large-en-de/resolve/main/merges.txt" + ), }, } diff --git a/src/transformers/models/splinter/modeling_splinter.py b/src/transformers/models/splinter/modeling_splinter.py index 840aa07b87..0bf8411f2f 100755 --- a/src/transformers/models/splinter/modeling_splinter.py +++ b/src/transformers/models/splinter/modeling_splinter.py @@ -370,7 +370,8 @@ class SplinterLayer(nn.Module): if self.is_decoder and encoder_hidden_states is not None: if not hasattr(self, "crossattention"): raise ValueError( - f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`" + f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers" + " by setting `config.add_cross_attention=True`" ) # cross_attn cached key/values tuple is at positions 
3,4 of past_key_value tuple diff --git a/src/transformers/models/splinter/tokenization_splinter.py b/src/transformers/models/splinter/tokenization_splinter.py index 9649da03f9..f600566e6e 100644 --- a/src/transformers/models/splinter/tokenization_splinter.py +++ b/src/transformers/models/splinter/tokenization_splinter.py @@ -153,8 +153,8 @@ class SplinterTokenizer(PreTrainedTokenizer): if not os.path.isfile(vocab_file): raise ValueError( - f"Can't find a vocabulary file at path '{vocab_file}'. To load the vocabulary from a Google pretrained " - "model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`" + f"Can't find a vocabulary file at path '{vocab_file}'. To load the vocabulary from a Google pretrained" + " model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`" ) self.vocab = load_vocab(vocab_file) self.ids_to_tokens = collections.OrderedDict([(ids, tok) for tok, ids in self.vocab.items()]) diff --git a/src/transformers/models/squeezebert/configuration_squeezebert.py b/src/transformers/models/squeezebert/configuration_squeezebert.py index 5a77495fc7..b4b707d6cb 100644 --- a/src/transformers/models/squeezebert/configuration_squeezebert.py +++ b/src/transformers/models/squeezebert/configuration_squeezebert.py @@ -21,9 +21,13 @@ from ...utils import logging logger = logging.get_logger(__name__) SQUEEZEBERT_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "squeezebert/squeezebert-uncased": "https://huggingface.co/squeezebert/squeezebert-uncased/resolve/main/config.json", + "squeezebert/squeezebert-uncased": ( + "https://huggingface.co/squeezebert/squeezebert-uncased/resolve/main/config.json" + ), "squeezebert/squeezebert-mnli": "https://huggingface.co/squeezebert/squeezebert-mnli/resolve/main/config.json", - "squeezebert/squeezebert-mnli-headless": "https://huggingface.co/squeezebert/squeezebert-mnli-headless/resolve/main/config.json", + "squeezebert/squeezebert-mnli-headless": ( + "https://huggingface.co/squeezebert/squeezebert-mnli-headless/resolve/main/config.json" + ), } diff --git a/src/transformers/models/squeezebert/tokenization_squeezebert.py b/src/transformers/models/squeezebert/tokenization_squeezebert.py index e41e576455..72d927ecca 100644 --- a/src/transformers/models/squeezebert/tokenization_squeezebert.py +++ b/src/transformers/models/squeezebert/tokenization_squeezebert.py @@ -24,9 +24,13 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"} PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "squeezebert/squeezebert-uncased": "https://huggingface.co/squeezebert/squeezebert-uncased/resolve/main/vocab.txt", + "squeezebert/squeezebert-uncased": ( + "https://huggingface.co/squeezebert/squeezebert-uncased/resolve/main/vocab.txt" + ), "squeezebert/squeezebert-mnli": "https://huggingface.co/squeezebert/squeezebert-mnli/resolve/main/vocab.txt", - "squeezebert/squeezebert-mnli-headless": "https://huggingface.co/squeezebert/squeezebert-mnli-headless/resolve/main/vocab.txt", + "squeezebert/squeezebert-mnli-headless": ( + "https://huggingface.co/squeezebert/squeezebert-mnli-headless/resolve/main/vocab.txt" + ), } } diff --git a/src/transformers/models/squeezebert/tokenization_squeezebert_fast.py b/src/transformers/models/squeezebert/tokenization_squeezebert_fast.py index 58708030f9..5ee656e5a8 100644 --- a/src/transformers/models/squeezebert/tokenization_squeezebert_fast.py +++ b/src/transformers/models/squeezebert/tokenization_squeezebert_fast.py @@ -25,14 +25,24 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt", "tokenizer_file": "tokenizer.jso 
n"}
PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "squeezebert/squeezebert-uncased": "https://huggingface.co/squeezebert/squeezebert-uncased/resolve/main/vocab.txt", + "squeezebert/squeezebert-uncased": ( + "https://huggingface.co/squeezebert/squeezebert-uncased/resolve/main/vocab.txt" + ), "squeezebert/squeezebert-mnli": "https://huggingface.co/squeezebert/squeezebert-mnli/resolve/main/vocab.txt", - "squeezebert/squeezebert-mnli-headless": "https://huggingface.co/squeezebert/squeezebert-mnli-headless/resolve/main/vocab.txt", + "squeezebert/squeezebert-mnli-headless": ( + "https://huggingface.co/squeezebert/squeezebert-mnli-headless/resolve/main/vocab.txt" + ), }, "tokenizer_file": { - "squeezebert/squeezebert-uncased": "https://huggingface.co/squeezebert/squeezebert-uncased/resolve/main/tokenizer.json", - "squeezebert/squeezebert-mnli": "https://huggingface.co/squeezebert/squeezebert-mnli/resolve/main/tokenizer.json", - "squeezebert/squeezebert-mnli-headless": "https://huggingface.co/squeezebert/squeezebert-mnli-headless/resolve/main/tokenizer.json", + "squeezebert/squeezebert-uncased": ( + "https://huggingface.co/squeezebert/squeezebert-uncased/resolve/main/tokenizer.json" + ), + "squeezebert/squeezebert-mnli": ( + "https://huggingface.co/squeezebert/squeezebert-mnli/resolve/main/tokenizer.json" + ), + "squeezebert/squeezebert-mnli-headless": ( + "https://huggingface.co/squeezebert/squeezebert-mnli-headless/resolve/main/tokenizer.json" + ), }, } diff --git a/src/transformers/models/swin/configuration_swin.py b/src/transformers/models/swin/configuration_swin.py index 9956482b9a..878a73e920 100644 --- a/src/transformers/models/swin/configuration_swin.py +++ b/src/transformers/models/swin/configuration_swin.py @@ -21,7 +21,9 @@ from ...utils import logging logger = logging.get_logger(__name__) SWIN_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "microsoft/swin-tiny-patch4-window7-224": "https://huggingface.co/microsoft/swin-tiny-patch4-window7-224/resolve/main/config.json", + "microsoft/swin-tiny-patch4-window7-224": ( + "https://huggingface.co/microsoft/swin-tiny-patch4-window7-224/resolve/main/config.json" + ), # See all Swin models at https://huggingface.co/models?filter=swin } diff --git a/src/transformers/models/swin/modeling_swin.py b/src/transformers/models/swin/modeling_swin.py index b2d6b348fb..2b415dbe26 100644 --- a/src/transformers/models/swin/modeling_swin.py +++ b/src/transformers/models/swin/modeling_swin.py @@ -402,7 +402,7 @@ class SwinSelfAttention(nn.Module): super().__init__() if dim % num_heads != 0: raise ValueError( - f"The hidden size ({dim}) is not a multiple of the number of attention " f"heads ({num_heads})" + f"The hidden size ({dim}) is not a multiple of the number of attention heads ({num_heads})" ) self.num_attention_heads = num_heads @@ -997,7 +997,8 @@ class SwinModel(SwinPreTrainedModel): @add_start_docstrings( - "Swin Model with a decoder on top for masked image modeling, as proposed in `SimMIM <https://arxiv.org/abs/2111.09886>`__.", + "Swin Model with a decoder on top for masked image modeling, as proposed in `SimMIM" + " <https://arxiv.org/abs/2111.09886>`__.", SWIN_START_DOCSTRING, ) class SwinForMaskedImageModeling(SwinPreTrainedModel): diff --git a/src/transformers/models/t5/convert_t5_original_tf_checkpoint_to_pytorch.py b/src/transformers/models/t5/convert_t5_original_tf_checkpoint_to_pytorch.py index a002030168..7d9a20f3b0 100755 --- a/src/transformers/models/t5/convert_t5_original_tf_checkpoint_to_pytorch.py +++ b/src/transformers/models/t5/convert_t5_original_tf_checkpoint_to_pytorch.py @@ -49,8 +49,9 @@ if __name__ == 
"__main__": default=None, type=str, required=True, - help="The config json file corresponding to the pre-trained T5 model. \n" - "This specifies the model architecture.", + help=( + "The config json file corresponding to the pre-trained T5 model. \nThis specifies the model architecture." + ), ) parser.add_argument( "--pytorch_dump_path", default=None, type=str, required=True, help="Path to the output PyTorch model." diff --git a/src/transformers/models/t5/modeling_flax_t5.py b/src/transformers/models/t5/modeling_flax_t5.py index 767caea3eb..a6e1da70bb 100644 --- a/src/transformers/models/t5/modeling_flax_t5.py +++ b/src/transformers/models/t5/modeling_flax_t5.py @@ -977,7 +977,8 @@ class FlaxT5PreTrainedModel(FlaxPreTrainedModel): if decoder_input_ids is None: raise ValueError( - "Make sure to provide both `input_ids` and `decoder_input_ids`. `decoder_input_ids` is not passed here." + "Make sure to provide both `input_ids` and `decoder_input_ids`. `decoder_input_ids` is not passed" + " here." ) # prepare encoder inputs @@ -1243,7 +1244,7 @@ T5_START_DOCSTRING = r""" @add_start_docstrings( - "The bare T5 Model transformer outputting raw hidden-states" "without any specific head on top.", + "The bare T5 Model transformer outputting raw hidden-stateswithout any specific head on top.", T5_START_DOCSTRING, ) class FlaxT5Module(nn.Module): diff --git a/src/transformers/models/t5/modeling_t5.py b/src/transformers/models/t5/modeling_t5.py index 630e9dd17a..bcd4837867 100644 --- a/src/transformers/models/t5/modeling_t5.py +++ b/src/transformers/models/t5/modeling_t5.py @@ -809,9 +809,10 @@ class T5PreTrainedModel(PreTrainedModel): decoder_start_token_id = self.config.decoder_start_token_id pad_token_id = self.config.pad_token_id - assert ( - decoder_start_token_id is not None - ), "self.model.config.decoder_start_token_id has to be defined. In T5 it is usually set to the pad_token_id. See T5 docs for more information" + assert decoder_start_token_id is not None, ( + "self.model.config.decoder_start_token_id has to be defined. In T5 it is usually set to the pad_token_id." + " See T5 docs for more information" + ) # shift inputs to the right if is_torch_fx_proxy(input_ids): diff --git a/src/transformers/models/t5/modeling_tf_t5.py b/src/transformers/models/t5/modeling_tf_t5.py index 3434a6ea4f..e7bae23c87 100644 --- a/src/transformers/models/t5/modeling_tf_t5.py +++ b/src/transformers/models/t5/modeling_tf_t5.py @@ -406,7 +406,10 @@ class TFT5Attention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(layer_head_mask), [self.n_heads], - message=f"Head mask for a single layer should be of size {(self.n_heads)}, but is {shape_list(layer_head_mask)}", + message=( + f"Head mask for a single layer should be of size {(self.n_heads)}, but is" + f" {shape_list(layer_head_mask)}" + ), ) weights = tf.reshape(layer_head_mask, (1, -1, 1, 1)) * weights @@ -899,9 +902,10 @@ class TFT5PreTrainedModel(TFPreTrainedModel): decoder_start_token_id = self.config.decoder_start_token_id pad_token_id = self.config.pad_token_id - assert ( - decoder_start_token_id is not None - ), "self.model.config.decoder_start_token_id has to be defined. In TF T5 it is usually set to the pad_token_id. See T5 docs for more information" + assert decoder_start_token_id is not None, ( + "self.model.config.decoder_start_token_id has to be defined. In TF T5 it is usually set to the" + " pad_token_id. 
See T5 docs for more information" + ) start_tokens = tf.fill((shape_list(input_ids)[0], 1), decoder_start_token_id) start_tokens = tf.cast(start_tokens, input_ids.dtype) # Ensure compatible dtypes for concatenation @@ -1102,7 +1106,7 @@ num_heads))`. @add_start_docstrings( - "The bare T5 Model transformer outputting raw hidden-states" "without any specific head on top.", + "The bare T5 Model transformer outputting raw hidden-states without any specific head on top.", T5_START_DOCSTRING, ) class TFT5Model(TFT5PreTrainedModel): @@ -1590,7 +1594,7 @@ class TFT5ForConditionalGeneration(TFT5PreTrainedModel, TFCausalLanguageModeling @add_start_docstrings( - "The bare T5 Model transformer outputting encoder's raw hidden-states" "without any specific head on top.", + "The bare T5 Model transformer outputting encoder's raw hidden-states without any specific head on top.", T5_START_DOCSTRING, ) class TFT5EncoderModel(TFT5PreTrainedModel): diff --git a/src/transformers/models/t5/tokenization_t5.py b/src/transformers/models/t5/tokenization_t5.py index 09414ae407..2dbc788374 100644 --- a/src/transformers/models/t5/tokenization_t5.py +++ b/src/transformers/models/t5/tokenization_t5.py @@ -131,8 +131,9 @@ class T5Tokenizer(PreTrainedTokenizer): extra_tokens = len(set(filter(lambda x: bool("extra_id" in str(x)), additional_special_tokens))) if extra_tokens != extra_ids: raise ValueError( - f"Both extra_ids ({extra_ids}) and additional_special_tokens ({additional_special_tokens}) are provided to T5Tokenizer. " - "In this case the additional_special_tokens must include the extra_ids tokens" + f"Both extra_ids ({extra_ids}) and additional_special_tokens ({additional_special_tokens}) are" + " provided to T5Tokenizer. In this case the additional_special_tokens must include the extra_ids" + " tokens" ) self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs @@ -161,11 +162,15 @@ class T5Tokenizer(PreTrainedTokenizer): return init_max_model_length elif init_max_model_length is None: warnings.warn( - f"This tokenizer was incorrectly instantiated with a model max length of {deprecated_max_model_length} which will be corrected in Transformers v5.\n" - f"For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.\n" - f"- Be aware that you SHOULD NOT rely on {pretrained_model_name_or_path} automatically truncating your input to {deprecated_max_model_length} when padding/encoding.\n" - f"- If you want to encode/pad to sequences longer than {deprecated_max_model_length} you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.\n" - f"- To avoid this warning, please instantiate this tokenizer with `model_max_length` set to your preferred value.", + "This tokenizer was incorrectly instantiated with a model max length of" + f" {deprecated_max_model_length} which will be corrected in Transformers v5.\nFor now, this" + " behavior is kept to avoid breaking backwards compatibility when padding/encoding with" + " `truncation is True`.\n- Be aware that you SHOULD NOT rely on" + f" {pretrained_model_name_or_path} automatically truncating your input to" + f" {deprecated_max_model_length} when padding/encoding.\n- If you want to encode/pad to sequences" + f" longer than {deprecated_max_model_length} you can either instantiate this tokenizer with" + " `model_max_length` or pass `max_length` when encoding/padding.\n- To avoid this warning, please" + " instantiate this tokenizer with `model_max_length` 
set to your preferred value.", FutureWarning, ) @@ -212,7 +217,8 @@ class T5Tokenizer(PreTrainedTokenizer): """Do not add eos again if user already added it.""" if len(token_ids) > 0 and token_ids[-1] == self.eos_token_id: warnings.warn( - f"This sequence already has {self.eos_token}. In future versions this behavior may lead to duplicated eos tokens being added." + f"This sequence already has {self.eos_token}. In future versions this behavior may lead to duplicated" + " eos tokens being added." ) return token_ids else: diff --git a/src/transformers/models/t5/tokenization_t5_fast.py b/src/transformers/models/t5/tokenization_t5_fast.py index 77a86810b3..41ad306b74 100644 --- a/src/transformers/models/t5/tokenization_t5_fast.py +++ b/src/transformers/models/t5/tokenization_t5_fast.py @@ -126,8 +126,9 @@ class T5TokenizerFast(PreTrainedTokenizerFast): extra_tokens = len(set(filter(lambda x: bool("extra_id_" in str(x)), additional_special_tokens))) if extra_tokens != extra_ids: raise ValueError( - f"Both extra_ids ({extra_ids}) and additional_special_tokens ({additional_special_tokens}) are provided to T5Tokenizer. " - "In this case the additional_special_tokens must include the extra_ids tokens" + f"Both extra_ids ({extra_ids}) and additional_special_tokens ({additional_special_tokens}) are" + " provided to T5Tokenizer. In this case the additional_special_tokens must include the extra_ids" + " tokens" ) super().__init__( @@ -153,11 +154,15 @@ class T5TokenizerFast(PreTrainedTokenizerFast): return init_max_model_length elif init_max_model_length is None: warnings.warn( - f"This tokenizer was incorrectly instantiated with a model max length of {deprecated_max_model_length} which will be corrected in Transformers v5.\n" - f"For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.\n" - f"- Be aware that you SHOULD NOT rely on {pretrained_model_name_or_path} automatically truncating your input to {deprecated_max_model_length} when padding/encoding.\n" - f"- If you want to encode/pad to sequences longer than {deprecated_max_model_length} you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.\n" - f"- To avoid this warning, please instantiate this tokenizer with `model_max_length` set to your preferred value.", + "This tokenizer was incorrectly instantiated with a model max length of" + f" {deprecated_max_model_length} which will be corrected in Transformers v5.\nFor now, this" + " behavior is kept to avoid breaking backwards compatibility when padding/encoding with" + " `truncation is True`.\n- Be aware that you SHOULD NOT rely on" + f" {pretrained_model_name_or_path} automatically truncating your input to" + f" {deprecated_max_model_length} when padding/encoding.\n- If you want to encode/pad to sequences" + f" longer than {deprecated_max_model_length} you can either instantiate this tokenizer with" + " `model_max_length` or pass `max_length` when encoding/padding.\n- To avoid this warning, please" + " instantiate this tokenizer with `model_max_length` set to your preferred value.", FutureWarning, ) diff --git a/src/transformers/models/tapas/configuration_tapas.py b/src/transformers/models/tapas/configuration_tapas.py index 58fb0c66b7..71fd5715ef 100644 --- a/src/transformers/models/tapas/configuration_tapas.py +++ b/src/transformers/models/tapas/configuration_tapas.py @@ -27,10 +27,18 @@ from ...configuration_utils import PretrainedConfig TAPAS_PRETRAINED_CONFIG_ARCHIVE_MAP = { - 
"google/tapas-base-finetuned-sqa": "https://huggingface.co/google/tapas-base-finetuned-sqa/resolve/main/config.json", - "google/tapas-base-finetuned-wtq": "https://huggingface.co/google/tapas-base-finetuned-wtq/resolve/main/config.json", - "google/tapas-base-finetuned-wikisql-supervised": "https://huggingface.co/google/tapas-base-finetuned-wikisql-supervised/resolve/main/config.json", - "google/tapas-base-finetuned-tabfact": "https://huggingface.co/google/tapas-base-finetuned-tabfact/resolve/main/config.json", + "google/tapas-base-finetuned-sqa": ( + "https://huggingface.co/google/tapas-base-finetuned-sqa/resolve/main/config.json" + ), + "google/tapas-base-finetuned-wtq": ( + "https://huggingface.co/google/tapas-base-finetuned-wtq/resolve/main/config.json" + ), + "google/tapas-base-finetuned-wikisql-supervised": ( + "https://huggingface.co/google/tapas-base-finetuned-wikisql-supervised/resolve/main/config.json" + ), + "google/tapas-base-finetuned-tabfact": ( + "https://huggingface.co/google/tapas-base-finetuned-tabfact/resolve/main/config.json" + ), } diff --git a/src/transformers/models/tapas/convert_tapas_original_tf_checkpoint_to_pytorch.py b/src/transformers/models/tapas/convert_tapas_original_tf_checkpoint_to_pytorch.py index 88edacacfd..2772a7f126 100644 --- a/src/transformers/models/tapas/convert_tapas_original_tf_checkpoint_to_pytorch.py +++ b/src/transformers/models/tapas/convert_tapas_original_tf_checkpoint_to_pytorch.py @@ -120,8 +120,10 @@ if __name__ == "__main__": default=None, type=str, required=True, - help="The config json file corresponding to the pre-trained TAPAS model. \n" - "This specifies the model architecture.", + help=( + "The config json file corresponding to the pre-trained TAPAS model. \n" + "This specifies the model architecture." + ), ) parser.add_argument( "--pytorch_dump_path", default=None, type=str, required=True, help="Path to the output PyTorch model." 
diff --git a/src/transformers/models/tapas/modeling_tapas.py b/src/transformers/models/tapas/modeling_tapas.py index b0c3786ca0..0b65e84ca7 100644 --- a/src/transformers/models/tapas/modeling_tapas.py +++ b/src/transformers/models/tapas/modeling_tapas.py @@ -582,7 +582,8 @@ class TapasLayer(nn.Module): if self.is_decoder and encoder_hidden_states is not None: if not hasattr(self, "crossattention"): raise ValueError( - f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`" + f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers" + " by setting `config.add_cross_attention=True`" ) # cross_attn cached key/values tuple is at positions 3,4 of past_key_value tuple @@ -1430,7 +1431,8 @@ class TapasForQuestionAnswering(TapasPreTrainedModel): per_example_additional_loss *= large_answer_loss_mask else: raise ValueError( - "You have to specify numeric values and numeric values scale in order to calculate the regression loss" + "You have to specify numeric values and numeric values scale in order to calculate the" + " regression loss" ) total_loss += torch.mean(per_example_additional_loss) diff --git a/src/transformers/models/tapas/modeling_tf_tapas.py b/src/transformers/models/tapas/modeling_tf_tapas.py index 29cb63c3ad..1875cc8009 100644 --- a/src/transformers/models/tapas/modeling_tf_tapas.py +++ b/src/transformers/models/tapas/modeling_tf_tapas.py @@ -519,8 +519,8 @@ class TFTapasLayer(tf.keras.layers.Layer): if self.is_decoder and encoder_hidden_states is not None: if not hasattr(self, "crossattention"): raise ValueError( - f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers " - "by setting `config.add_cross_attention=True`" + f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers" + " by setting `config.add_cross_attention=True`" ) # cross_attn cached key/values tuple is at positions 3,4 of past_key_value tuple @@ -1533,7 +1533,8 @@ class TFTapasForQuestionAnswering(TFTapasPreTrainedModel): per_example_additional_loss *= large_answer_loss_mask else: raise ValueError( - "You have to specify numeric values and numeric values scale in order to calculate the regression loss" + "You have to specify numeric values and numeric values scale in order to calculate the" + " regression loss" ) total_loss += tf.reduce_mean(per_example_additional_loss) @@ -1723,7 +1724,7 @@ class ProductIndexMap(IndexMap): inner_index: IndexMap, must have the same shape as `outer_index`. 
""" if outer_index.batch_dims != inner_index.batch_dims: - raise ValueError("outer_index.batch_dims and inner_index.batch_dims " "must be the same.") + raise ValueError("outer_index.batch_dims and inner_index.batch_dims must be the same.") super(ProductIndexMap, self).__init__( indices=( diff --git a/src/transformers/models/tapas/tokenization_tapas.py b/src/transformers/models/tapas/tokenization_tapas.py index 27481c35fb..ddb855642f 100644 --- a/src/transformers/models/tapas/tokenization_tapas.py +++ b/src/transformers/models/tapas/tokenization_tapas.py @@ -50,35 +50,83 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"} PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { # large models - "google/tapas-large-finetuned-sqa": "https://huggingface.co/google/tapas-large-finetuned-sqa/resolve/main/vocab.txt", - "google/tapas-large-finetuned-wtq": "https://huggingface.co/google/tapas-large-finetuned-wtq/resolve/main/vocab.txt", - "google/tapas-large-finetuned-wikisql-supervised": "https://huggingface.co/google/tapas-large-finetuned-wikisql-supervised/resolve/main/vocab.txt", - "google/tapas-large-finetuned-tabfact": "https://huggingface.co/google/tapas-large-finetuned-tabfact/resolve/main/vocab.txt", + "google/tapas-large-finetuned-sqa": ( + "https://huggingface.co/google/tapas-large-finetuned-sqa/resolve/main/vocab.txt" + ), + "google/tapas-large-finetuned-wtq": ( + "https://huggingface.co/google/tapas-large-finetuned-wtq/resolve/main/vocab.txt" + ), + "google/tapas-large-finetuned-wikisql-supervised": ( + "https://huggingface.co/google/tapas-large-finetuned-wikisql-supervised/resolve/main/vocab.txt" + ), + "google/tapas-large-finetuned-tabfact": ( + "https://huggingface.co/google/tapas-large-finetuned-tabfact/resolve/main/vocab.txt" + ), # base models - "google/tapas-base-finetuned-sqa": "https://huggingface.co/google/tapas-base-finetuned-sqa/resolve/main/vocab.txt", - "google/tapas-base-finetuned-wtq": "https://huggingface.co/google/tapas-base-finetuned-wtq/resolve/main/vocab.txt", - "google/tapas-base-finetuned-wikisql-supervised": "https://huggingface.co/google/tapas-base-finetuned-wikisql-supervised/resolve/main/vocab.txt", - "google/tapas-base-finetuned-tabfact": "https://huggingface.co/google/tapas-base-finetuned-tabfact/resolve/main/vocab.txt", + "google/tapas-base-finetuned-sqa": ( + "https://huggingface.co/google/tapas-base-finetuned-sqa/resolve/main/vocab.txt" + ), + "google/tapas-base-finetuned-wtq": ( + "https://huggingface.co/google/tapas-base-finetuned-wtq/resolve/main/vocab.txt" + ), + "google/tapas-base-finetuned-wikisql-supervised": ( + "https://huggingface.co/google/tapas-base-finetuned-wikisql-supervised/resolve/main/vocab.txt" + ), + "google/tapas-base-finetuned-tabfact": ( + "https://huggingface.co/google/tapas-base-finetuned-tabfact/resolve/main/vocab.txt" + ), # medium models - "google/tapas-medium-finetuned-sqa": "https://huggingface.co/google/tapas-medium-finetuned-sqa/resolve/main/vocab.txt", - "google/tapas-medium-finetuned-wtq": "https://huggingface.co/google/tapas-medium-finetuned-wtq/resolve/main/vocab.txt", - "google/tapas-medium-finetuned-wikisql-supervised": "https://huggingface.co/google/tapas-medium-finetuned-wikisql-supervised/resolve/main/vocab.txt", - "google/tapas-medium-finetuned-tabfact": "https://huggingface.co/google/tapas-medium-finetuned-tabfact/resolve/main/vocab.txt", + "google/tapas-medium-finetuned-sqa": ( + "https://huggingface.co/google/tapas-medium-finetuned-sqa/resolve/main/vocab.txt" + ), + "google/tapas-medium-finetuned-wtq": ( + 
"https://huggingface.co/google/tapas-medium-finetuned-wtq/resolve/main/vocab.txt" + ), + "google/tapas-medium-finetuned-wikisql-supervised": ( + "https://huggingface.co/google/tapas-medium-finetuned-wikisql-supervised/resolve/main/vocab.txt" + ), + "google/tapas-medium-finetuned-tabfact": ( + "https://huggingface.co/google/tapas-medium-finetuned-tabfact/resolve/main/vocab.txt" + ), # small models - "google/tapas-small-finetuned-sqa": "https://huggingface.co/google/tapas-small-finetuned-sqa/resolve/main/vocab.txt", - "google/tapas-small-finetuned-wtq": "https://huggingface.co/google/tapas-small-finetuned-wtq/resolve/main/vocab.txt", - "google/tapas-small-finetuned-wikisql-supervised": "https://huggingface.co/google/tapas-small-finetuned-wikisql-supervised/resolve/main/vocab.txt", - "google/tapas-small-finetuned-tabfact": "https://huggingface.co/google/tapas-small-finetuned-tabfact/resolve/main/vocab.txt", + "google/tapas-small-finetuned-sqa": ( + "https://huggingface.co/google/tapas-small-finetuned-sqa/resolve/main/vocab.txt" + ), + "google/tapas-small-finetuned-wtq": ( + "https://huggingface.co/google/tapas-small-finetuned-wtq/resolve/main/vocab.txt" + ), + "google/tapas-small-finetuned-wikisql-supervised": ( + "https://huggingface.co/google/tapas-small-finetuned-wikisql-supervised/resolve/main/vocab.txt" + ), + "google/tapas-small-finetuned-tabfact": ( + "https://huggingface.co/google/tapas-small-finetuned-tabfact/resolve/main/vocab.txt" + ), # tiny models - "google/tapas-tiny-finetuned-sqa": "https://huggingface.co/google/tapas-tiny-finetuned-sqa/resolve/main/vocab.txt", - "google/tapas-tiny-finetuned-wtq": "https://huggingface.co/google/tapas-tiny-finetuned-wtq/resolve/main/vocab.txt", - "google/tapas-tiny-finetuned-wikisql-supervised": "https://huggingface.co/google/tapas-tiny-finetuned-wikisql-supervised/resolve/main/vocab.txt", - "google/tapas-tiny-finetuned-tabfact": "https://huggingface.co/google/tapas-tiny-finetuned-tabfact/resolve/main/vocab.txt", + "google/tapas-tiny-finetuned-sqa": ( + "https://huggingface.co/google/tapas-tiny-finetuned-sqa/resolve/main/vocab.txt" + ), + "google/tapas-tiny-finetuned-wtq": ( + "https://huggingface.co/google/tapas-tiny-finetuned-wtq/resolve/main/vocab.txt" + ), + "google/tapas-tiny-finetuned-wikisql-supervised": ( + "https://huggingface.co/google/tapas-tiny-finetuned-wikisql-supervised/resolve/main/vocab.txt" + ), + "google/tapas-tiny-finetuned-tabfact": ( + "https://huggingface.co/google/tapas-tiny-finetuned-tabfact/resolve/main/vocab.txt" + ), # mini models - "google/tapas-mini-finetuned-sqa": "https://huggingface.co/google/tapas-mini-finetuned-sqa/resolve/main/vocab.txt", - "google/tapas-mini-finetuned-wtq": "https://huggingface.co/google/tapas-mini-finetuned-wtq/resolve/main/vocab.txt", - "google/tapas-mini-finetuned-wikisql-supervised": "https://huggingface.co/google/tapas-mini-finetuned-wikisql-supervised/resolve/main/vocab.txt", - "google/tapas-mini-finetuned-tabfact": "https://huggingface.co/google/tapas-mini-finetuned-tabfact/resolve/main/vocab.txt", + "google/tapas-mini-finetuned-sqa": ( + "https://huggingface.co/google/tapas-mini-finetuned-sqa/resolve/main/vocab.txt" + ), + "google/tapas-mini-finetuned-wtq": ( + "https://huggingface.co/google/tapas-mini-finetuned-wtq/resolve/main/vocab.txt" + ), + "google/tapas-mini-finetuned-wikisql-supervised": ( + "https://huggingface.co/google/tapas-mini-finetuned-wikisql-supervised/resolve/main/vocab.txt" + ), + "google/tapas-mini-finetuned-tabfact": ( + 
"https://huggingface.co/google/tapas-mini-finetuned-tabfact/resolve/main/vocab.txt" + ), } } @@ -329,8 +377,8 @@ class TapasTokenizer(PreTrainedTokenizer): if not os.path.isfile(vocab_file): raise ValueError( - f"Can't find a vocabulary file at path '{vocab_file}'. To load the vocabulary from a Google pretrained " - "model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`" + f"Can't find a vocabulary file at path '{vocab_file}'. To load the vocabulary from a Google pretrained" + " model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`" ) self.vocab = load_vocab(vocab_file) self.ids_to_tokens = collections.OrderedDict([(ids, tok) for tok, ids in self.vocab.items()]) @@ -594,7 +642,8 @@ class TapasTokenizer(PreTrainedTokenizer): if not valid_query: raise ValueError( - "queries input must of type `str` (single example), `List[str]` (batch or single pretokenized example). " + "queries input must of type `str` (single example), `List[str]` (batch or single pretokenized" + " example). " ) is_batched = isinstance(queries, (list, tuple)) @@ -1229,7 +1278,7 @@ class TapasTokenizer(PreTrainedTokenizer): if max_length is None and len(encoded_inputs["input_ids"]) > self.model_max_length and verbose: if not self.deprecation_warnings.get("sequence-length-is-longer-than-the-specified-maximum", False): logger.warning( - f"Token indices sequence length is longer than the specified maximum sequence length " + "Token indices sequence length is longer than the specified maximum sequence length " f"for this model ({len(encoded_inputs['input_ids'])} > {self.model_max_length}). Running this " "sequence through the model will result in indexing errors." ) diff --git a/src/transformers/models/transfo_xl/convert_transfo_xl_original_tf_checkpoint_to_pytorch.py b/src/transformers/models/transfo_xl/convert_transfo_xl_original_tf_checkpoint_to_pytorch.py index abde04bd43..646c8a2342 100755 --- a/src/transformers/models/transfo_xl/convert_transfo_xl_original_tf_checkpoint_to_pytorch.py +++ b/src/transformers/models/transfo_xl/convert_transfo_xl_original_tf_checkpoint_to_pytorch.py @@ -101,8 +101,10 @@ if __name__ == "__main__": "--transfo_xl_config_file", default="", type=str, - help="An optional config json file corresponding to the pre-trained BERT model. \n" - "This specifies the model architecture.", + help=( + "An optional config json file corresponding to the pre-trained BERT model. \n" + "This specifies the model architecture." + ), ) parser.add_argument( "--transfo_xl_dataset_file", diff --git a/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py b/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py index 2975373883..66467350f1 100644 --- a/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py +++ b/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py @@ -935,9 +935,10 @@ class TFTransfoXLLMHeadModel(TFTransfoXLPreTrainedModel): super().__init__(config) self.transformer = TFTransfoXLMainLayer(config, name="transformer") self.sample_softmax = config.sample_softmax - assert ( - self.sample_softmax <= 0 - ), "Sampling from the softmax is not implemented yet. Please look at issue: #3310: https://github.com/huggingface/transformers/issues/3310" + assert self.sample_softmax <= 0, ( + "Sampling from the softmax is not implemented yet. 
Please look at issue: #3310:" + " https://github.com/huggingface/transformers/issues/3310" + ) self.crit = TFAdaptiveSoftmaxMask( config.vocab_size, config.d_embed, config.d_model, config.cutoffs, div_val=config.div_val, name="crit" @@ -1126,7 +1127,7 @@ class TFTransfoXLForSequenceClassification(TFTransfoXLPreTrainedModel, TFSequenc sequence_lengths = -1 logger.warning( f"{self.__class__.__name__} will not detect padding tokens in `inputs_embeds`. Results may be " - f"unexpected if using padding tokens in conjunction with `inputs_embeds.`" + "unexpected if using padding tokens in conjunction with `inputs_embeds.`" ) loss = None diff --git a/src/transformers/models/transfo_xl/modeling_transfo_xl.py b/src/transformers/models/transfo_xl/modeling_transfo_xl.py index 556525cbf6..1f8f40e455 100644 --- a/src/transformers/models/transfo_xl/modeling_transfo_xl.py +++ b/src/transformers/models/transfo_xl/modeling_transfo_xl.py @@ -1020,13 +1020,15 @@ class TransfoXLLMHeadModel(TransfoXLPreTrainedModel): if not self.trainer_compatible: warnings.warn( "The output of TransfoXL will be updated in v5 to support a single loss as first argument. In order" - "to use that updated output, please specify `trainer_compatible=True` as your configuration attribute.", + "to use that updated output, please specify `trainer_compatible=True` as your configuration" + " attribute.", DeprecationWarning, ) - assert ( - self.sample_softmax <= 0 - ), "Sampling from the softmax is not implemented yet. Please look at issue: #3310: https://github.com/huggingface/transformers/issues/3310" + assert self.sample_softmax <= 0, ( + "Sampling from the softmax is not implemented yet. Please look at issue: #3310:" + " https://github.com/huggingface/transformers/issues/3310" + ) self.crit = ProjectedAdaptiveLogSoftmax( config.vocab_size, config.d_embed, config.d_model, config.cutoffs, div_val=config.div_val @@ -1261,7 +1263,7 @@ class TransfoXLForSequenceClassification(TransfoXLPreTrainedModel): sequence_lengths = -1 logger.warning( f"{self.__class__.__name__} will not detect padding tokens in `inputs_embeds`. 
Results may be " - f"unexpected if using padding tokens in conjunction with `inputs_embeds.`" + "unexpected if using padding tokens in conjunction with `inputs_embeds.`" ) pooled_logits = logits[range(batch_size), sequence_lengths] diff --git a/src/transformers/models/transfo_xl/modeling_transfo_xl_utilities.py b/src/transformers/models/transfo_xl/modeling_transfo_xl_utilities.py index b25dc2d707..e25ba2cd47 100644 --- a/src/transformers/models/transfo_xl/modeling_transfo_xl_utilities.py +++ b/src/transformers/models/transfo_xl/modeling_transfo_xl_utilities.py @@ -102,7 +102,7 @@ class ProjectedAdaptiveLogSoftmax(nn.Module): hidden = hidden.view(-1, hidden.size(-1)) labels = labels.view(-1) if hidden.size(0) != labels.size(0): - raise RuntimeError("Input and labels should have the same size " "in the batch dimension.") + raise RuntimeError("Input and labels should have the same size in the batch dimension.") else: hidden = hidden.view(-1, hidden.size(-1)) diff --git a/src/transformers/models/transfo_xl/tokenization_transfo_xl.py b/src/transformers/models/transfo_xl/tokenization_transfo_xl.py index 115cd4fdcf..cc72925bb0 100644 --- a/src/transformers/models/transfo_xl/tokenization_transfo_xl.py +++ b/src/transformers/models/transfo_xl/tokenization_transfo_xl.py @@ -680,10 +680,9 @@ class TransfoXLCorpus(object): resolved_corpus_file = cached_path(corpus_file, cache_dir=cache_dir) except EnvironmentError: logger.error( - f"Corpus '{pretrained_model_name_or_path}' was not found in corpus list " - f"({', '.join(PRETRAINED_CORPUS_ARCHIVE_MAP.keys())}. " - f"We assumed '{pretrained_model_name_or_path}' was a path or url but couldn't find files {corpus_file} " - "at this path or url." + f"Corpus '{pretrained_model_name_or_path}' was not found in corpus list" + f" ({', '.join(PRETRAINED_CORPUS_ARCHIVE_MAP.keys())}. We assumed '{pretrained_model_name_or_path}'" + f" was a path or url but couldn't find files {corpus_file} at this path or url." ) return None if resolved_corpus_file == corpus_file: diff --git a/src/transformers/models/trocr/configuration_trocr.py b/src/transformers/models/trocr/configuration_trocr.py index fc878da26d..a635e6b9b0 100644 --- a/src/transformers/models/trocr/configuration_trocr.py +++ b/src/transformers/models/trocr/configuration_trocr.py @@ -21,7 +21,9 @@ from ...utils import logging logger = logging.get_logger(__name__) TROCR_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "microsoft/trocr-base-handwritten": "https://huggingface.co/microsoft/trocr-base-handwritten/resolve/main/config.json", + "microsoft/trocr-base-handwritten": ( + "https://huggingface.co/microsoft/trocr-base-handwritten/resolve/main/config.json" + ), # See all TrOCR models at https://huggingface.co/models?filter=trocr } diff --git a/src/transformers/models/trocr/modeling_trocr.py b/src/transformers/models/trocr/modeling_trocr.py index 75e015f988..52e4801832 100644 --- a/src/transformers/models/trocr/modeling_trocr.py +++ b/src/transformers/models/trocr/modeling_trocr.py @@ -182,7 +182,8 @@ class TrOCRAttention(nn.Module): self.head_dim = embed_dim // num_heads if not (self.head_dim * num_heads == self.embed_dim): raise ValueError( - f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`: {num_heads})." + f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`:" + f" {num_heads})." 
) self.scaling = self.head_dim**-0.5 self.is_decoder = is_decoder @@ -254,7 +255,8 @@ class TrOCRAttention(nn.Module): if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len): raise ValueError( - f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is {attn_weights.size()}" + f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is" + f" {attn_weights.size()}" ) if attention_mask is not None: @@ -270,7 +272,8 @@ class TrOCRAttention(nn.Module): if layer_head_mask is not None: if layer_head_mask.size() != (self.num_heads,): raise ValueError( - f"Head mask for a single layer should be of size {(self.num_heads,)}, but is {layer_head_mask.size()}" + f"Head mask for a single layer should be of size {(self.num_heads,)}, but is" + f" {layer_head_mask.size()}" ) attn_weights = layer_head_mask.view(1, -1, 1, 1) * attn_weights.view(bsz, self.num_heads, tgt_len, src_len) attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len) @@ -291,7 +294,8 @@ class TrOCRAttention(nn.Module): if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim): raise ValueError( - f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is {attn_output.size()}" + f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is" + f" {attn_output.size()}" ) attn_output = attn_output.view(bsz, self.num_heads, tgt_len, self.head_dim) @@ -667,7 +671,8 @@ class TrOCRDecoder(TrOCRPreTrainedModel): if attn_mask is not None: if attn_mask.size()[0] != (len(self.layers)): raise ValueError( - f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}." + f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for" + f" {attn_mask.size()[0]}." ) for idx, decoder_layer in enumerate(self.layers): # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) @@ -683,7 +688,8 @@ class TrOCRDecoder(TrOCRPreTrainedModel): if use_cache: logger.warning( - "`use_cache = True` is incompatible with gradient checkpointing. Setting `use_cache = False`..." + "`use_cache = True` is incompatible with gradient checkpointing. Setting `use_cache =" + " False`..." ) use_cache = False @@ -769,7 +775,8 @@ class TrOCRDecoderWrapper(TrOCRPreTrainedModel): @add_start_docstrings( - "The TrOCR Decoder with a language modeling head. Can be used as the decoder part of [`EncoderDecoderModel`] and [`VisionEncoderDecoder`].", + "The TrOCR Decoder with a language modeling head. 
Can be used as the decoder part of [`EncoderDecoderModel`] and" + " [`VisionEncoderDecoder`].", TROCR_START_DOCSTRING, ) class TrOCRForCausalLM(TrOCRPreTrainedModel): diff --git a/src/transformers/models/unispeech/configuration_unispeech.py b/src/transformers/models/unispeech/configuration_unispeech.py index 85b9985920..a5358c68b2 100644 --- a/src/transformers/models/unispeech/configuration_unispeech.py +++ b/src/transformers/models/unispeech/configuration_unispeech.py @@ -24,7 +24,9 @@ from ...utils import logging logger = logging.get_logger(__name__) UNISPEECH_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "microsoft/unispeech-large-1500h-cv": "https://huggingface.co/microsoft/unispeech-large-1500h-cv/resolve/main/config.json", + "microsoft/unispeech-large-1500h-cv": ( + "https://huggingface.co/microsoft/unispeech-large-1500h-cv/resolve/main/config.json" + ), # See all UniSpeech models at https://huggingface.co/models?filter=unispeech } @@ -261,10 +263,10 @@ class UniSpeechConfig(PretrainedConfig): or (len(self.conv_dim) != self.num_feat_extract_layers) ): raise ValueError( - "Configuration for convolutional layers is incorrect. " - "It is required that `len(config.conv_dim)` == `len(config.conv_stride)` == `len(config.conv_kernel)`, " - f"but is `len(config.conv_dim) = {len(self.conv_dim)}`, `len(config.conv_stride) " - f"= {len(self.conv_stride)}`, `len(config.conv_kernel) = {len(self.conv_kernel)}`." + "Configuration for convolutional layers is incorrect. It is required that `len(config.conv_dim)` ==" + " `len(config.conv_stride)` == `len(config.conv_kernel)`, but is `len(config.conv_dim) =" + f" {len(self.conv_dim)}`, `len(config.conv_stride) = {len(self.conv_stride)}`," + f" `len(config.conv_kernel) = {len(self.conv_kernel)}`." ) # fine-tuning config parameters for SpecAugment: https://arxiv.org/abs/1904.08779 diff --git a/src/transformers/models/unispeech/convert_unispeech_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/unispeech/convert_unispeech_original_pytorch_checkpoint_to_pytorch.py index 83f051627c..bf72930951 100644 --- a/src/transformers/models/unispeech/convert_unispeech_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/unispeech/convert_unispeech_original_pytorch_checkpoint_to_pytorch.py @@ -84,9 +84,10 @@ def set_recursively(hf_pointer, key, value, full_name, weight_type, is_finetuned else: hf_shape = hf_pointer.shape - assert ( - hf_shape == value.shape - ), f"Shape of hf {key + '.' + weight_type if weight_type is not None else ''} is {hf_shape}, but should be {value.shape} for {full_name}" + assert hf_shape == value.shape, ( + f"Shape of hf {key + '.' + weight_type if weight_type is not None else ''} is {hf_shape}, but should be" + f" {value.shape} for {full_name}" + ) if weight_type == "weight": hf_pointer.weight.data = value @@ -154,28 +155,32 @@ def load_conv_layer(full_name, value, feature_extractor, unused_weights, use_gro if type_id == 0: if "bias" in name: - assert ( - value.shape == feature_extractor.conv_layers[layer_id].conv.bias.data.shape - ), f"{full_name} has size {value.shape}, but {feature_extractor.conv_layers[layer_id].conv.bias.data.shape} was found." + assert value.shape == feature_extractor.conv_layers[layer_id].conv.bias.data.shape, ( + f"{full_name} has size {value.shape}, but" + f" {feature_extractor.conv_layers[layer_id].conv.bias.data.shape} was found." 
+ ) feature_extractor.conv_layers[layer_id].conv.bias.data = value logger.info(f"Feat extract conv layer {layer_id} was initialized from {full_name}.") elif "weight" in name: - assert ( - value.shape == feature_extractor.conv_layers[layer_id].conv.weight.data.shape - ), f"{full_name} has size {value.shape}, but {feature_extractor.conv_layers[layer_id].conv.weight.data.shape} was found." + assert value.shape == feature_extractor.conv_layers[layer_id].conv.weight.data.shape, ( + f"{full_name} has size {value.shape}, but" + f" {feature_extractor.conv_layers[layer_id].conv.weight.data.shape} was found." + ) feature_extractor.conv_layers[layer_id].conv.weight.data = value logger.info(f"Feat extract conv layer {layer_id} was initialized from {full_name}.") elif (type_id == 2 and not use_group_norm) or (type_id == 2 and layer_id == 0 and use_group_norm): if "bias" in name: - assert ( - value.shape == feature_extractor.conv_layers[layer_id].layer_norm.bias.data.shape - ), f"{full_name} has size {value.shape}, but {feature_extractor[layer_id].layer_norm.bias.data.shape} was found." + assert value.shape == feature_extractor.conv_layers[layer_id].layer_norm.bias.data.shape, ( + f"{full_name} has size {value.shape}, but" + f" {feature_extractor.conv_layers[layer_id].layer_norm.bias.data.shape} was found." + ) feature_extractor.conv_layers[layer_id].layer_norm.bias.data = value logger.info(f"Feat extract layer norm weight of layer {layer_id} was initialized from {full_name}.") elif "weight" in name: - assert ( - value.shape == feature_extractor.conv_layers[layer_id].layer_norm.weight.data.shape - ), f"{full_name} has size {value.shape}, but {feature_extractor[layer_id].layer_norm.weight.data.shape} was found." + assert value.shape == feature_extractor.conv_layers[layer_id].layer_norm.weight.data.shape, ( + f"{full_name} has size {value.shape}, but" + f" {feature_extractor.conv_layers[layer_id].layer_norm.weight.data.shape} was found." 
+ ) feature_extractor.conv_layers[layer_id].layer_norm.weight.data = value logger.info(f"Feat extract layer norm weight of layer {layer_id} was initialized from {full_name}.") else: diff --git a/src/transformers/models/unispeech/modeling_unispeech.py b/src/transformers/models/unispeech/modeling_unispeech.py index 61359bf032..8bf43d8b43 100755 --- a/src/transformers/models/unispeech/modeling_unispeech.py +++ b/src/transformers/models/unispeech/modeling_unispeech.py @@ -554,7 +554,8 @@ class UniSpeechAttention(nn.Module): if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len): raise ValueError( - f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is {attn_weights.size()}" + f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is" + f" {attn_weights.size()}" ) if attention_mask is not None: @@ -570,7 +571,8 @@ class UniSpeechAttention(nn.Module): if layer_head_mask is not None: if layer_head_mask.size() != (self.num_heads,): raise ValueError( - f"Head mask for a single layer should be of size {(self.num_heads,)}, but is {layer_head_mask.size()}" + f"Head mask for a single layer should be of size {(self.num_heads,)}, but is" + f" {layer_head_mask.size()}" ) attn_weights = layer_head_mask.view(1, -1, 1, 1) * attn_weights.view(bsz, self.num_heads, tgt_len, src_len) attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len) @@ -591,7 +593,8 @@ class UniSpeechAttention(nn.Module): if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim): raise ValueError( - f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is {attn_output.size()}" + f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is" + f" {attn_output.size()}" ) attn_output = attn_output.view(bsz, self.num_heads, tgt_len, self.head_dim) @@ -888,7 +891,8 @@ class UniSpeechGumbelVectorQuantizer(nn.Module): if config.codevector_dim % self.num_groups != 0: raise ValueError( - f"`config.codevector_dim {config.codevector_dim} must be divisible by `config.num_codevector_groups` {self.num_groups} for concatenation" + f"`config.codevector_dim` {config.codevector_dim} must be divisible by `config.num_codevector_groups`" + f" {self.num_groups} for concatenation" ) # storage for codebook variables (codewords) diff --git a/src/transformers/models/unispeech_sat/configuration_unispeech_sat.py b/src/transformers/models/unispeech_sat/configuration_unispeech_sat.py index b88d9cf91f..4e24b0df03 100644 --- a/src/transformers/models/unispeech_sat/configuration_unispeech_sat.py +++ b/src/transformers/models/unispeech_sat/configuration_unispeech_sat.py @@ -24,7 +24,9 @@ from ...utils import logging logger = logging.get_logger(__name__) UNISPEECH_SAT_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "microsoft/unispeech-sat-base-100h-libri-ft": "https://huggingface.co/microsoft/unispeech-sat-base-100h-libri-ft/resolve/main/config.json", + "microsoft/unispeech-sat-base-100h-libri-ft": ( + "https://huggingface.co/microsoft/unispeech-sat-base-100h-libri-ft/resolve/main/config.json" + ), # See all UniSpeechSat models at https://huggingface.co/models?filter=unispeech_sat } @@ -273,10 +275,10 @@ class UniSpeechSatConfig(PretrainedConfig): or (len(self.conv_dim) != self.num_feat_extract_layers) ): raise ValueError( - "Configuration for convolutional layers is incorrect. 
" - "It is required that `len(config.conv_dim)` == `len(config.conv_stride)` == `len(config.conv_kernel)`, " - f"but is `len(config.conv_dim) = {len(self.conv_dim)}`, `len(config.conv_stride) " - f"= {len(self.conv_stride)}`, `len(config.conv_kernel) = {len(self.conv_kernel)}`." + "Configuration for convolutional layers is incorrect. It is required that `len(config.conv_dim)` ==" + " `len(config.conv_stride)` == `len(config.conv_kernel)`, but is `len(config.conv_dim) =" + f" {len(self.conv_dim)}`, `len(config.conv_stride) = {len(self.conv_stride)}`," + f" `len(config.conv_kernel) = {len(self.conv_kernel)}`." ) # fine-tuning config parameters for SpecAugment: https://arxiv.org/abs/1904.08779 diff --git a/src/transformers/models/unispeech_sat/convert_unispeech_sat_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/unispeech_sat/convert_unispeech_sat_original_pytorch_checkpoint_to_pytorch.py index 78a541d7ed..93750b64cc 100644 --- a/src/transformers/models/unispeech_sat/convert_unispeech_sat_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/unispeech_sat/convert_unispeech_sat_original_pytorch_checkpoint_to_pytorch.py @@ -72,7 +72,8 @@ def set_recursively(hf_pointer, key, value, full_name, weight_type): if hf_shape != value.shape: raise ValueError( - f"Shape of hf {key + '.' + weight_type if weight_type is not None else ''} is {hf_shape}, but should be {value.shape} for {full_name}" + f"Shape of hf {key + '.' + weight_type if weight_type is not None else ''} is {hf_shape}, but should be" + f" {value.shape} for {full_name}" ) if weight_type == "weight": @@ -146,14 +147,16 @@ def load_conv_layer(full_name, value, feature_extractor, unused_weights, use_gro if "bias" in name: if value.shape != feature_extractor.conv_layers[layer_id].conv.bias.data.shape: raise ValueError( - f"{full_name} has size {value.shape}, but {feature_extractor.conv_layers[layer_id].conv.bias.data.shape} was found." + f"{full_name} has size {value.shape}, but" + f" {feature_extractor.conv_layers[layer_id].conv.bias.data.shape} was found." ) feature_extractor.conv_layers[layer_id].conv.bias.data = value logger.info(f"Feat extract conv layer {layer_id} was initialized from {full_name}.") elif "weight" in name: if value.shape != feature_extractor.conv_layers[layer_id].conv.weight.data.shape: raise ValueError( - f"{full_name} has size {value.shape}, but {feature_extractor.conv_layers[layer_id].conv.weight.data.shape} was found." + f"{full_name} has size {value.shape}, but" + f" {feature_extractor.conv_layers[layer_id].conv.weight.data.shape} was found." ) feature_extractor.conv_layers[layer_id].conv.weight.data = value logger.info(f"Feat extract conv layer {layer_id} was initialized from {full_name}.") @@ -161,14 +164,16 @@ def load_conv_layer(full_name, value, feature_extractor, unused_weights, use_gro if "bias" in name: if value.shape != feature_extractor.conv_layers[layer_id].layer_norm.bias.data.shape: raise ValueError( - f"{full_name} has size {value.shape}, but {feature_extractor[layer_id].layer_norm.bias.data.shape} was found." + f"{full_name} has size {value.shape}, but" + f" {feature_extractor[layer_id].layer_norm.bias.data.shape} was found." 
) feature_extractor.conv_layers[layer_id].layer_norm.bias.data = value logger.info(f"Feat extract layer norm weight of layer {layer_id} was initialized from {full_name}.") elif "weight" in name: if value.shape != feature_extractor.conv_layers[layer_id].layer_norm.weight.data.shape: raise ValueError( - f"{full_name} has size {value.shape}, but {feature_extractor[layer_id].layer_norm.weight.data.shape} was found." + f"{full_name} has size {value.shape}, but" + f" {feature_extractor.conv_layers[layer_id].layer_norm.weight.data.shape} was found." ) feature_extractor.conv_layers[layer_id].layer_norm.weight.data = value logger.info(f"Feat extract layer norm weight of layer {layer_id} was initialized from {full_name}.") diff --git a/src/transformers/models/unispeech_sat/modeling_unispeech_sat.py b/src/transformers/models/unispeech_sat/modeling_unispeech_sat.py index 1812cd6523..e7b634f810 100755 --- a/src/transformers/models/unispeech_sat/modeling_unispeech_sat.py +++ b/src/transformers/models/unispeech_sat/modeling_unispeech_sat.py @@ -593,7 +593,8 @@ class UniSpeechSatAttention(nn.Module): if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len): raise ValueError( - f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is {attn_weights.size()}" + f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is" + f" {attn_weights.size()}" ) if attention_mask is not None: @@ -609,7 +610,8 @@ class UniSpeechSatAttention(nn.Module): if layer_head_mask is not None: if layer_head_mask.size() != (self.num_heads,): raise ValueError( - f"Head mask for a single layer should be of size {(self.num_heads,)}, but is {layer_head_mask.size()}" + f"Head mask for a single layer should be of size {(self.num_heads,)}, but is" + f" {layer_head_mask.size()}" ) attn_weights = layer_head_mask.view(1, -1, 1, 1) * attn_weights.view(bsz, self.num_heads, tgt_len, src_len) attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len) @@ -630,7 +632,8 @@ class UniSpeechSatAttention(nn.Module): if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim): raise ValueError( - f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is {attn_output.size()}" + f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is" + f" {attn_output.size()}" ) attn_output = attn_output.view(bsz, self.num_heads, tgt_len, self.head_dim) @@ -927,7 +930,8 @@ class UniSpeechSatGumbelVectorQuantizer(nn.Module): if config.codevector_dim % self.num_groups != 0: raise ValueError( - f"`config.codevector_dim {config.codevector_dim} must be divisible by `config.num_codevector_groups` {self.num_groups} for concatenation" + f"`config.codevector_dim` {config.codevector_dim} must be divisible by `config.num_codevector_groups`" + f" {self.num_groups} for concatenation" ) # storage for codebook variables (codewords) @@ -1651,7 +1655,8 @@ class UniSpeechSatForAudioFrameClassification(UniSpeechSatPreTrainedModel): if hasattr(config, "add_adapter") and config.add_adapter: raise ValueError( - "Audio frame classification does not support the use of UniSpeechSat adapters (config.add_adapter=True)" + "Audio frame classification does not support the use of UniSpeechSat adapters" + " (config.add_adapter=True)" ) self.unispeech_sat = UniSpeechSatModel(config) num_layers = config.num_hidden_layers + 1 # transformer layers + input embeddings diff --git a/src/transformers/models/van/configuration_van.py 
b/src/transformers/models/van/configuration_van.py index 6d4becdf55..47d5a9b6c1 100644 --- a/src/transformers/models/van/configuration_van.py +++ b/src/transformers/models/van/configuration_van.py @@ -21,7 +21,9 @@ from ...utils import logging logger = logging.get_logger(__name__) VAN_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "Visual-Attention-Network/van-base": "https://huggingface.co/Visual-Attention-Network/van-base/blob/main/config.json", + "Visual-Attention-Network/van-base": ( + "https://huggingface.co/Visual-Attention-Network/van-base/blob/main/config.json" + ), } diff --git a/src/transformers/models/van/convert_van_to_pytorch.py b/src/transformers/models/van/convert_van_to_pytorch.py index cb79c82c5c..e2c0c95e64 100644 --- a/src/transformers/models/van/convert_van_to_pytorch.py +++ b/src/transformers/models/van/convert_van_to_pytorch.py @@ -85,7 +85,8 @@ class ModuleTransfer: if len(dest_traced) != len(src_traced): raise Exception( - f"Numbers of operations are different. Source module has {len(src_traced)} operations while destination module has {len(dest_traced)}." + f"Numbers of operations are different. Source module has {len(src_traced)} operations while" + f" destination module has {len(dest_traced)}." ) for dest_m, src_m in zip(dest_traced, src_traced): @@ -208,10 +209,18 @@ def convert_weights_and_push(save_directory: Path, model_name: str = None, push_ } names_to_original_checkpoints = { - "van-tiny": "https://huggingface.co/Visual-Attention-Network/VAN-Tiny-original/resolve/main/van_tiny_754.pth.tar", - "van-small": "https://huggingface.co/Visual-Attention-Network/VAN-Small-original/resolve/main/van_small_811.pth.tar", - "van-base": "https://huggingface.co/Visual-Attention-Network/VAN-Base-original/resolve/main/van_base_828.pth.tar", - "van-large": "https://huggingface.co/Visual-Attention-Network/VAN-Large-original/resolve/main/van_large_839.pth.tar", + "van-tiny": ( + "https://huggingface.co/Visual-Attention-Network/VAN-Tiny-original/resolve/main/van_tiny_754.pth.tar" + ), + "van-small": ( + "https://huggingface.co/Visual-Attention-Network/VAN-Small-original/resolve/main/van_small_811.pth.tar" + ), + "van-base": ( + "https://huggingface.co/Visual-Attention-Network/VAN-Base-original/resolve/main/van_base_828.pth.tar" + ), + "van-large": ( + "https://huggingface.co/Visual-Attention-Network/VAN-Large-original/resolve/main/van_large_839.pth.tar" + ), } if model_name: @@ -242,7 +251,10 @@ if __name__ == "__main__": "--model-name", default=None, type=str, - help="The name of the model you wish to convert, it must be one of the supported resnet* architecture, currently: van-tiny/small/base/large. If `None`, all of them will the converted.", + help=( + "The name of the model you wish to convert, it must be one of the supported van* architectures," + " currently: van-tiny/small/base/large. If `None`, all of them will be converted." + ), ) parser.add_argument( "--pytorch_dump_folder_path", @@ -255,7 +267,10 @@ if __name__ == "__main__": "--van_dir", required=True, type=Path, - help="A path to VAN's original implementation directory. You can download from here: https://github.com/Visual-Attention-Network/VAN-Classification", + help=( + "A path to VAN's original implementation directory. 
You can download from here:" + " https://github.com/Visual-Attention-Network/VAN-Classification" + ), ) parser.add_argument( "--push_to_hub", diff --git a/src/transformers/models/van/modeling_van.py b/src/transformers/models/van/modeling_van.py index 7a7030c2f5..6c96c6b9fe 100644 --- a/src/transformers/models/van/modeling_van.py +++ b/src/transformers/models/van/modeling_van.py @@ -395,7 +395,8 @@ VAN_INPUTS_DOCSTRING = r""" @add_start_docstrings( - "The bare VAN model outputting raw features without any specific head on top. Note, VAN does not have an embedding layer.", + "The bare VAN model outputting raw features without any specific head on top. Note, VAN does not have an embedding" + " layer.", VAN_START_DOCSTRING, ) class VanModel(VanPreTrainedModel): diff --git a/src/transformers/models/vilt/convert_vilt_original_to_pytorch.py b/src/transformers/models/vilt/convert_vilt_original_to_pytorch.py index 9de026ebec..3a186e1d2d 100644 --- a/src/transformers/models/vilt/convert_vilt_original_to_pytorch.py +++ b/src/transformers/models/vilt/convert_vilt_original_to_pytorch.py @@ -231,7 +231,10 @@ def convert_vilt_checkpoint(checkpoint_url, pytorch_dump_folder_path): if nlvr_model: image1 = Image.open(requests.get("https://lil.nlp.cornell.edu/nlvr/exs/ex0_0.jpg", stream=True).raw) image2 = Image.open(requests.get("https://lil.nlp.cornell.edu/nlvr/exs/ex0_0.jpg", stream=True).raw) - text = "The left image contains twice the number of dogs as the right image, and at least two dogs in total are standing." + text = ( + "The left image contains twice the number of dogs as the right image, and at least two dogs in total are" + " standing." + ) encoding_1 = processor(image1, text, return_tensors="pt") encoding_2 = processor(image2, text, return_tensors="pt") outputs = model( diff --git a/src/transformers/models/vision_encoder_decoder/modeling_flax_vision_encoder_decoder.py b/src/transformers/models/vision_encoder_decoder/modeling_flax_vision_encoder_decoder.py index e0478f1e13..7042b2548d 100644 --- a/src/transformers/models/vision_encoder_decoder/modeling_flax_vision_encoder_decoder.py +++ b/src/transformers/models/vision_encoder_decoder/modeling_flax_vision_encoder_decoder.py @@ -301,10 +301,10 @@ class FlaxVisionEncoderDecoderModel(FlaxPreTrainedModel): if config.decoder.cross_attention_hidden_size is not None: if config.decoder.cross_attention_hidden_size != config.encoder.hidden_size: raise ValueError( - "If `cross_attention_hidden_size` is specified in the decoder's configuration, " - "it has to be equal to the encoder's `hidden_size`. " - f"Got {config.decoder.cross_attention_hidden_size} for `config.decoder.cross_attention_hidden_size` " - f"and {config.encoder.hidden_size} for `config.encoder.hidden_size`." + "If `cross_attention_hidden_size` is specified in the decoder's configuration, it has to be equal" + f" to the encoder's `hidden_size`. Got {config.decoder.cross_attention_hidden_size} for" + f" `config.decoder.cross_attention_hidden_size` and {config.encoder.hidden_size} for" + " `config.encoder.hidden_size`." ) module = self.module_class(config=config, dtype=dtype, **kwargs) @@ -832,10 +832,9 @@ class FlaxVisionEncoderDecoderModel(FlaxPreTrainedModel): decoder_config = AutoConfig.from_pretrained(decoder_pretrained_model_name_or_path) if decoder_config.is_decoder is False or decoder_config.add_cross_attention is False: logger.info( - f"Initializing {decoder_pretrained_model_name_or_path} as a decoder model. 
" - f"Cross attention layers are added to {decoder_pretrained_model_name_or_path} " - f"and randomly initialized if {decoder_pretrained_model_name_or_path}'s architecture allows for " - "cross attention layers." + f"Initializing {decoder_pretrained_model_name_or_path} as a decoder model. Cross attention" + f" layers are added to {decoder_pretrained_model_name_or_path} and randomly initialized if" + f" {decoder_pretrained_model_name_or_path}'s architecture allows for cross attention layers." ) decoder_config.is_decoder = True decoder_config.add_cross_attention = True diff --git a/src/transformers/models/vision_encoder_decoder/modeling_tf_vision_encoder_decoder.py b/src/transformers/models/vision_encoder_decoder/modeling_tf_vision_encoder_decoder.py index 6bbf514091..ba65525ae0 100644 --- a/src/transformers/models/vision_encoder_decoder/modeling_tf_vision_encoder_decoder.py +++ b/src/transformers/models/vision_encoder_decoder/modeling_tf_vision_encoder_decoder.py @@ -43,10 +43,10 @@ logger = logging.get_logger(__name__) _CONFIG_FOR_DOC = "VisionEncoderDecoderConfig" DEPRECATION_WARNING = ( - "Version v4.17.0 introduces a better way to train encoder-decoder models by computing the loss inside the " - "encoder-decoder framework rather than in the decoder itself. You may observe training discrepancies if fine-tuning " - "a model trained with versions anterior to 4.17.0. The decoder_input_ids are now created based on the labels, no " - "need to pass them yourself anymore." + "Version v4.17.0 introduces a better way to train encoder-decoder models by computing the loss inside the" + " encoder-decoder framework rather than in the decoder itself. You may observe training discrepancies if" + " fine-tuning a model trained with versions anterior to 4.17.0. The decoder_input_ids are now created based on the" + " labels, no need to pass them yourself anymore." ) VISION_ENCODER_DECODER_START_DOCSTRING = r""" @@ -202,10 +202,10 @@ class TFVisionEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLos if config.decoder.cross_attention_hidden_size is not None: if config.decoder.cross_attention_hidden_size != config.encoder.hidden_size: raise ValueError( - "If `cross_attention_hidden_size` is specified in the decoder's configuration, " - "it has to be equal to the encoder's `hidden_size`. " - f"Got {config.decoder.cross_attention_hidden_size} for `config.decoder.cross_attention_hidden_size` " - f"and {config.encoder.hidden_size} for `config.encoder.hidden_size`." + "If `cross_attention_hidden_size` is specified in the decoder's configuration, it has to be equal" + f" to the encoder's `hidden_size`. Got {config.decoder.cross_attention_hidden_size} for" + f" `config.decoder.cross_attention_hidden_size` and {config.encoder.hidden_size} for" + " `config.encoder.hidden_size`." 
) # initialize with config @@ -222,11 +222,13 @@ class TFVisionEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLos if self.encoder.config.to_dict() != self.config.encoder.to_dict(): logger.warning( - f"Config of the encoder: {self.encoder.__class__} is overwritten by shared encoder config: {self.config.encoder}" + f"Config of the encoder: {self.encoder.__class__} is overwritten by shared encoder config:" + f" {self.config.encoder}" ) if self.decoder.config.to_dict() != self.config.decoder.to_dict(): logger.warning( - f"Config of the decoder: {self.decoder.__class__} is overwritten by shared decoder config: {self.config.decoder}" + f"Config of the decoder: {self.decoder.__class__} is overwritten by shared decoder config:" + f" {self.config.decoder}" ) # make sure that the individual model's config refers to the shared config @@ -337,10 +339,10 @@ class TFVisionEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLos from_pt = kwargs.pop("from_pt", False) if from_pt: raise ValueError( - "Initializing `TFVisionEncoderDecoderModel` from a pytorch checkpoint is not supported currently. " - "Use a tensorflow checkpoint instead. If only the pytorch checkpoints are available, " - "create the encoder and decoder models separately, and use them to initialize `TFVisionEncoderDecoderModel`. " - "Check `TFVisionEncoderDecoderModel.from_encoder_decoder_pretrained()` for more details." + "Initializing `TFVisionEncoderDecoderModel` from a pytorch checkpoint is not supported currently. Use" + " a tensorflow checkpoint instead. If only the pytorch checkpoints are available, create the encoder" + " and decoder models separately, and use them to initialize `TFVisionEncoderDecoderModel`. Check" + " `TFVisionEncoderDecoderModel.from_encoder_decoder_pretrained()` for more details." ) return super().from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs) @@ -469,10 +471,9 @@ class TFVisionEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLos decoder_config = AutoConfig.from_pretrained(decoder_pretrained_model_name_or_path) if decoder_config.is_decoder is False or decoder_config.add_cross_attention is False: logger.info( - f"Initializing {decoder_pretrained_model_name_or_path} as a decoder model. " - f"Cross attention layers are added to {decoder_pretrained_model_name_or_path} " - f"and randomly initialized if {decoder_pretrained_model_name_or_path}'s architecture allows for " - "cross attention layers." + f"Initializing {decoder_pretrained_model_name_or_path} as a decoder model. Cross attention" + f" layers are added to {decoder_pretrained_model_name_or_path} and randomly initialized if" + f" {decoder_pretrained_model_name_or_path}'s architecture allows for cross attention layers." 
) decoder_config.is_decoder = True decoder_config.add_cross_attention = True diff --git a/src/transformers/models/vision_encoder_decoder/modeling_vision_encoder_decoder.py b/src/transformers/models/vision_encoder_decoder/modeling_vision_encoder_decoder.py index 37072270a5..d2c4ae6b18 100644 --- a/src/transformers/models/vision_encoder_decoder/modeling_vision_encoder_decoder.py +++ b/src/transformers/models/vision_encoder_decoder/modeling_vision_encoder_decoder.py @@ -173,10 +173,10 @@ class VisionEncoderDecoderModel(PreTrainedModel): if config.decoder.cross_attention_hidden_size is not None: if config.decoder.cross_attention_hidden_size != config.encoder.hidden_size: raise ValueError( - "If `cross_attention_hidden_size` is specified in the decoder's configuration, " - "it has to be equal to the encoder's `hidden_size`. " - f"Got {config.decoder.cross_attention_hidden_size} for `config.decoder.cross_attention_hidden_size` " - f"and {config.encoder.hidden_size} for `config.encoder.hidden_size`." + "If `cross_attention_hidden_size` is specified in the decoder's configuration, it has to be equal" + f" to the encoder's `hidden_size`. Got {config.decoder.cross_attention_hidden_size} for" + f" `config.decoder.cross_attention_hidden_size` and {config.encoder.hidden_size} for" + " `config.encoder.hidden_size`." ) # initialize with config @@ -195,11 +195,13 @@ class VisionEncoderDecoderModel(PreTrainedModel): if self.encoder.config.to_dict() != self.config.encoder.to_dict(): logger.warning( - f"Config of the encoder: {self.encoder.__class__} is overwritten by shared encoder config: {self.config.encoder}" + f"Config of the encoder: {self.encoder.__class__} is overwritten by shared encoder config:" + f" {self.config.encoder}" ) if self.decoder.config.to_dict() != self.config.decoder.to_dict(): logger.warning( - f"Config of the decoder: {self.decoder.__class__} is overwritten by shared decoder config: {self.config.decoder}" + f"Config of the decoder: {self.decoder.__class__} is overwritten by shared decoder config:" + f" {self.config.decoder}" ) # make sure that the individual model's config refers to the shared config @@ -369,10 +371,9 @@ class VisionEncoderDecoderModel(PreTrainedModel): if decoder_config.is_decoder is False or decoder_config.add_cross_attention is False: logger.info( - f"Initializing {decoder_pretrained_model_name_or_path} as a decoder model. " - f"Cross attention layers are added to {decoder_pretrained_model_name_or_path} " - f"and randomly initialized if {decoder_pretrained_model_name_or_path}'s architecture allows for " - "cross attention layers." + f"Initializing {decoder_pretrained_model_name_or_path} as a decoder model. Cross attention" + f" layers are added to {decoder_pretrained_model_name_or_path} and randomly initialized if" + f" {decoder_pretrained_model_name_or_path}'s architecture allows for cross attention layers." ) decoder_config.is_decoder = True decoder_config.add_cross_attention = True @@ -546,8 +547,8 @@ class VisionEncoderDecoderModel(PreTrainedModel): def resize_token_embeddings(self, *args, **kwargs): raise NotImplementedError( - "Resizing the embedding layers via the VisionEncoderDecoderModel directly is not supported." 
- "Please use the respective methods of the wrapped decoder object (model.decoder.resize_token_embeddings(...))" + "Resizing the embedding layers via the VisionEncoderDecoderModel directly is not supported.Please use the" + " respective methods of the wrapped decoder object (model.decoder.resize_token_embeddings(...))" ) def _reorder_cache(self, past, beam_idx): diff --git a/src/transformers/models/vision_text_dual_encoder/modeling_flax_vision_text_dual_encoder.py b/src/transformers/models/vision_text_dual_encoder/modeling_flax_vision_text_dual_encoder.py index 4cf6c59882..aac1b0e8e9 100644 --- a/src/transformers/models/vision_text_dual_encoder/modeling_flax_vision_text_dual_encoder.py +++ b/src/transformers/models/vision_text_dual_encoder/modeling_flax_vision_text_dual_encoder.py @@ -536,9 +536,9 @@ class FlaxVisionTextDualEncoderModel(FlaxPreTrainedModel): # the projection layers are always newly initialized when loading the model # using pre-trained vision and text model. logger.warning( - "The projection layer and logit scale weights `[('visual_projection', 'kernel'), ('text_projection', 'kernel'), ('logit_scale',)]` " - "are newly initialized. You should probably TRAIN this model on a down-stream task " - "to be able to use it for predictions and inference." + "The projection layer and logit scale weights `[('visual_projection', 'kernel'), ('text_projection'," + " 'kernel'), ('logit_scale',)]` are newly initialized. You should probably TRAIN this model on a" + " down-stream task to be able to use it for predictions and inference." ) return model diff --git a/src/transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py b/src/transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py index e13c9ca7ef..66340deaf4 100755 --- a/src/transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +++ b/src/transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py @@ -530,9 +530,9 @@ class VisionTextDualEncoderModel(PreTrainedModel): # the projection layers are always newly initialized when loading the model # using pre-trained vision and text model. logger.warning( - "The projection layer and logit scale weights `['visual_projection.weight', 'text_projection.weight', 'logit_scale']` " - "are newly initialized. You should probably TRAIN this model on a down-stream task " - "to be able to use it for predictions and inference." + "The projection layer and logit scale weights `['visual_projection.weight', 'text_projection.weight'," + " 'logit_scale']` are newly initialized. You should probably TRAIN this model on a down-stream task to be" + " able to use it for predictions and inference." 
) return model diff --git a/src/transformers/models/visual_bert/configuration_visual_bert.py b/src/transformers/models/visual_bert/configuration_visual_bert.py index d4992d5267..60a3692644 100644 --- a/src/transformers/models/visual_bert/configuration_visual_bert.py +++ b/src/transformers/models/visual_bert/configuration_visual_bert.py @@ -23,13 +23,19 @@ logger = logging.get_logger(__name__) VISUAL_BERT_PRETRAINED_CONFIG_ARCHIVE_MAP = { "uclanlp/visualbert-vqa": "https://huggingface.co/uclanlp/visualbert-vqa/resolve/main/config.json", "uclanlp/visualbert-vqa-pre": "https://huggingface.co/uclanlp/visualbert-vqa-pre/resolve/main/config.json", - "uclanlp/visualbert-vqa-coco-pre": "https://huggingface.co/uclanlp/visualbert-vqa-coco-pre/resolve/main/config.json", + "uclanlp/visualbert-vqa-coco-pre": ( + "https://huggingface.co/uclanlp/visualbert-vqa-coco-pre/resolve/main/config.json" + ), "uclanlp/visualbert-vcr": "https://huggingface.co/uclanlp/visualbert-vcr/resolve/main/config.json", "uclanlp/visualbert-vcr-pre": "https://huggingface.co/uclanlp/visualbert-vcr-pre/resolve/main/config.json", - "uclanlp/visualbert-vcr-coco-pre": "https://huggingface.co/uclanlp/visualbert-vcr-coco-pre/resolve/main/config.json", + "uclanlp/visualbert-vcr-coco-pre": ( + "https://huggingface.co/uclanlp/visualbert-vcr-coco-pre/resolve/main/config.json" + ), "uclanlp/visualbert-nlvr2": "https://huggingface.co/uclanlp/visualbert-nlvr2/resolve/main/config.json", "uclanlp/visualbert-nlvr2-pre": "https://huggingface.co/uclanlp/visualbert-nlvr2-pre/resolve/main/config.json", - "uclanlp/visualbert-nlvr2-coco-pre": "https://huggingface.co/uclanlp/visualbert-nlvr2-coco-pre/resolve/main/config.json" + "uclanlp/visualbert-nlvr2-coco-pre": ( + "https://huggingface.co/uclanlp/visualbert-nlvr2-coco-pre/resolve/main/config.json" + ) # See all VisualBERT models at https://huggingface.co/models?filter=visual_bert } diff --git a/src/transformers/models/visual_bert/modeling_visual_bert.py b/src/transformers/models/visual_bert/modeling_visual_bert.py index 643411ee7f..9f3dfaaeb3 100755 --- a/src/transformers/models/visual_bert/modeling_visual_bert.py +++ b/src/transformers/models/visual_bert/modeling_visual_bert.py @@ -158,7 +158,8 @@ class VisualBertEmbeddings(nn.Module): if (image_text_alignment_mask == 0).sum() != 0: image_text_alignment_mask[image_text_alignment_mask == 0] = 1 # Avoid divide by zero error logger.warning( - "Found 0 values in `image_text_alignment_mask`. Setting them to 1 to avoid divide-by-zero error." + "Found 0 values in `image_text_alignment_mask`. Setting them to 1 to avoid divide-by-zero" + " error." ) visual_position_embeddings = visual_position_embeddings / image_text_alignment_mask.unsqueeze(-1) @@ -978,7 +979,7 @@ class VisualBertForPreTraining(VisualBertPreTrainedModel): total_size = attention_mask.size(-1) + visual_attention_mask.size(-1) if labels.size(-1) != total_size: raise ValueError( - f"The labels provided should have same sequence length as total attention mask. " + "The labels provided should have same sequence length as total attention mask. " f"Found labels with sequence length {labels.size(-1)}, expected {total_size}." ) @@ -991,7 +992,7 @@ class VisualBertForPreTraining(VisualBertPreTrainedModel): total_size = attention_mask.size(-1) + visual_attention_mask.size(-1) if labels.size(-1) != total_size: raise ValueError( - f"The labels provided should have same sequence length as total attention mask. " + "The labels provided should have same sequence length as total attention mask. 
" f"Found labels with sequence length {labels.size(-1)}, expected {total_size}." ) diff --git a/src/transformers/models/vit/modeling_flax_vit.py b/src/transformers/models/vit/modeling_flax_vit.py index eaa7c4225e..f6e7044057 100644 --- a/src/transformers/models/vit/modeling_flax_vit.py +++ b/src/transformers/models/vit/modeling_flax_vit.py @@ -143,7 +143,8 @@ class FlaxViTSelfAttention(nn.Module): def setup(self): if self.config.hidden_size % self.config.num_attention_heads != 0: raise ValueError( - "`config.hidden_size`: {self.config.hidden_size} has to be a multiple of `config.num_attention_heads`: {self.config.num_attention_heads}" + "`config.hidden_size`: {self.config.hidden_size} has to be a multiple of `config.num_attention_heads`:" + " {self.config.num_attention_heads}" ) self.query = nn.Dense( diff --git a/src/transformers/models/vit/modeling_tf_vit.py b/src/transformers/models/vit/modeling_tf_vit.py index 9d478e968c..4666259661 100644 --- a/src/transformers/models/vit/modeling_tf_vit.py +++ b/src/transformers/models/vit/modeling_tf_vit.py @@ -187,7 +187,8 @@ class TFPatchEmbeddings(tf.keras.layers.Layer): if getattr(height, "numpy", None) and getattr(width, "numpy", None): if height != self.image_size[0] or width != self.image_size[1]: raise ValueError( - f"Input image size ({height}*{width}) doesn't match model ({self.image_size[0]}*{self.image_size[1]})." + f"Input image size ({height}*{width}) doesn't match model" + f" ({self.image_size[0]}*{self.image_size[1]})." ) # When running on CPU, `tf.keras.layers.Conv2D` doesn't support `NCHW` format. diff --git a/src/transformers/models/vit/modeling_vit.py b/src/transformers/models/vit/modeling_vit.py index a5fc9a6336..dde36b45ef 100644 --- a/src/transformers/models/vit/modeling_vit.py +++ b/src/transformers/models/vit/modeling_vit.py @@ -186,7 +186,8 @@ class PatchEmbeddings(nn.Module): if not interpolate_pos_encoding: if height != self.image_size[0] or width != self.image_size[1]: raise ValueError( - f"Input image size ({height}*{width}) doesn't match model ({self.image_size[0]}*{self.image_size[1]})." + f"Input image size ({height}*{width}) doesn't match model" + f" ({self.image_size[0]}*{self.image_size[1]})." ) x = self.projection(pixel_values).flatten(2).transpose(1, 2) return x @@ -612,7 +613,8 @@ class ViTPooler(nn.Module): @add_start_docstrings( - "ViT Model with a decoder on top for masked image modeling, as proposed in `SimMIM `__.", + "ViT Model with a decoder on top for masked image modeling, as proposed in `SimMIM" + " `__.", VIT_START_DOCSTRING, ) class ViTForMaskedImageModeling(ViTPreTrainedModel): diff --git a/src/transformers/models/vit_mae/modeling_tf_vit_mae.py b/src/transformers/models/vit_mae/modeling_tf_vit_mae.py index f464b6665a..803a7cccc7 100644 --- a/src/transformers/models/vit_mae/modeling_tf_vit_mae.py +++ b/src/transformers/models/vit_mae/modeling_tf_vit_mae.py @@ -333,7 +333,8 @@ class TFPatchEmbeddings(tf.keras.layers.Layer): if getattr(height, "numpy", None) and getattr(width, "numpy", None): if height != self.image_size[0] or width != self.image_size[1]: raise ValueError( - f"Input image size ({height}*{width}) doesn't match model ({self.image_size[0]}*{self.image_size[1]})." + f"Input image size ({height}*{width}) doesn't match model" + f" ({self.image_size[0]}*{self.image_size[1]})." ) # When running on CPU, `tf.keras.layers.Conv2D` doesn't support `NCHW` format. 
diff --git a/src/transformers/models/wav2vec2/configuration_wav2vec2.py b/src/transformers/models/wav2vec2/configuration_wav2vec2.py index f675f6799f..8076df8249 100644 --- a/src/transformers/models/wav2vec2/configuration_wav2vec2.py +++ b/src/transformers/models/wav2vec2/configuration_wav2vec2.py @@ -288,10 +288,10 @@ class Wav2Vec2Config(PretrainedConfig): or (len(self.conv_dim) != self.num_feat_extract_layers) ): raise ValueError( - "Configuration for convolutional layers is incorrect. " - "It is required that `len(config.conv_dim)` == `len(config.conv_stride)` == `len(config.conv_kernel)`, " - f"but is `len(config.conv_dim) = {len(self.conv_dim)}`, `len(config.conv_stride) " - f"= {len(self.conv_stride)}`, `len(config.conv_kernel) = {len(self.conv_kernel)}`." + "Configuration for convolutional layers is incorrect. It is required that `len(config.conv_dim)` ==" + " `len(config.conv_stride)` == `len(config.conv_kernel)`, but is `len(config.conv_dim) =" + f" {len(self.conv_dim)}`, `len(config.conv_stride) = {len(self.conv_stride)}`," + f" `len(config.conv_kernel) = {len(self.conv_kernel)}`." ) # fine-tuning config parameters for SpecAugment: https://arxiv.org/abs/1904.08779 diff --git a/src/transformers/models/wav2vec2/convert_wav2vec2_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/wav2vec2/convert_wav2vec2_original_pytorch_checkpoint_to_pytorch.py index db77a9ea16..89ae3ad21c 100644 --- a/src/transformers/models/wav2vec2/convert_wav2vec2_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/wav2vec2/convert_wav2vec2_original_pytorch_checkpoint_to_pytorch.py @@ -77,7 +77,8 @@ def set_recursively(hf_pointer, key, value, full_name, weight_type): if hf_shape != value.shape: raise ValueError( - f"Shape of hf {key + '.' + weight_type if weight_type is not None else ''} is {hf_shape}, but should be {value.shape} for {full_name}" + f"Shape of hf {key + '.' + weight_type if weight_type is not None else ''} is {hf_shape}, but should be" + f" {value.shape} for {full_name}" ) if weight_type == "weight": @@ -148,14 +149,16 @@ def load_conv_layer(full_name, value, feature_extractor, unused_weights, use_gro if "bias" in name: if value.shape != feature_extractor.conv_layers[layer_id].conv.bias.data.shape: raise ValueError( - f"{full_name} has size {value.shape}, but {feature_extractor.conv_layers[layer_id].conv.bias.data.shape} was found." + f"{full_name} has size {value.shape}, but" + f" {feature_extractor.conv_layers[layer_id].conv.bias.data.shape} was found." ) feature_extractor.conv_layers[layer_id].conv.bias.data = value logger.info(f"Feat extract conv layer {layer_id} was initialized from {full_name}.") elif "weight" in name: if value.shape != feature_extractor.conv_layers[layer_id].conv.weight.data.shape: raise ValueError( - f"{full_name} has size {value.shape}, but {feature_extractor.conv_layers[layer_id].conv.weight.data.shape} was found." + f"{full_name} has size {value.shape}, but" + f" {feature_extractor.conv_layers[layer_id].conv.weight.data.shape} was found." 
) feature_extractor.conv_layers[layer_id].conv.weight.data = value logger.info(f"Feat extract conv layer {layer_id} was initialized from {full_name}.") @@ -163,14 +166,16 @@ def load_conv_layer(full_name, value, feature_extractor, unused_weights, use_gro if "bias" in name: if value.shape != feature_extractor.conv_layers[layer_id].layer_norm.bias.data.shape: raise ValueError( - f"{full_name} has size {value.shape}, but {feature_extractor.conv_layers[layer_id].layer_norm.bias.data.shape} was found." + f"{full_name} has size {value.shape}, but" + f" {feature_extractor.conv_layers[layer_id].layer_norm.bias.data.shape} was found." ) feature_extractor.conv_layers[layer_id].layer_norm.bias.data = value logger.info(f"Feat extract layer norm weight of layer {layer_id} was initialized from {full_name}.") elif "weight" in name: if value.shape != feature_extractor.conv_layers[layer_id].layer_norm.weight.data.shape: raise ValueError( - f"{full_name} has size {value.shape}, but {feature_extractor.conv_layers[layer_id].layer_norm.weight.data.shape} was found." + f"{full_name} has size {value.shape}, but" + f" {feature_extractor.conv_layers[layer_id].layer_norm.weight.data.shape} was found." ) feature_extractor.conv_layers[layer_id].layer_norm.weight.data = value logger.info(f"Feat extract layer norm weight of layer {layer_id} was initialized from {full_name}.") diff --git a/src/transformers/models/wav2vec2/feature_extraction_wav2vec2.py b/src/transformers/models/wav2vec2/feature_extraction_wav2vec2.py index 595fb11192..14b1d688c9 100644 --- a/src/transformers/models/wav2vec2/feature_extraction_wav2vec2.py +++ b/src/transformers/models/wav2vec2/feature_extraction_wav2vec2.py @@ -171,8 +171,9 @@ class Wav2Vec2FeatureExtractor(SequenceFeatureExtractor): if sampling_rate is not None: if sampling_rate != self.sampling_rate: raise ValueError( - f"The model corresponding to this feature extractor: {self} was trained using a sampling rate of {self.sampling_rate}. " - f"Please make sure that the provided `raw_speech` input was sampled with {self.sampling_rate} and not {sampling_rate}." + f"The model corresponding to this feature extractor: {self} was trained using a sampling rate of" + f" {self.sampling_rate}. Please make sure that the provided `raw_speech` input was sampled with" + f" {self.sampling_rate} and not {sampling_rate}." ) else: logger.warning( diff --git a/src/transformers/models/wav2vec2/modeling_flax_wav2vec2.py b/src/transformers/models/wav2vec2/modeling_flax_wav2vec2.py index 7709e43ab9..7a3c6dfc5d 100644 --- a/src/transformers/models/wav2vec2/modeling_flax_wav2vec2.py +++ b/src/transformers/models/wav2vec2/modeling_flax_wav2vec2.py @@ -137,7 +137,8 @@ def _compute_mask_indices( if mask_length > sequence_length: raise ValueError( - f"`mask_length` has to be smaller than `sequence_length`, but got `mask_length`: {mask_length} and `sequence_length`: {sequence_length}`" + f"`mask_length` has to be smaller than `sequence_length`, but got `mask_length`: {mask_length} and" + f" `sequence_length`: {sequence_length}`" ) # compute number of masked spans in batch @@ -186,7 +187,7 @@ def _sample_negative_indices(features_shape: Tuple, num_negatives: int, attentio batch_size, sequence_length, hidden_size = features_shape if sequence_length <= 1: raise ValueError( - f"`features should have `sequence_length` > 1, but are of shape " + "`features should have `sequence_length` > 1, but are of shape " f"(batch_size, sequence_length, hidden_size) = ({batch_size, sequence_length, hidden_size})." 
) @@ -386,7 +387,8 @@ class FlaxConvLayersCollection(nn.Module): raise NotImplementedError("At the moment only ``config.feat_extact_norm == 'layer'`` is supported") else: raise ValueError( - f"`config.feat_extract_norm` is {self.config.feat_extract_norm}, but has to be one of ['group', 'layer']" + f"`config.feat_extract_norm` is {self.config.feat_extract_norm}, but has to be one of ['group'," + " 'layer']" ) def __call__(self, hidden_states): @@ -444,7 +446,8 @@ class FlaxWav2Vec2Attention(nn.Module): self.head_dim = self.embed_dim // self.num_heads if self.head_dim * self.num_heads != self.embed_dim: raise ValueError( - f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`: {self.num_heads})." + f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`:" + f" {self.num_heads})." ) dense = partial( diff --git a/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py b/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py index bac62f148c..567f20040b 100644 --- a/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py +++ b/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py @@ -133,12 +133,14 @@ def input_values_processing(func, config, input_values, **kwargs): output[parameter_names[i]] = input else: raise ValueError( - f"Data of type {type(input)} is not allowed only {allowed_types} is accepted for {parameter_names[i]}." + f"Data of type {type(input)} is not allowed only {allowed_types} is accepted for" + f" {parameter_names[i]}." ) elif isinstance(input_values, Mapping): if "inputs" in input_values: warnings.warn( - "The `inputs` argument is deprecated and will be removed in a future version, use `input_values` instead.", + "The `inputs` argument is deprecated and will be removed in a future version, use `input_values`" + " instead.", FutureWarning, ) @@ -146,7 +148,8 @@ def input_values_processing(func, config, input_values, **kwargs): if "decoder_cached_states" in input_values: warnings.warn( - "The `decoder_cached_states` argument is deprecated and will be removed in a future version, use `past_key_values` instead.", + "The `decoder_cached_states` argument is deprecated and will be removed in a future version, use" + " `past_key_values` instead.", FutureWarning, ) output["past_key_values"] = input_values.pop("decoder_cached_states") @@ -166,7 +169,8 @@ def input_values_processing(func, config, input_values, **kwargs): output[parameter_names[0]] = input_values else: raise ValueError( - f"Data of type {type(input_values)} is not allowed only {allowed_types} is accepted for {parameter_names[0]}." + f"Data of type {type(input_values)} is not allowed only {allowed_types} is accepted for" + f" {parameter_names[0]}." 
) for name in parameter_names: @@ -254,7 +258,8 @@ def _compute_mask_indices( if mask_length > sequence_length: raise ValueError( - f"`mask_length` has to be smaller than `sequence_length`, but got `mask_length`: {mask_length} and `sequence_length`: {sequence_length}`" + f"`mask_length` has to be smaller than `sequence_length`, but got `mask_length`: {mask_length} and" + f" `sequence_length`: {sequence_length}`" ) # compute number of masked spans in batch num_masked_spans = int(mask_prob * sequence_length / mask_length + tf.random.uniform((1,))) @@ -441,9 +446,11 @@ class TFWav2Vec2GroupNorm(tf.keras.layers.Layer): dim = input_shape[self.axis] if dim is None: raise ValueError( - "Axis " + str(self.axis) + " of " - "input tensor should have a defined dimension " - "but the layer received an input with shape " + str(input_shape) + "." + "Axis " + + str(self.axis) + + " of input tensor should have a defined dimension but the layer received an input with shape " + + str(input_shape) + + "." ) def _set_number_of_groups_for_instance_norm(self, input_shape): @@ -457,22 +464,27 @@ class TFWav2Vec2GroupNorm(tf.keras.layers.Layer): dim = input_shape[self.axis] if dim < self.groups: raise ValueError( - "Number of groups (" + str(self.groups) + ") cannot be " - "more than the number of channels (" + str(dim) + ")." + "Number of groups (" + + str(self.groups) + + ") cannot be more than the number of channels (" + + str(dim) + + ")." ) if dim % self.groups != 0: raise ValueError( - "Number of groups (" + str(self.groups) + ") must be a " - "multiple of the number of channels (" + str(dim) + ")." + "Number of groups (" + + str(self.groups) + + ") must be a multiple of the number of channels (" + + str(dim) + + ")." ) def _check_axis(self): if self.axis == 0: raise ValueError( - "You are trying to normalize your batch axis. Do you want to " - "use tf.layer.batch_normalization instead" + "You are trying to normalize your batch axis. 
Do you want to use tf.layer.batch_normalization instead" ) def _create_input_spec(self, input_shape): @@ -838,7 +850,10 @@ class TFWav2Vec2Attention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(attn_weights), [bsz * self.num_heads, tgt_len, src_len], - message=f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is {shape_list(attn_weights)}", + message=( + f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is" + f" {shape_list(attn_weights)}" + ), ) if attention_mask is not None: @@ -848,7 +863,10 @@ class TFWav2Vec2Attention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(attention_mask), [bsz, 1, tgt_len, src_len], - message=f"Attention mask should be of size {(bsz, 1, tgt_len, src_len)}, but is {shape_list(attention_mask)}", + message=( + f"Attention mask should be of size {(bsz, 1, tgt_len, src_len)}, but is" + f" {shape_list(attention_mask)}" + ), ) attention_mask = tf.cast(attention_mask, dtype=attn_weights.dtype) @@ -864,7 +882,10 @@ class TFWav2Vec2Attention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(layer_head_mask), [self.num_heads], - message=f"Head mask for a single layer should be of size {(self.num_heads)}, but is {shape_list(layer_head_mask)}", + message=( + f"Head mask for a single layer should be of size {(self.num_heads)}, but is" + f" {shape_list(layer_head_mask)}" + ), ) attn_weights = tf.reshape(layer_head_mask, (1, -1, 1, 1)) * tf.reshape( @@ -881,7 +902,10 @@ class TFWav2Vec2Attention(tf.keras.layers.Layer): tf.debugging.assert_equal( shape_list(attn_output), [bsz * self.num_heads, tgt_len, self.head_dim], - message=f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is {shape_list(attn_output)}", + message=( + f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is" + f" {shape_list(attn_output)}" + ), ) attn_output = tf.transpose( diff --git a/src/transformers/models/wav2vec2/modeling_wav2vec2.py b/src/transformers/models/wav2vec2/modeling_wav2vec2.py index f58ec9a336..191f0e7e59 100755 --- a/src/transformers/models/wav2vec2/modeling_wav2vec2.py +++ b/src/transformers/models/wav2vec2/modeling_wav2vec2.py @@ -636,7 +636,8 @@ class Wav2Vec2Attention(nn.Module): if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len): raise ValueError( - f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is {attn_weights.size()}" + f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is" + f" {attn_weights.size()}" ) if attention_mask is not None: @@ -652,7 +653,8 @@ class Wav2Vec2Attention(nn.Module): if layer_head_mask is not None: if layer_head_mask.size() != (self.num_heads,): raise ValueError( - f"Head mask for a single layer should be of size {(self.num_heads,)}, but is {layer_head_mask.size()}" + f"Head mask for a single layer should be of size {(self.num_heads,)}, but is" + f" {layer_head_mask.size()}" ) attn_weights = layer_head_mask.view(1, -1, 1, 1) * attn_weights.view(bsz, self.num_heads, tgt_len, src_len) attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len) @@ -673,7 +675,8 @@ class Wav2Vec2Attention(nn.Module): if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim): raise ValueError( - f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is {attn_output.size()}" + f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, 
self.head_dim)}, but is" + f" {attn_output.size()}" ) attn_output = attn_output.view(bsz, self.num_heads, tgt_len, self.head_dim) diff --git a/src/transformers/models/wav2vec2/tokenization_wav2vec2.py b/src/transformers/models/wav2vec2/tokenization_wav2vec2.py index 53a6cfe1c0..02840f830b 100644 --- a/src/transformers/models/wav2vec2/tokenization_wav2vec2.py +++ b/src/transformers/models/wav2vec2/tokenization_wav2vec2.py @@ -61,7 +61,9 @@ PRETRAINED_VOCAB_FILES_MAP = { "facebook/wav2vec2-base-960h": "https://huggingface.co/facebook/wav2vec2-base-960h/resolve/main/vocab.json", }, "tokenizer_config_file": { - "facebook/wav2vec2-base-960h": "https://huggingface.co/facebook/wav2vec2-base-960h/resolve/main/tokenizer_config.json", + "facebook/wav2vec2-base-960h": ( + "https://huggingface.co/facebook/wav2vec2-base-960h/resolve/main/tokenizer_config.json" + ), }, } @@ -717,7 +719,9 @@ class Wav2Vec2Tokenizer(PreTrainedTokenizer): "facebook/wav2vec2-base-960h": "https://huggingface.co/facebook/wav2vec2-base-960h/resolve/main/vocab.json" }, "tokenizer_config_file": { - "facebook/wav2vec2-base-960h": "https://huggingface.co/facebook/wav2vec2-base-960h/resolve/main/tokenizer.json", + "facebook/wav2vec2-base-960h": ( + "https://huggingface.co/facebook/wav2vec2-base-960h/resolve/main/tokenizer.json" + ), }, } model_input_names = ["input_values", "attention_mask"] @@ -748,7 +752,8 @@ class Wav2Vec2Tokenizer(PreTrainedTokenizer): ) warnings.warn( - "The class `Wav2Vec2Tokenizer` is deprecated and will be removed in version 5 of Transformers. Please use `Wav2Vec2Processor` or `Wav2Vec2CTCTokenizer` instead.", + "The class `Wav2Vec2Tokenizer` is deprecated and will be removed in version 5 of Transformers. Please use" + " `Wav2Vec2Processor` or `Wav2Vec2CTCTokenizer` instead.", FutureWarning, ) diff --git a/src/transformers/models/wav2vec2_phoneme/tokenization_wav2vec2_phoneme.py b/src/transformers/models/wav2vec2_phoneme/tokenization_wav2vec2_phoneme.py index 6bd355645e..7f4d29147f 100644 --- a/src/transformers/models/wav2vec2_phoneme/tokenization_wav2vec2_phoneme.py +++ b/src/transformers/models/wav2vec2_phoneme/tokenization_wav2vec2_phoneme.py @@ -55,10 +55,14 @@ VOCAB_FILES_NAMES = { PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "facebook/wav2vec2-lv-60-espeak-cv-ft": "https://huggingface.co/facebook/wav2vec2-lv-60-espeak-cv-ft/resolve/main/vocab.json", + "facebook/wav2vec2-lv-60-espeak-cv-ft": ( + "https://huggingface.co/facebook/wav2vec2-lv-60-espeak-cv-ft/resolve/main/vocab.json" + ), }, "tokenizer_config_file": { - "facebook/wav2vec2-lv-60-espeak-cv-ft": "https://huggingface.co/facebook/wav2vec2-lv-60-espeak-cv-ft/resolve/main/tokenizer_config.json", + "facebook/wav2vec2-lv-60-espeak-cv-ft": ( + "https://huggingface.co/facebook/wav2vec2-lv-60-espeak-cv-ft/resolve/main/tokenizer_config.json" + ), }, } @@ -369,7 +373,7 @@ class Wav2Vec2PhonemeCTCTokenizer(PreTrainedTokenizer): if len(char_offsets) != len(processed_chars): raise ValueError( f"`char_offsets`: {char_offsets} and `processed_tokens`: {processed_chars}" - f" have to be of the same length, but are: `len(offsets)`: " + " have to be of the same length, but are: `len(offsets)`: " f"{len(char_offsets)} and `len(processed_tokens)`: {len(processed_chars)}" ) @@ -600,7 +604,7 @@ class Wav2Vec2PhonemeCTCTokenizer(PreTrainedTokenizer): tokens_to_add = [] for token in new_tokens: if not isinstance(token, str): - raise ValueError(f"Token {token} has to be of type string, but is " f"of type {type(token)}.") + raise ValueError(f"Token {token} has to be 
of type string, but is of type {type(token)}.") assert isinstance(token, str) if ( token != self.unk_token diff --git a/src/transformers/models/wavlm/configuration_wavlm.py b/src/transformers/models/wavlm/configuration_wavlm.py index d7f0b70470..a1906258d9 100644 --- a/src/transformers/models/wavlm/configuration_wavlm.py +++ b/src/transformers/models/wavlm/configuration_wavlm.py @@ -290,10 +290,10 @@ class WavLMConfig(PretrainedConfig): or (len(self.conv_dim) != self.num_feat_extract_layers) ): raise ValueError( - "Configuration for convolutional layers is incorrect. " - "It is required that `len(config.conv_dim)` == `len(config.conv_stride)` == `len(config.conv_kernel)`, " - f"but is `len(config.conv_dim) = {len(self.conv_dim)}`, `len(config.conv_stride) " - f"= {len(self.conv_stride)}`, `len(config.conv_kernel) = {len(self.conv_kernel)}`." + "Configuration for convolutional layers is incorrect. It is required that `len(config.conv_dim)` ==" + " `len(config.conv_stride)` == `len(config.conv_kernel)`, but is `len(config.conv_dim) =" + f" {len(self.conv_dim)}`, `len(config.conv_stride) = {len(self.conv_stride)}`," + f" `len(config.conv_kernel) = {len(self.conv_kernel)}`." ) # fine-tuning config parameters for SpecAugment: https://arxiv.org/abs/1904.08779 diff --git a/src/transformers/models/wavlm/convert_wavlm_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/wavlm/convert_wavlm_original_pytorch_checkpoint_to_pytorch.py index 8523fa87eb..91758cc959 100644 --- a/src/transformers/models/wavlm/convert_wavlm_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/wavlm/convert_wavlm_original_pytorch_checkpoint_to_pytorch.py @@ -74,9 +74,10 @@ def set_recursively(hf_pointer, key, value, full_name, weight_type): else: hf_shape = hf_pointer.shape - assert ( - hf_shape == value.shape - ), f"Shape of hf {key + '.' + weight_type if weight_type is not None else ''} is {hf_shape}, but should be {value.shape} for {full_name}" + assert hf_shape == value.shape, ( + f"Shape of hf {key + '.' + weight_type if weight_type is not None else ''} is {hf_shape}, but should be" + f" {value.shape} for {full_name}" + ) if weight_type == "weight": hf_pointer.weight.data = value @@ -144,28 +145,32 @@ def load_conv_layer(full_name, value, feature_extractor, unused_weights, use_gro if type_id == 0: if "bias" in name: - assert ( - value.shape == feature_extractor.conv_layers[layer_id].conv.bias.data.shape - ), f"{full_name} has size {value.shape}, but {feature_extractor.conv_layers[layer_id].conv.bias.data.shape} was found." + assert value.shape == feature_extractor.conv_layers[layer_id].conv.bias.data.shape, ( + f"{full_name} has size {value.shape}, but" + f" {feature_extractor.conv_layers[layer_id].conv.bias.data.shape} was found." + ) feature_extractor.conv_layers[layer_id].conv.bias.data = value logger.info(f"Feat extract conv layer {layer_id} was initialized from {full_name}.") elif "weight" in name: - assert ( - value.shape == feature_extractor.conv_layers[layer_id].conv.weight.data.shape - ), f"{full_name} has size {value.shape}, but {feature_extractor.conv_layers[layer_id].conv.weight.data.shape} was found." + assert value.shape == feature_extractor.conv_layers[layer_id].conv.weight.data.shape, ( + f"{full_name} has size {value.shape}, but" + f" {feature_extractor.conv_layers[layer_id].conv.weight.data.shape} was found." 
+ ) feature_extractor.conv_layers[layer_id].conv.weight.data = value logger.info(f"Feat extract conv layer {layer_id} was initialized from {full_name}.") elif (type_id == 2 and not use_group_norm) or (type_id == 2 and layer_id == 0 and use_group_norm): if "bias" in name: - assert ( - value.shape == feature_extractor.conv_layers[layer_id].layer_norm.bias.data.shape - ), f"{full_name} has size {value.shape}, but {feature_extractor[layer_id].layer_norm.bias.data.shape} was found." + assert value.shape == feature_extractor.conv_layers[layer_id].layer_norm.bias.data.shape, ( + f"{full_name} has size {value.shape}, but" + f" {feature_extractor.conv_layers[layer_id].layer_norm.bias.data.shape} was found." + ) feature_extractor.conv_layers[layer_id].layer_norm.bias.data = value logger.info(f"Feat extract layer norm weight of layer {layer_id} was initialized from {full_name}.") elif "weight" in name: - assert ( - value.shape == feature_extractor.conv_layers[layer_id].layer_norm.weight.data.shape - ), f"{full_name} has size {value.shape}, but {feature_extractor[layer_id].layer_norm.weight.data.shape} was found." + assert value.shape == feature_extractor.conv_layers[layer_id].layer_norm.weight.data.shape, ( + f"{full_name} has size {value.shape}, but" + f" {feature_extractor.conv_layers[layer_id].layer_norm.weight.data.shape} was found." + ) feature_extractor.conv_layers[layer_id].layer_norm.weight.data = value logger.info(f"Feat extract layer norm weight of layer {layer_id} was initialized from {full_name}.") else: diff --git a/src/transformers/models/xglm/modeling_xglm.py b/src/transformers/models/xglm/modeling_xglm.py index f26c7fa818..4047958d4f 100755 --- a/src/transformers/models/xglm/modeling_xglm.py +++ b/src/transformers/models/xglm/modeling_xglm.py @@ -330,7 +330,8 @@ class XGLMAttention(nn.Module): if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len): raise ValueError( - f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is {attn_weights.size()}" + f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is" + f" {attn_weights.size()}" ) if attention_mask is not None: @@ -346,7 +347,8 @@ class XGLMAttention(nn.Module): if layer_head_mask is not None: if layer_head_mask.size() != (self.num_heads,): raise ValueError( - f"Head mask for a single layer should be of size {(self.num_heads,)}, but is {layer_head_mask.size()}" + f"Head mask for a single layer should be of size {(self.num_heads,)}, but is" + f" {layer_head_mask.size()}" ) attn_weights = layer_head_mask.view(1, -1, 1, 1) * attn_weights.view(bsz, self.num_heads, tgt_len, src_len) attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len) @@ -367,7 +369,8 @@ class XGLMAttention(nn.Module): if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim): raise ValueError( - f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is {attn_output.size()}" + f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is" + f" {attn_output.size()}" ) attn_output = attn_output.view(bsz, self.num_heads, tgt_len, self.head_dim) @@ -722,7 +725,8 @@ class XGLMModel(XGLMPreTrainedModel): if attn_mask is not None: if attn_mask.size()[0] != len(self.layers): raise ValueError( - f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}." + f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for" + f" {head_mask.size()[0]}." 
) for idx, decoder_layer in enumerate(self.layers): # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) @@ -738,7 +742,8 @@ if use_cache: logger.warning( - "`use_cache = True` is incompatible with gradient checkpointing`. Setting `use_cache = False`..." + "`use_cache = True` is incompatible with gradient checkpointing. Setting `use_cache =" + " False`..." ) use_cache = False diff --git a/src/transformers/models/xlm/tokenization_xlm.py b/src/transformers/models/xlm/tokenization_xlm.py index f6c94f11ae..6b72cf113b 100644 --- a/src/transformers/models/xlm/tokenization_xlm.py +++ b/src/transformers/models/xlm/tokenization_xlm.py @@ -697,7 +697,8 @@ class XLMTokenizer(PreTrainedTokenizer): ) except (AttributeError, ImportError): logger.error( - "Make sure you install KyTea (https://github.com/neubig/kytea) and it's python wrapper (https://github.com/chezou/Mykytea-python) with the following steps" + "Make sure you install KyTea (https://github.com/neubig/kytea) and its Python wrapper" + " (https://github.com/chezou/Mykytea-python) with the following steps" ) logger.error("1. git clone git@github.com:neubig/kytea.git && cd kytea") logger.error("2. autoreconf -i") @@ -801,7 +802,8 @@ class XLMTokenizer(PreTrainedTokenizer): """ if lang and self.lang2id and lang not in self.lang2id: logger.error( - "Supplied language code not found in lang2id mapping. Please check that your language is supported by the loaded pretrained model." + "Supplied language code not found in lang2id mapping. Please check that your language is supported by" + " the loaded pretrained model." ) if bypass_tokenizer: text = text.split() diff --git a/src/transformers/models/xlm_prophetnet/configuration_xlm_prophetnet.py b/src/transformers/models/xlm_prophetnet/configuration_xlm_prophetnet.py index 2c3d21bd28..3025ed29f6 100644 --- a/src/transformers/models/xlm_prophetnet/configuration_xlm_prophetnet.py +++ b/src/transformers/models/xlm_prophetnet/configuration_xlm_prophetnet.py @@ -22,7 +22,9 @@ from ..prophetnet.configuration_prophetnet import ProphetNetConfig logger = logging.get_logger(__name__) XLM_PROPHETNET_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "microsoft/xprophetnet-large-wiki100-cased": "https://huggingface.co/microsoft/xprophetnet-large-wiki100-cased/resolve/main/config.json", + "microsoft/xprophetnet-large-wiki100-cased": ( + "https://huggingface.co/microsoft/xprophetnet-large-wiki100-cased/resolve/main/config.json" + ), } diff --git a/src/transformers/models/xlm_prophetnet/tokenization_xlm_prophetnet.py b/src/transformers/models/xlm_prophetnet/tokenization_xlm_prophetnet.py index 48f68238f1..af83082879 100644 --- a/src/transformers/models/xlm_prophetnet/tokenization_xlm_prophetnet.py +++ b/src/transformers/models/xlm_prophetnet/tokenization_xlm_prophetnet.py @@ -30,7 +30,9 @@ VOCAB_FILES_NAMES = {"vocab_file": "prophetnet.tokenizer"} PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "microsoft/xprophetnet-large-wiki100-cased": "https://huggingface.co/microsoft/xprophetnet-large-wiki100-cased/resolve/main/prophetnet.tokenizer", + "microsoft/xprophetnet-large-wiki100-cased": ( + "https://huggingface.co/microsoft/xprophetnet-large-wiki100-cased/resolve/main/prophetnet.tokenizer" + ), } } @@ -159,8 +161,8 @@ class XLMProphetNetTokenizer(PreTrainedTokenizer): import sentencepiece as spm except ImportError: logger.warning( - "You need 
to install SentencePiece to use XLMRobertaTokenizer: https://github.com/google/sentencepiece" + " pip install sentencepiece" ) raise @@ -198,8 +200,8 @@ class XLMProphetNetTokenizer(PreTrainedTokenizer): import sentencepiece as spm except ImportError: logger.warning( - "You need to install SentencePiece to use XLMRobertaTokenizer: https://github.com/google/sentencepiece " - "pip install sentencepiece" + "You need to install SentencePiece to use XLMRobertaTokenizer: https://github.com/google/sentencepiece" + " pip install sentencepiece" ) raise diff --git a/src/transformers/models/xlm_roberta/configuration_xlm_roberta.py b/src/transformers/models/xlm_roberta/configuration_xlm_roberta.py index c1469bfca4..194b38a8c1 100644 --- a/src/transformers/models/xlm_roberta/configuration_xlm_roberta.py +++ b/src/transformers/models/xlm_roberta/configuration_xlm_roberta.py @@ -27,10 +27,18 @@ logger = logging.get_logger(__name__) XLM_ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP = { "xlm-roberta-base": "https://huggingface.co/xlm-roberta-base/resolve/main/config.json", "xlm-roberta-large": "https://huggingface.co/xlm-roberta-large/resolve/main/config.json", - "xlm-roberta-large-finetuned-conll02-dutch": "https://huggingface.co/xlm-roberta-large-finetuned-conll02-dutch/resolve/main/config.json", - "xlm-roberta-large-finetuned-conll02-spanish": "https://huggingface.co/xlm-roberta-large-finetuned-conll02-spanish/resolve/main/config.json", - "xlm-roberta-large-finetuned-conll03-english": "https://huggingface.co/xlm-roberta-large-finetuned-conll03-english/resolve/main/config.json", - "xlm-roberta-large-finetuned-conll03-german": "https://huggingface.co/xlm-roberta-large-finetuned-conll03-german/resolve/main/config.json", + "xlm-roberta-large-finetuned-conll02-dutch": ( + "https://huggingface.co/xlm-roberta-large-finetuned-conll02-dutch/resolve/main/config.json" + ), + "xlm-roberta-large-finetuned-conll02-spanish": ( + "https://huggingface.co/xlm-roberta-large-finetuned-conll02-spanish/resolve/main/config.json" + ), + "xlm-roberta-large-finetuned-conll03-english": ( + "https://huggingface.co/xlm-roberta-large-finetuned-conll03-english/resolve/main/config.json" + ), + "xlm-roberta-large-finetuned-conll03-german": ( + "https://huggingface.co/xlm-roberta-large-finetuned-conll03-german/resolve/main/config.json" + ), } diff --git a/src/transformers/models/xlm_roberta/tokenization_xlm_roberta.py b/src/transformers/models/xlm_roberta/tokenization_xlm_roberta.py index 072933a12e..40928d8dc3 100644 --- a/src/transformers/models/xlm_roberta/tokenization_xlm_roberta.py +++ b/src/transformers/models/xlm_roberta/tokenization_xlm_roberta.py @@ -35,10 +35,18 @@ PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { "xlm-roberta-base": "https://huggingface.co/xlm-roberta-base/resolve/main/sentencepiece.bpe.model", "xlm-roberta-large": "https://huggingface.co/xlm-roberta-large/resolve/main/sentencepiece.bpe.model", - "xlm-roberta-large-finetuned-conll02-dutch": "https://huggingface.co/xlm-roberta-large-finetuned-conll02-dutch/resolve/main/sentencepiece.bpe.model", - "xlm-roberta-large-finetuned-conll02-spanish": "https://huggingface.co/xlm-roberta-large-finetuned-conll02-spanish/resolve/main/sentencepiece.bpe.model", - "xlm-roberta-large-finetuned-conll03-english": "https://huggingface.co/xlm-roberta-large-finetuned-conll03-english/resolve/main/sentencepiece.bpe.model", - "xlm-roberta-large-finetuned-conll03-german": "https://huggingface.co/xlm-roberta-large-finetuned-conll03-german/resolve/main/sentencepiece.bpe.model", + 
"xlm-roberta-large-finetuned-conll02-dutch": ( + "https://huggingface.co/xlm-roberta-large-finetuned-conll02-dutch/resolve/main/sentencepiece.bpe.model" + ), + "xlm-roberta-large-finetuned-conll02-spanish": ( + "https://huggingface.co/xlm-roberta-large-finetuned-conll02-spanish/resolve/main/sentencepiece.bpe.model" + ), + "xlm-roberta-large-finetuned-conll03-english": ( + "https://huggingface.co/xlm-roberta-large-finetuned-conll03-english/resolve/main/sentencepiece.bpe.model" + ), + "xlm-roberta-large-finetuned-conll03-german": ( + "https://huggingface.co/xlm-roberta-large-finetuned-conll03-german/resolve/main/sentencepiece.bpe.model" + ), } } diff --git a/src/transformers/models/xlm_roberta/tokenization_xlm_roberta_fast.py b/src/transformers/models/xlm_roberta/tokenization_xlm_roberta_fast.py index 119d2fa080..f99e3c086a 100644 --- a/src/transformers/models/xlm_roberta/tokenization_xlm_roberta_fast.py +++ b/src/transformers/models/xlm_roberta/tokenization_xlm_roberta_fast.py @@ -38,18 +38,34 @@ PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { "xlm-roberta-base": "https://huggingface.co/xlm-roberta-base/resolve/main/sentencepiece.bpe.model", "xlm-roberta-large": "https://huggingface.co/xlm-roberta-large/resolve/main/sentencepiece.bpe.model", - "xlm-roberta-large-finetuned-conll02-dutch": "https://huggingface.co/xlm-roberta-large-finetuned-conll02-dutch/resolve/main/sentencepiece.bpe.model", - "xlm-roberta-large-finetuned-conll02-spanish": "https://huggingface.co/xlm-roberta-large-finetuned-conll02-spanish/resolve/main/sentencepiece.bpe.model", - "xlm-roberta-large-finetuned-conll03-english": "https://huggingface.co/xlm-roberta-large-finetuned-conll03-english/resolve/main/sentencepiece.bpe.model", - "xlm-roberta-large-finetuned-conll03-german": "https://huggingface.co/xlm-roberta-large-finetuned-conll03-german/resolve/main/sentencepiece.bpe.model", + "xlm-roberta-large-finetuned-conll02-dutch": ( + "https://huggingface.co/xlm-roberta-large-finetuned-conll02-dutch/resolve/main/sentencepiece.bpe.model" + ), + "xlm-roberta-large-finetuned-conll02-spanish": ( + "https://huggingface.co/xlm-roberta-large-finetuned-conll02-spanish/resolve/main/sentencepiece.bpe.model" + ), + "xlm-roberta-large-finetuned-conll03-english": ( + "https://huggingface.co/xlm-roberta-large-finetuned-conll03-english/resolve/main/sentencepiece.bpe.model" + ), + "xlm-roberta-large-finetuned-conll03-german": ( + "https://huggingface.co/xlm-roberta-large-finetuned-conll03-german/resolve/main/sentencepiece.bpe.model" + ), }, "tokenizer_file": { "xlm-roberta-base": "https://huggingface.co/xlm-roberta-base/resolve/main/tokenizer.json", "xlm-roberta-large": "https://huggingface.co/xlm-roberta-large/resolve/main/tokenizer.json", - "xlm-roberta-large-finetuned-conll02-dutch": "https://huggingface.co/xlm-roberta-large-finetuned-conll02-dutch/resolve/main/tokenizer.json", - "xlm-roberta-large-finetuned-conll02-spanish": "https://huggingface.co/xlm-roberta-large-finetuned-conll02-spanish/resolve/main/tokenizer.json", - "xlm-roberta-large-finetuned-conll03-english": "https://huggingface.co/xlm-roberta-large-finetuned-conll03-english/resolve/main/tokenizer.json", - "xlm-roberta-large-finetuned-conll03-german": "https://huggingface.co/xlm-roberta-large-finetuned-conll03-german/resolve/main/tokenizer.json", + "xlm-roberta-large-finetuned-conll02-dutch": ( + "https://huggingface.co/xlm-roberta-large-finetuned-conll02-dutch/resolve/main/tokenizer.json" + ), + "xlm-roberta-large-finetuned-conll02-spanish": ( + 
"https://huggingface.co/xlm-roberta-large-finetuned-conll02-spanish/resolve/main/tokenizer.json" + ), + "xlm-roberta-large-finetuned-conll03-english": ( + "https://huggingface.co/xlm-roberta-large-finetuned-conll03-english/resolve/main/tokenizer.json" + ), + "xlm-roberta-large-finetuned-conll03-german": ( + "https://huggingface.co/xlm-roberta-large-finetuned-conll03-german/resolve/main/tokenizer.json" + ), }, } diff --git a/src/transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py b/src/transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py index ab46aa8f03..70dd422157 100644 --- a/src/transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py +++ b/src/transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py @@ -415,7 +415,8 @@ class XLMRobertaXLLayer(nn.Module): if self.is_decoder and encoder_hidden_states is not None: if not hasattr(self, "crossattention"): raise ValueError( - f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`" + f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers" + " by setting `config.add_cross_attention=True`" ) # cross_attn cached key/values tuple is at positions 3,4 of past_key_value tuple diff --git a/src/transformers/models/xlnet/configuration_xlnet.py b/src/transformers/models/xlnet/configuration_xlnet.py index bc6f0f6835..5448f9248c 100644 --- a/src/transformers/models/xlnet/configuration_xlnet.py +++ b/src/transformers/models/xlnet/configuration_xlnet.py @@ -219,7 +219,8 @@ class XLNetConfig(PretrainedConfig): if "use_cache" in kwargs: warnings.warn( - "The `use_cache` argument is deprecated and will be removed in a future version, use `use_mems_eval` instead.", + "The `use_cache` argument is deprecated and will be removed in a future version, use `use_mems_eval`" + " instead.", FutureWarning, ) use_mems_eval = kwargs["use_cache"] diff --git a/src/transformers/models/xlnet/convert_xlnet_original_tf_checkpoint_to_pytorch.py b/src/transformers/models/xlnet/convert_xlnet_original_tf_checkpoint_to_pytorch.py index f6fc73ca0e..804b52b0dc 100755 --- a/src/transformers/models/xlnet/convert_xlnet_original_tf_checkpoint_to_pytorch.py +++ b/src/transformers/models/xlnet/convert_xlnet_original_tf_checkpoint_to_pytorch.py @@ -88,8 +88,10 @@ if __name__ == "__main__": default=None, type=str, required=True, - help="The config json file corresponding to the pre-trained XLNet model. \n" - "This specifies the model architecture.", + help=( + "The config json file corresponding to the pre-trained XLNet model. \n" + "This specifies the model architecture." 
+ ), ) parser.add_argument( "--pytorch_dump_folder_path", diff --git a/src/transformers/models/xlnet/modeling_xlnet.py b/src/transformers/models/xlnet/modeling_xlnet.py index dc7f78eeb8..3226773e7f 100755 --- a/src/transformers/models/xlnet/modeling_xlnet.py +++ b/src/transformers/models/xlnet/modeling_xlnet.py @@ -1092,7 +1092,8 @@ class XLNetModel(XLNetPreTrainedModel): if "use_cache" in kwargs: warnings.warn( - "The `use_cache` argument is deprecated and will be removed in a future version, use `use_mems` instead.", + "The `use_cache` argument is deprecated and will be removed in a future version, use `use_mems`" + " instead.", FutureWarning, ) use_mems = kwargs["use_cache"] diff --git a/src/transformers/models/yolos/convert_yolos_to_pytorch.py b/src/transformers/models/yolos/convert_yolos_to_pytorch.py index add0ae772d..7f4161a632 100644 --- a/src/transformers/models/yolos/convert_yolos_to_pytorch.py +++ b/src/transformers/models/yolos/convert_yolos_to_pytorch.py @@ -247,7 +247,10 @@ if __name__ == "__main__": "--yolos_name", default="yolos_s_200_pre", type=str, - help="Name of the YOLOS model you'd like to convert. Should be one of 'yolos_ti', 'yolos_s_200_pre', 'yolos_s_300_pre', 'yolos_s_dWr', 'yolos_base'.", + help=( + "Name of the YOLOS model you'd like to convert. Should be one of 'yolos_ti', 'yolos_s_200_pre'," + " 'yolos_s_300_pre', 'yolos_s_dWr', 'yolos_base'." + ), ) parser.add_argument( "--checkpoint_path", default=None, type=str, help="Path to the original state dict (.pth file)." diff --git a/src/transformers/models/yolos/feature_extraction_yolos.py b/src/transformers/models/yolos/feature_extraction_yolos.py index 76b64ec837..e199d1ae7b 100644 --- a/src/transformers/models/yolos/feature_extraction_yolos.py +++ b/src/transformers/models/yolos/feature_extraction_yolos.py @@ -537,7 +537,8 @@ class YolosFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin) valid_masks_path = True if not valid_masks_path: raise ValueError( - "The path to the directory containing the mask PNG files should be provided as a `pathlib.Path` object." + "The path to the directory containing the mask PNG files should be provided as a" + " `pathlib.Path` object." ) if not is_batched: diff --git a/src/transformers/onnx/config.py b/src/transformers/onnx/config.py index 19f848b686..f97d61ea40 100644 --- a/src/transformers/onnx/config.py +++ b/src/transformers/onnx/config.py @@ -293,7 +293,8 @@ class OnnxConfig(ABC): raise ValueError("You cannot provide both a tokenizer and a preprocessor to generate dummy inputs.") if tokenizer is not None: warnings.warn( - "The `tokenizer` argument is deprecated and will be removed in version 5 of Transformers. Use `preprocessor` instead.", + "The `tokenizer` argument is deprecated and will be removed in version 5 of Transformers. 
Use" + " `preprocessor` instead.", FutureWarning, ) logger.warning("Overwriting the `preprocessor` argument with `tokenizer` to generate dummmy inputs.") @@ -410,7 +411,8 @@ class OnnxConfigWithPast(OnnxConfig, ABC): """ if not hasattr(self._config, "num_layers"): raise AttributeError( - "could not find the number of layers attribute in the model configuration, override the num_layers property of the model OnnxConfig to solve this" + "could not find the number of layers attribute in the model configuration, override the num_layers" + " property of the model OnnxConfig to solve this" ) return self._config.num_layers @@ -422,7 +424,8 @@ class OnnxConfigWithPast(OnnxConfig, ABC): """ if not hasattr(self._config, "num_attention_heads"): raise AttributeError( - "could not find the number of attention heads attribute in the model configuration, override the num_attention_heads property of the model OnnxConfig to solve this" + "could not find the number of attention heads attribute in the model configuration, override the" + " num_attention_heads property of the model OnnxConfig to solve this" ) return self._config.num_attention_heads @@ -530,7 +533,8 @@ class OnnxSeq2SeqConfigWithPast(OnnxConfigWithPast): num_layers = (self._config.encoder_layers, self._config.decoder_layers) else: raise AttributeError( - "could not find the number of encoder and decoder layers attributes in the model configuration, override the num_layers property of the model OnnxConfig to solve this" + "could not find the number of encoder and decoder layers attributes in the model configuration," + " override the num_layers property of the model OnnxConfig to solve this" ) return num_layers @@ -545,7 +549,9 @@ class OnnxSeq2SeqConfigWithPast(OnnxConfigWithPast): num_attention_heads = (self._config.encoder_attention_heads, self._config.decoder_attention_heads) else: raise AttributeError( - "could not find the number of attention heads for the encoder and the decoder attributes in the model configuration, override the num_attention_heads property of the model OnnxConfig to solve this" + "could not find the number of attention heads for the encoder and the decoder attributes in the" + " model configuration, override the num_attention_heads property of the model OnnxConfig to solve" + " this" ) return num_attention_heads diff --git a/src/transformers/onnx/convert.py b/src/transformers/onnx/convert.py index 69aca2a43a..2f1789bbdc 100644 --- a/src/transformers/onnx/convert.py +++ b/src/transformers/onnx/convert.py @@ -68,7 +68,7 @@ def check_onnxruntime_requirements(minimum_version: Version): raise ImportError( f"We found an older version of onnxruntime ({onnxruntime.__version__}) " f"but we require onnxruntime to be >= {minimum_version} to enable all the conversions options.\n" - f"Please update onnxruntime by running `pip install --upgrade onnxruntime`" + "Please update onnxruntime by running `pip install --upgrade onnxruntime`" ) except ImportError: @@ -111,7 +111,8 @@ def export_pytorch( raise ValueError("You cannot provide both a tokenizer and a preprocessor to export the model.") if tokenizer is not None: warnings.warn( - "The `tokenizer` argument is deprecated and will be removed in version 5 of Transformers. Use `preprocessor` instead.", + "The `tokenizer` argument is deprecated and will be removed in version 5 of Transformers. 
Use" + " `preprocessor` instead.", FutureWarning, ) logger.info("Overwriting the `preprocessor` argument with `tokenizer` to generate dummmy inputs.") @@ -168,9 +169,13 @@ def export_pytorch( message = str(err) if ( message - == "Exporting model exceed maximum protobuf size of 2GB. Please call torch.onnx.export without setting use_external_data_format parameter." + == "Exporting model exceed maximum protobuf size of 2GB. Please call torch.onnx.export without" + " setting use_external_data_format parameter." ): - message = "Exporting model exceed maximum protobuf size of 2GB. Please call torch.onnx.export without setting use_external_data_format parameter or try with torch 1.10+." + message = ( + "Exporting model exceed maximum protobuf size of 2GB. Please call torch.onnx.export" + " without setting use_external_data_format parameter or try with torch 1.10+." + ) raise RuntimeError(message) else: raise err @@ -227,7 +232,8 @@ def export_tensorflow( raise ValueError("You cannot provide both a tokenizer and preprocessor to export the model.") if tokenizer is not None: warnings.warn( - "The `tokenizer` argument is deprecated and will be removed in version 5 of Transformers. Use `preprocessor` instead.", + "The `tokenizer` argument is deprecated and will be removed in version 5 of Transformers. Use" + " `preprocessor` instead.", FutureWarning, ) logger.info("Overwriting the `preprocessor` argument with `tokenizer` to generate dummmy inputs.") @@ -292,7 +298,8 @@ def export( raise ValueError("You cannot provide both a tokenizer and a preprocessor to export the model.") if tokenizer is not None: warnings.warn( - "The `tokenizer` argument is deprecated and will be removed in version 5 of Transformers. Use `preprocessor` instead.", + "The `tokenizer` argument is deprecated and will be removed in version 5 of Transformers. Use" + " `preprocessor` instead.", FutureWarning, ) logger.info("Overwriting the `preprocessor` argument with `tokenizer` to generate dummmy inputs.") @@ -306,7 +313,8 @@ def export( if not config.is_torch_support_available: logger.warning( - f"Unsupported PyTorch version for this model. Minimum required is {config.torch_onnx_minimum_version}, got: {torch_version}" + f"Unsupported PyTorch version for this model. Minimum required is {config.torch_onnx_minimum_version}," + f" got: {torch_version}" ) if is_torch_available() and issubclass(type(model), PreTrainedModel): @@ -332,7 +340,8 @@ def validate_model_outputs( raise ValueError("You cannot provide both a tokenizer and a preprocessor to validatethe model outputs.") if tokenizer is not None: warnings.warn( - "The `tokenizer` argument is deprecated and will be removed in version 5 of Transformers. Use `preprocessor` instead.", + "The `tokenizer` argument is deprecated and will be removed in version 5 of Transformers. Use" + " `preprocessor` instead.", FutureWarning, ) logger.info("Overwriting the `preprocessor` argument with `tokenizer` to generate dummmy inputs.") diff --git a/src/transformers/onnx/features.py b/src/transformers/onnx/features.py index c75cef897c..23c5754620 100644 --- a/src/transformers/onnx/features.py +++ b/src/transformers/onnx/features.py @@ -65,7 +65,8 @@ if is_tf_available(): ) if not is_torch_available() and not is_tf_available(): logger.warning( - "The ONNX export features are only supported for PyTorch or TensorFlow. You will not be able to export models without one of these libraries installed." + "The ONNX export features are only supported for PyTorch or TensorFlow. 
You will not be able to export models" + " without one of these libraries installed." ) @@ -443,8 +444,7 @@ class FeaturesManager: task_to_automodel = FeaturesManager._TASKS_TO_TF_AUTOMODELS if task not in task_to_automodel: raise KeyError( - f"Unknown task: {feature}. " - f"Possible values are {list(FeaturesManager._TASKS_TO_AUTOMODELS.values())}" + f"Unknown task: {feature}. Possible values are {list(FeaturesManager._TASKS_TO_AUTOMODELS.values())}" ) return task_to_automodel[task] @@ -497,8 +497,7 @@ class FeaturesManager: model_features = FeaturesManager.get_supported_features_for_model_type(model_type, model_name=model_name) if feature not in model_features: raise ValueError( - f"{model.config.model_type} doesn't support feature {feature}. " - f"Supported values are: {model_features}" + f"{model.config.model_type} doesn't support feature {feature}. Supported values are: {model_features}" ) return model.config.model_type, FeaturesManager._SUPPORTED_MODEL_TYPE[model_type][feature] diff --git a/src/transformers/optimization.py b/src/transformers/optimization.py index 60b9dca783..b957acb6de 100644 --- a/src/transformers/optimization.py +++ b/src/transformers/optimization.py @@ -304,8 +304,9 @@ class AdamW(Optimizer): ): if not no_deprecation_warning: warnings.warn( - "This implementation of AdamW is deprecated and will be removed in a future version. Use the" - " PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning", + "This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch" + " implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this" + " warning", FutureWarning, ) require_version("torch>=1.5.0") # add_ with alpha diff --git a/src/transformers/pipelines/__init__.py b/src/transformers/pipelines/__init__.py index 1350669e45..de55d499ef 100755 --- a/src/transformers/pipelines/__init__.py +++ b/src/transformers/pipelines/__init__.py @@ -500,15 +500,15 @@ def pipeline( if model is None and tokenizer is not None: raise RuntimeError( - "Impossible to instantiate a pipeline with tokenizer specified but not the model " - "as the provided tokenizer may not be compatible with the default model. " - "Please provide a PreTrainedModel class or a path/identifier to a pretrained model when providing tokenizer." + "Impossible to instantiate a pipeline with tokenizer specified but not the model as the provided tokenizer" + " may not be compatible with the default model. Please provide a PreTrainedModel class or a" + " path/identifier to a pretrained model when providing tokenizer." ) if model is None and feature_extractor is not None: raise RuntimeError( - "Impossible to instantiate a pipeline with feature_extractor specified but not the model " - "as the provided feature_extractor may not be compatible with the default model. " - "Please provide a PreTrainedModel class or a path/identifier to a pretrained model when providing feature_extractor." + "Impossible to instantiate a pipeline with feature_extractor specified but not the model as the provided" + " feature_extractor may not be compatible with the default model. Please provide a PreTrainedModel class" + " or a path/identifier to a pretrained model when providing feature_extractor." ) if task is None and model is not None: @@ -642,7 +642,9 @@ def pipeline( kwargs["decoder"] = decoder except ImportError as e: logger.warning( - f"Could not load the `decoder` for {model_name}. Defaulting to raw CTC. 
Try to install `pyctcdecode` and `kenlm`: (`pip install pyctcdecode`, `pip install https://github.com/kpu/kenlm/archive/master.zip`): Error: {e}" + f"Could not load the `decoder` for {model_name}. Defaulting to raw CTC. Try to install" + " `pyctcdecode` and `kenlm`: (`pip install pyctcdecode`, `pip install" + f" https://github.com/kpu/kenlm/archive/master.zip`): Error: {e}" ) if task == "translation" and model.config.task_specific_params: diff --git a/src/transformers/pipelines/base.py b/src/transformers/pipelines/base.py index d54a17df1e..a33089547f 100644 --- a/src/transformers/pipelines/base.py +++ b/src/transformers/pipelines/base.py @@ -139,7 +139,8 @@ def pad_collate_fn(tokenizer, feature_extractor): for item in items: if set(item.keys()) != keys: raise ValueError( - f"The elements of the batch contain different keys. Cannot batch them ({set(item.keys())} != {keys})" + f"The elements of the batch contain different keys. Cannot batch them ({set(item.keys())} !=" + f" {keys})" ) # input_values, input_pixels, input_ids, ... padded = {} @@ -879,7 +880,8 @@ class Pipeline(_ScikitCompat): supported_models = supported_models_names if self.model.__class__.__name__ not in supported_models: logger.error( - f"The model '{self.model.__class__.__name__}' is not supported for {self.task}. Supported models are {supported_models}." + f"The model '{self.model.__class__.__name__}' is not supported for {self.task}. Supported models are" + f" {supported_models}." ) @abstractmethod @@ -994,7 +996,8 @@ class Pipeline(_ScikitCompat): self.call_count += 1 if self.call_count > 10 and self.framework == "pt" and self.device.type == "cuda": warnings.warn( - "You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset", + "You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a" + " dataset", UserWarning, ) @@ -1058,7 +1061,8 @@ class ChunkPipeline(Pipeline): os.environ["TOKENIZERS_PARALLELISM"] = "false" if num_workers > 1: logger.warning( - "For ChunkPipeline using num_workers>0 is likely to result in errors since everything is iterable, setting `num_workers=1` to guarantee correctness." + "For ChunkPipeline using num_workers>0 is likely to result in errors since everything is iterable," + " setting `num_workers=1` to guarantee correctness." ) num_workers = 1 dataset = PipelineChunkIterator(inputs, self.preprocess, preprocess_params) diff --git a/src/transformers/pipelines/fill_mask.py b/src/transformers/pipelines/fill_mask.py index 517b457a65..f461f6faa2 100644 --- a/src/transformers/pipelines/fill_mask.py +++ b/src/transformers/pipelines/fill_mask.py @@ -167,7 +167,7 @@ class FillMaskPipeline(Pipeline): if len(input_ids) == 0: logger.warning( f"The specified target token `{target}` does not exist in the model vocabulary. 
" - f"We cannot replace it with anything meaningful, ignoring it" + "We cannot replace it with anything meaningful, ignoring it" ) continue id_ = input_ids[0] diff --git a/src/transformers/pipelines/table_question_answering.py b/src/transformers/pipelines/table_question_answering.py index d94bb6d061..25dcd320cf 100644 --- a/src/transformers/pipelines/table_question_answering.py +++ b/src/transformers/pipelines/table_question_answering.py @@ -56,14 +56,14 @@ class TableQuestionAnsweringArgumentHandler(ArgumentHandler): tqa_pipeline_inputs = table else: raise ValueError( - f"If keyword argument `table` is a list of dictionaries, each dictionary should have a `table` " - f"and `query` key, but only dictionary has keys {table[0].keys()} `table` and `query` keys." + "If keyword argument `table` is a list of dictionaries, each dictionary should have a `table`" + f" and `query` key, but only dictionary has keys {table[0].keys()} `table` and `query` keys." ) elif Dataset is not None and isinstance(table, Dataset) or isinstance(table, types.GeneratorType): return table else: raise ValueError( - f"Invalid input. Keyword argument `table` should be either of type `dict` or `list`, but " + "Invalid input. Keyword argument `table` should be either of type `dict` or `list`, but " f"is {type(table)})" ) else: diff --git a/src/transformers/pipelines/text_generation.py b/src/transformers/pipelines/text_generation.py index dbaa0a9df7..4f210871a2 100644 --- a/src/transformers/pipelines/text_generation.py +++ b/src/transformers/pipelines/text_generation.py @@ -103,7 +103,8 @@ class TextGenerationPipeline(Pipeline): if handle_long_generation is not None: if handle_long_generation not in {"hole"}: raise ValueError( - f"{handle_long_generation} is not a valid value for `handle_long_generation` parameter expected [None, 'hole']" + f"{handle_long_generation} is not a valid value for `handle_long_generation` parameter expected" + " [None, 'hole']" ) preprocess_params["handle_long_generation"] = handle_long_generation @@ -192,7 +193,8 @@ class TextGenerationPipeline(Pipeline): keep_length = self.tokenizer.model_max_length - new_tokens if keep_length <= 0: raise ValueError( - "We cannot use `hole` to handle this generation the number of desired tokens exceeds the models max length" + "We cannot use `hole` to handle this generation the number of desired tokens exceeds the" + " models max length" ) inputs["input_ids"] = inputs["input_ids"][:, -keep_length:] diff --git a/src/transformers/pipelines/token_classification.py b/src/transformers/pipelines/token_classification.py index 4ea8d11415..72f0c5c9c7 100644 --- a/src/transformers/pipelines/token_classification.py +++ b/src/transformers/pipelines/token_classification.py @@ -133,11 +133,13 @@ class TokenClassificationPipeline(Pipeline): if grouped_entities is not None: warnings.warn( - f'`grouped_entities` is deprecated and will be removed in version v5.0.0, defaulted to `aggregation_strategy="{aggregation_strategy}"` instead.' + "`grouped_entities` is deprecated and will be removed in version v5.0.0, defaulted to" + f' `aggregation_strategy="{aggregation_strategy}"` instead.' ) if ignore_subwords is not None: warnings.warn( - f'`ignore_subwords` is deprecated and will be removed in version v5.0.0, defaulted to `aggregation_strategy="{aggregation_strategy}"` instead.' + "`ignore_subwords` is deprecated and will be removed in version v5.0.0, defaulted to" + f' `aggregation_strategy="{aggregation_strategy}"` instead.' 
) if aggregation_strategy is not None: diff --git a/src/transformers/pipelines/zero_shot_classification.py b/src/transformers/pipelines/zero_shot_classification.py index 9d5d5bd61b..f98c87166c 100644 --- a/src/transformers/pipelines/zero_shot_classification.py +++ b/src/transformers/pipelines/zero_shot_classification.py @@ -86,7 +86,8 @@ class ZeroShotClassificationPipeline(ChunkPipeline): if self.tokenizer.pad_token is None: # Override for tokenizers not supporting padding logger.error( - "Tokenizer was not supporting padding necessary for zero-shot, attempting to use `pad_token=eos_token`" + "Tokenizer was not supporting padding necessary for zero-shot, attempting to use" + " `pad_token=eos_token`" ) self.tokenizer.pad_token = self.tokenizer.eos_token try: diff --git a/src/transformers/tokenization_utils.py b/src/transformers/tokenization_utils.py index 694b55cedd..6d33266c03 100644 --- a/src/transformers/tokenization_utils.py +++ b/src/transformers/tokenization_utils.py @@ -250,7 +250,8 @@ class Trie: for end in offsets: if start > end: logger.error( - "There was a bug in Trie algorithm in tokenization. Attempting to recover. Please report it anyway." + "There was a bug in Trie algorithm in tokenization. Attempting to recover. Please report it" + " anyway." ) continue elif start == end: @@ -627,11 +628,13 @@ class PreTrainedTokenizer(PreTrainedTokenizerBase): else: if is_split_into_words: raise ValueError( - f"Input {text} is not valid. Should be a string or a list/tuple of strings when `is_split_into_words=True`." + f"Input {text} is not valid. Should be a string or a list/tuple of strings when" + " `is_split_into_words=True`." ) else: raise ValueError( - f"Input {text} is not valid. Should be a string, a list/tuple of strings or a list/tuple of integers." + f"Input {text} is not valid. Should be a string, a list/tuple of strings or a list/tuple of" + " integers." ) if return_offsets_mapping: diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index 43d37e67cc..c127c19f1f 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -1502,12 +1502,12 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): if value == self.model_max_length - self.num_special_tokens_to_add(pair=False) and self.verbose: if not self.deprecation_warnings.get("max_len_single_sentence", False): logger.warning( - "Setting 'max_len_single_sentence' is now deprecated. " "This value is automatically set up." + "Setting 'max_len_single_sentence' is now deprecated. This value is automatically set up." ) self.deprecation_warnings["max_len_single_sentence"] = True else: raise ValueError( - "Setting 'max_len_single_sentence' is now deprecated. " "This value is automatically set up." + "Setting 'max_len_single_sentence' is now deprecated. This value is automatically set up." ) @max_len_sentences_pair.setter @@ -1516,13 +1516,11 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): if value == self.model_max_length - self.num_special_tokens_to_add(pair=True) and self.verbose: if not self.deprecation_warnings.get("max_len_sentences_pair", False): logger.warning( - "Setting 'max_len_sentences_pair' is now deprecated. " "This value is automatically set up." + "Setting 'max_len_sentences_pair' is now deprecated. This value is automatically set up." ) self.deprecation_warnings["max_len_sentences_pair"] = True else: - raise ValueError( - "Setting 'max_len_sentences_pair' is now deprecated. 
" "This value is automatically set up." - ) + raise ValueError("Setting 'max_len_sentences_pair' is now deprecated. This value is automatically set up.") def _set_processor_class(self, processor_class: str): """Sets processor class as an attribute.""" @@ -1530,9 +1528,10 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): def __repr__(self) -> str: return ( - f"{'PreTrainedTokenizerFast' if self.is_fast else 'PreTrainedTokenizer'}(name_or_path='{self.name_or_path}', " - f"vocab_size={self.vocab_size}, model_max_len={self.model_max_length}, is_fast={self.is_fast}, " - f"padding_side='{self.padding_side}', truncation_side='{self.truncation_side}', special_tokens={self.special_tokens_map_extended})" + f"{'PreTrainedTokenizerFast' if self.is_fast else 'PreTrainedTokenizer'}(name_or_path='{self.name_or_path}'," + f" vocab_size={self.vocab_size}, model_max_len={self.model_max_length}, is_fast={self.is_fast}," + f" padding_side='{self.padding_side}', truncation_side='{self.truncation_side}'," + f" special_tokens={self.special_tokens_map_extended})" ) def get_vocab(self) -> Dict[str, int]: @@ -1873,10 +1872,10 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): if config_tokenizer_class is not None: if cls.__name__.replace("Fast", "") != config_tokenizer_class.replace("Fast", ""): logger.warning( - "The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. " - "It may result in unexpected tokenization. \n" - f"The tokenizer class you load from this checkpoint is '{config_tokenizer_class}'. \n" - f"The class this function is called from is '{cls.__name__}'." + "The tokenizer class you load from this checkpoint is not the same type as the class this" + " function is called from. It may result in unexpected tokenization. \nThe tokenizer class you" + f" load from this checkpoint is '{config_tokenizer_class}'. \nThe class this function is called" + f" from is '{cls.__name__}'." ) # Update with newly provided kwargs @@ -1988,7 +1987,8 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): added_tokens = tokenizer.sanitize_special_tokens() if added_tokens: logger.warning_advice( - "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained." + "Special tokens have been added in the vocabulary, make sure the associated word embeddings are" + " fine-tuned or trained." ) return tokenizer @@ -2270,11 +2270,11 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): if verbose: if not self.deprecation_warnings.get("Truncation-not-explicitly-activated", False): logger.warning( - "Truncation was not explicitly activated but `max_length` is provided a specific value, " - "please use `truncation=True` to explicitly truncate examples to max length. " - "Defaulting to 'longest_first' truncation strategy. " - "If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy " - "more precisely by providing a specific strategy to `truncation`." + "Truncation was not explicitly activated but `max_length` is provided a specific value, please" + " use `truncation=True` to explicitly truncate examples to max length. Defaulting to" + " 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the" + " tokenizer you can select this strategy more precisely by providing a specific strategy to" + " `truncation`." 
) self.deprecation_warnings["Truncation-not-explicitly-activated"] = True truncation = "longest_first" @@ -2316,14 +2316,14 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): if truncation is False and old_truncation_strategy != "do_not_truncate": if verbose: warnings.warn( - "The `truncation_strategy` argument is deprecated and will be removed in a future version, " - "use `truncation=True` to truncate examples to a max length. You can give a specific " - "length with `max_length` (e.g. `max_length=45`) or leave max_length to None to truncate to the " - "maximal input size of the model (e.g. 512 for Bert). " - " If you have pairs of inputs, you can give a specific truncation strategy selected among " - "`truncation='only_first'` (will only truncate the first sentence in the pairs) " - "`truncation='only_second'` (will only truncate the second sentence in the pairs) " - "or `truncation='longest_first'` (will iteratively remove tokens from the longest sentence in the pairs).", + "The `truncation_strategy` argument is deprecated and will be removed in a future version, use" + " `truncation=True` to truncate examples to a max length. You can give a specific length with" + " `max_length` (e.g. `max_length=45`) or leave max_length to None to truncate to the maximal input" + " size of the model (e.g. 512 for Bert). If you have pairs of inputs, you can give a specific" + " truncation strategy selected among `truncation='only_first'` (will only truncate the first" + " sentence in the pairs) `truncation='only_second'` (will only truncate the second sentence in the" + " pairs) or `truncation='longest_first'` (will iteratively remove tokens from the longest sentence" + " in the pairs).", FutureWarning, ) truncation_strategy = TruncationStrategy(old_truncation_strategy) @@ -2346,8 +2346,8 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): if verbose: if not self.deprecation_warnings.get("Asking-to-pad-to-max_length", False): logger.warning( - "Asking to pad to max_length but no maximum length is provided and the model has no predefined maximum length. " - "Default to no padding." + "Asking to pad to max_length but no maximum length is provided and the model has no" + " predefined maximum length. Default to no padding." ) self.deprecation_warnings["Asking-to-pad-to-max_length"] = True padding_strategy = PaddingStrategy.DO_NOT_PAD @@ -2359,8 +2359,8 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): if verbose: if not self.deprecation_warnings.get("Asking-to-truncate-to-max_length", False): logger.warning( - "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. " - "Default to no truncation." + "Asking to truncate to max_length but no maximum length is provided and the model has" + " no predefined maximum length. Default to no truncation." ) self.deprecation_warnings["Asking-to-truncate-to-max_length"] = True truncation_strategy = TruncationStrategy.DO_NOT_TRUNCATE @@ -2384,7 +2384,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): and (max_length % pad_to_multiple_of != 0) ): raise ValueError( - f"Truncation and padding are both activated but " + "Truncation and padding are both activated but " f"truncation length ({max_length}) is not a multiple of pad_to_multiple_of ({pad_to_multiple_of})." 
) @@ -2467,11 +2467,13 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): if is_batched: if isinstance(text_pair, str): raise TypeError( - "when tokenizing batches of text, `text_pair` must be a list or tuple with the same length as `text`." + "when tokenizing batches of text, `text_pair` must be a list or tuple with the same length as" + " `text`." ) if text_pair is not None and len(text) != len(text_pair): raise ValueError( - f"batch length of `text`: {len(text)} does not match batch length of `text_pair`: {len(text_pair)}." + f"batch length of `text`: {len(text)} does not match batch length of `text_pair`:" + f" {len(text_pair)}." ) batch_text_or_text_pairs = list(zip(text, text_pair)) if text_pair is not None else text return self.batch_encode_plus( @@ -2826,7 +2828,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): else: raise ValueError( f"type of {first_element} unknown: {type(first_element)}. " - f"Should be one of a python, numpy, pytorch or tensorflow object." + "Should be one of a python, numpy, pytorch or tensorflow object." ) for key, value in encoded_inputs.items(): @@ -3123,16 +3125,17 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): ) if truncation_strategy == TruncationStrategy.ONLY_FIRST: error_msg = ( - error_msg + "Please select another truncation strategy than " + error_msg + + "Please select another truncation strategy than " f"{truncation_strategy}, for instance 'longest_first' or 'only_second'." ) logger.error(error_msg) elif truncation_strategy == TruncationStrategy.LONGEST_FIRST: logger.warning( - f"Be aware, overflowing tokens are not returned for the setting you have chosen," + "Be aware, overflowing tokens are not returned for the setting you have chosen," f" i.e. sequence pairs with the '{TruncationStrategy.LONGEST_FIRST.value}' " - f"truncation strategy. So the returned list will always be empty even if some " - f"tokens have been removed." + "truncation strategy. So the returned list will always be empty even if some " + "tokens have been removed." ) for _ in range(num_tokens_to_remove): if pair_ids is None or len(ids) > len(pair_ids): @@ -3165,7 +3168,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): f"We need to remove {num_tokens_to_remove} to truncate the input " f"but the second sequence has a length {len(pair_ids)}. " f"Please select another truncation strategy than {truncation_strategy}, " - f"for instance 'longest_first' or 'only_first'." + "for instance 'longest_first' or 'only_first'." ) return (ids, pair_ids, overflowing_tokens) diff --git a/src/transformers/tokenization_utils_fast.py b/src/transformers/tokenization_utils_fast.py index 4f85a842dd..052de38894 100644 --- a/src/transformers/tokenization_utils_fast.py +++ b/src/transformers/tokenization_utils_fast.py @@ -567,8 +567,8 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase): if self.slow_tokenizer_class is None and legacy_format is True: raise ValueError( - "Your tokenizer does not have a legacy version defined and therefore cannot register this version. You " - "might consider leaving the legacy_format at `None` or setting it to `False`." + "Your tokenizer does not have a legacy version defined and therefore cannot register this version. You" + " might consider leaving the legacy_format at `None` or setting it to `False`." 
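For context on the strategies named in the truncation errors and warnings above: when encoding a sequence pair, `truncation` accepts `True` (alias `'longest_first'`), `'only_first'`, or `'only_second'`. A short usage sketch; the checkpoint name is only an example:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("bert-base-uncased")  # example checkpoint
enc = tok(
    "a very long premise " * 50,  # first sequence of the pair
    "a short hypothesis",         # second sequence
    truncation="only_first",      # remove tokens from the first sequence only
    max_length=32,
)
assert len(enc["input_ids"]) == 32  # the pair fits exactly in max_length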
) save_slow = ( diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py index ad221e1d88..8032fa27f9 100755 --- a/src/transformers/trainer.py +++ b/src/transformers/trainer.py @@ -328,8 +328,9 @@ class Trainer: else: if model_init is not None: warnings.warn( - "`Trainer` requires either a `model` or `model_init` argument, but not both. " - "`model_init` will overwrite your model when calling the `train` method. This will become a fatal error in the next release.", + "`Trainer` requires either a `model` or `model_init` argument, but not both. `model_init` will" + " overwrite your model when calling the `train` method. This will become a fatal error in the next" + " release.", FutureWarning, ) self.model_init = model_init @@ -520,7 +521,8 @@ class Trainer: else: if not is_apex_available(): raise ImportError( - "Using FP16 with APEX but APEX is not installed, please refer to https://www.github.com/nvidia/apex." + "Using FP16 with APEX but APEX is not installed, please refer to" + " https://www.github.com/nvidia/apex." ) self.use_apex = True @@ -1068,7 +1070,8 @@ class Trainer: for key, value in params.items(): if not hasattr(self.args, key): logger.warning( - f"Trying to set {key} in the hyperparameter search but there is no corresponding field in `TrainingArguments`." + f"Trying to set {key} in the hyperparameter search but there is no corresponding field in" + " `TrainingArguments`." ) continue old_attr = getattr(self.args, key, None) @@ -1361,7 +1364,8 @@ class Trainer: num_train_samples = args.max_steps * total_train_batch_size else: raise ValueError( - f"args.max_steps must be set to a positive value if dataloader does not have a length, was {args.max_steps}" + "args.max_steps must be set to a positive value if dataloader does not have a length, was" + f" {args.max_steps}" ) if DebugOption.UNDERFLOW_OVERFLOW in self.args.debug: @@ -1369,7 +1373,8 @@ class Trainer: # nn.DataParallel(model) replicates the model, creating new variables and module # references registered here no longer work on other gpus, breaking the module raise ValueError( - "Currently --debug underflow_overflow is not supported under DP. Please use DDP (torch.distributed.launch)." + "Currently --debug underflow_overflow is not supported under DP. Please use DDP" + " (torch.distributed.launch)." ) else: debug_overflow = DebugUnderflowOverflow(self.model) # noqa @@ -1634,7 +1639,7 @@ class Trainer: break if step < 0: logger.warning( - f"There seems to be not a single sample in your epoch_iterator, stopping training at step" + "There seems to be not a single sample in your epoch_iterator, stopping training at step" f" {self.state.global_step}! This is expected if you're using an IterableDataset and set" f" num_steps ({max_steps}) higher than the number of available samples." ) @@ -2293,8 +2298,9 @@ class Trainer: # This must be called on all ranks if not self.deepspeed.save_16bit_model(output_dir, WEIGHTS_NAME): logger.warning( - "deepspeed.save_16bit_model didn't save the model, since stage3_gather_16bit_weights_on_model_save=false. " - "Saving the full checkpoint instead, use zero_to_fp32.py to recover weights" + "deepspeed.save_16bit_model didn't save the model, since" + " stage3_gather_16bit_weights_on_model_save=false. 
Saving the full checkpoint instead, use" + " zero_to_fp32.py to recover weights" ) self.deepspeed.save_checkpoint(output_dir) diff --git a/src/transformers/trainer_callback.py b/src/transformers/trainer_callback.py index 92abe1ed50..06875b74e1 100644 --- a/src/transformers/trainer_callback.py +++ b/src/transformers/trainer_callback.py @@ -556,7 +556,8 @@ class EarlyStoppingCallback(TrainerCallback): if metric_value is None: logger.warning( - f"early stopping required metric_for_best_model, but did not find {metric_to_check} so early stopping is disabled" + f"early stopping required metric_for_best_model, but did not find {metric_to_check} so early stopping" + " is disabled" ) return diff --git a/src/transformers/trainer_utils.py b/src/transformers/trainer_utils.py index e418009af0..afc2e0d156 100644 --- a/src/transformers/trainer_utils.py +++ b/src/transformers/trainer_utils.py @@ -245,7 +245,7 @@ def default_hp_space_optuna(trial) -> Dict[str, float]: def default_hp_space_ray(trial) -> Dict[str, float]: from .integrations import is_ray_tune_available - assert is_ray_tune_available(), "This function needs ray installed: `pip " "install ray[tune]`" + assert is_ray_tune_available(), "This function needs ray installed: `pip install ray[tune]`" from ray import tune return { diff --git a/src/transformers/training_args.py b/src/transformers/training_args.py index f8b15ebc85..76e0132bcc 100644 --- a/src/transformers/training_args.py +++ b/src/transformers/training_args.py @@ -487,15 +487,19 @@ class TrainingArguments: per_gpu_train_batch_size: Optional[int] = field( default=None, metadata={ - "help": "Deprecated, the use of `--per_device_train_batch_size` is preferred. " - "Batch size per GPU/TPU core/CPU for training." + "help": ( + "Deprecated, the use of `--per_device_train_batch_size` is preferred. " + "Batch size per GPU/TPU core/CPU for training." + ) }, ) per_gpu_eval_batch_size: Optional[int] = field( default=None, metadata={ - "help": "Deprecated, the use of `--per_device_eval_batch_size` is preferred. " - "Batch size per GPU/TPU core/CPU for evaluation." + "help": ( + "Deprecated, the use of `--per_device_eval_batch_size` is preferred. " + "Batch size per GPU/TPU core/CPU for evaluation." + ) }, ) @@ -511,7 +515,10 @@ class TrainingArguments: eval_delay: Optional[float] = field( default=0, metadata={ - "help": "Number of epochs or steps to wait for before the first evaluation can be performed, depending on the evaluation_strategy." + "help": ( + "Number of epochs or steps to wait for before the first evaluation can be performed, depending on the" + " evaluation_strategy." + ) }, ) @@ -539,7 +546,11 @@ class TrainingArguments: log_level: Optional[str] = field( default="passive", metadata={ - "help": "Logger log level to use on the main node. Possible choices are the log levels as strings: 'debug', 'info', 'warning', 'error' and 'critical', plus a 'passive' level which doesn't set anything and lets the application set the level. Defaults to 'passive'.", + "help": ( + "Logger log level to use on the main node. Possible choices are the log levels as strings: 'debug'," + " 'info', 'warning', 'error' and 'critical', plus a 'passive' level which doesn't set anything and" + " lets the application set the level. Defaults to 'passive'." 
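The `trainer_log_levels` mapping referenced by the `choices` entry just below maps these strings to `logging` levels, with `passive` as a sentinel meaning the Trainer should not touch the current level. A rough sketch of that idea; the `-1` sentinel mirrors the library's convention, but this snippet is illustrative rather than the actual transformers code:

import logging

trainer_log_levels = {
    "debug": logging.DEBUG,
    "info": logging.INFO,
    "warning": logging.WARNING,
    "error": logging.ERROR,
    "critical": logging.CRITICAL,
    "passive": -1,  # leave whatever level is already configured untouched
}


def apply_log_level(name: str) -> None:
    level = trainer_log_levels[name]
    if level != -1:  # "passive" sets nothing
        logging.getLogger("transformers").setLevel(level)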
+ ), "choices": trainer_log_levels.keys(), }, ) @@ -553,7 +564,10 @@ class TrainingArguments: log_on_each_node: bool = field( default=True, metadata={ - "help": "When doing a multinode distributed training, whether to log once per node or just once on the main node." + "help": ( + "When doing a multinode distributed training, whether to log once per node or just once on the main" + " node." + ) }, ) logging_dir: Optional[str] = field(default=None, metadata={"help": "Tensorboard log dir."}) @@ -581,7 +595,10 @@ class TrainingArguments: save_on_each_node: bool = field( default=False, metadata={ - "help": "When doing multi-node distributed training, whether to save models and checkpoints on each node, or only on the main one" + "help": ( + "When doing multi-node distributed training, whether to save models and checkpoints on each node, or" + " only on the main one" + ) }, ) no_cuda: bool = field(default=False, metadata={"help": "Do not use CUDA even when it is available"}) @@ -590,7 +607,10 @@ class TrainingArguments: bf16: bool = field( default=False, metadata={ - "help": "Whether to use bf16 (mixed) precision instead of 32-bit. Requires Ampere or higher NVIDIA architecture. This is an experimental API and it may change." + "help": ( + "Whether to use bf16 (mixed) precision instead of 32-bit. Requires Ampere or higher NVIDIA" + " architecture. This is an experimental API and it may change." + ) }, ) fp16: bool = field( @@ -613,7 +633,10 @@ class TrainingArguments: bf16_full_eval: bool = field( default=False, metadata={ - "help": "Whether to use full bfloat16 evaluation instead of 32-bit. This is an experimental API and it may change." + "help": ( + "Whether to use full bfloat16 evaluation instead of 32-bit. This is an experimental API and it may" + " change." + ) }, ) fp16_full_eval: bool = field( @@ -623,7 +646,10 @@ class TrainingArguments: tf32: Optional[bool] = field( default=None, metadata={ - "help": "Whether to enable tf32 mode, available in Ampere and newer GPU architectures. This is an experimental API and it may change." + "help": ( + "Whether to enable tf32 mode, available in Ampere and newer GPU architectures. This is an experimental" + " API and it may change." + ) }, ) local_rank: int = field(default=-1, metadata={"help": "For distributed training: local_rank"}) @@ -637,15 +663,19 @@ class TrainingArguments: tpu_metrics_debug: bool = field( default=False, metadata={ - "help": "Deprecated, the use of `--debug tpu_metrics_debug` is preferred. TPU: Whether to print debug metrics" + "help": ( + "Deprecated, the use of `--debug tpu_metrics_debug` is preferred. TPU: Whether to print debug metrics" + ) }, ) debug: str = field( default="", metadata={ - "help": "Whether or not to enable debug mode. Current options: " - "`underflow_overflow` (Detect underflow and overflow in activations and weights), " - "`tpu_metrics_debug` (print debug metrics on TPU)." + "help": ( + "Whether or not to enable debug mode. Current options: " + "`underflow_overflow` (Detect underflow and overflow in activations and weights), " + "`tpu_metrics_debug` (print debug metrics on TPU)." + ) }, ) @@ -656,7 +686,10 @@ class TrainingArguments: dataloader_num_workers: int = field( default=0, metadata={ - "help": "Number of subprocesses to use for data loading (PyTorch only). 0 means that the data will be loaded in the main process." + "help": ( + "Number of subprocesses to use for data loading (PyTorch only). 0 means that the data will be loaded" + " in the main process." 
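Every `help` entry in this file follows the same shape: a `dataclasses.field` whose `metadata["help"]` string is later handed to argparse by `HfArgumentParser`, so the parenthesized fragments must read as one sentence once concatenated. A self-contained sketch of the pattern, with `ExampleArguments` as a hypothetical stand-in for `TrainingArguments`:

from dataclasses import dataclass, field


@dataclass
class ExampleArguments:  # hypothetical stand-in for TrainingArguments
    dataloader_num_workers: int = field(
        default=0,
        metadata={
            "help": (
                "Number of subprocesses to use for data loading (PyTorch only). 0 means that the data will"
                " be loaded in the main process."
            )
        },
    )


help_text = ExampleArguments.__dataclass_fields__["dataloader_num_workers"].metadata["help"]
assert "the data will be loaded" in help_text  # fragments joined into one sentence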
+ ) }, ) @@ -692,37 +725,51 @@ class TrainingArguments: ignore_data_skip: bool = field( default=False, metadata={ - "help": "When resuming training, whether or not to skip the first epochs and batches to get to the same training data." + "help": ( + "When resuming training, whether or not to skip the first epochs and batches to get to the same" + " training data." + ) }, ) sharded_ddp: str = field( default="", metadata={ - "help": "Whether or not to use sharded DDP training (in distributed training only). The base option " - "should be `simple`, `zero_dp_2` or `zero_dp_3` and you can add CPU-offload to `zero_dp_2` or `zero_dp_3` " - "like this: zero_dp_2 offload` or `zero_dp_3 offload`. You can add auto-wrap to `zero_dp_2` or `zero_dp_3` " - "with the same syntax: zero_dp_2 auto_wrap` or `zero_dp_3 auto_wrap`.", + "help": ( + "Whether or not to use sharded DDP training (in distributed training only). The base option should be" + " `simple`, `zero_dp_2` or `zero_dp_3` and you can add CPU-offload to `zero_dp_2` or `zero_dp_3` like" + " this: `zero_dp_2 offload` or `zero_dp_3 offload`. You can add auto-wrap to `zero_dp_2` or `zero_dp_3`" + " with the same syntax: `zero_dp_2 auto_wrap` or `zero_dp_3 auto_wrap`." + ), }, ) fsdp: str = field( default="", metadata={ - "help": "Whether or not to use PyTorch Fully Sharded Data Parallel (FSDP) training (in distributed training only). The base option " - "should be `full_shard` or `shard_grad_op` and you can add CPU-offload to `full_shard` or `shard_grad_op` " - "like this: full_shard offload` or `shard_grad_op offload`. You can add auto-wrap to `full_shard` or `shard_grad_op` " - "with the same syntax: full_shard auto_wrap` or `shard_grad_op auto_wrap`.", + "help": ( + "Whether or not to use PyTorch Fully Sharded Data Parallel (FSDP) training (in distributed training" + " only). The base option should be `full_shard` or `shard_grad_op` and you can add CPU-offload to" + " `full_shard` or `shard_grad_op` like this: `full_shard offload` or `shard_grad_op offload`. You can" + " add auto-wrap to `full_shard` or `shard_grad_op` with the same syntax: `full_shard auto_wrap` or" + " `shard_grad_op auto_wrap`." + ), }, ) fsdp_min_num_params: int = field( default=0, metadata={ - "help": "FSDP's minimum number of parameters for Default Auto Wrapping. (useful only when `fsdp` field is passed)." + "help": ( + "FSDP's minimum number of parameters for Default Auto Wrapping. (useful only when `fsdp` field is" + " passed)." + ) }, ) deepspeed: Optional[str] = field( default=None, metadata={ - "help": "Enable deepspeed and pass the path to deepspeed json config file (e.g. ds_config.json) or an already loaded json file as a dict" + "help": ( + "Enable deepspeed and pass the path to deepspeed json config file (e.g. ds_config.json) or an already" + " loaded json file as a dict" + ) }, ) label_smoothing_factor: float = field( @@ -747,15 +794,19 @@ class TrainingArguments: ddp_find_unused_parameters: Optional[bool] = field( default=None, metadata={ - "help": "When using distributed training, the value of the flag `find_unused_parameters` passed to " - "`DistributedDataParallel`." + "help": ( + "When using distributed training, the value of the flag `find_unused_parameters` passed to " + "`DistributedDataParallel`." + ) }, ) ddp_bucket_cap_mb: Optional[int] = field( default=None, metadata={ - "help": "When using distributed training, the value of the flag `bucket_cap_mb` passed to " - "`DistributedDataParallel`."
+ "help": ( + "When using distributed training, the value of the flag `bucket_cap_mb` passed to " + "`DistributedDataParallel`." + ) }, ) dataloader_pin_memory: bool = field( @@ -815,13 +866,19 @@ class TrainingArguments: auto_find_batch_size: bool = field( default=False, metadata={ - "help": "Whether to automatically decrease the batch size in half and rerun the training loop again each time a CUDA Out-of-Memory was reached" + "help": ( + "Whether to automatically decrease the batch size in half and rerun the training loop again each time" + " a CUDA Out-of-Memory was reached" + ) }, ) full_determinism: bool = field( default=False, metadata={ - "help": "Whether to call enable_full_determinism instead of set_seed for reproducibility in distributed training" + "help": ( + "Whether to call enable_full_determinism instead of set_seed for reproducibility in distributed" + " training" + ) }, ) @@ -851,7 +908,8 @@ class TrainingArguments: if isinstance(self.evaluation_strategy, EvaluationStrategy): warnings.warn( - "using `EvaluationStrategy` for `evaluation_strategy` is deprecated and will be removed in version 5 of 🤗 Transformers. Use `IntervalStrategy` instead", + "using `EvaluationStrategy` for `evaluation_strategy` is deprecated and will be removed in version 5" + " of 🤗 Transformers. Use `IntervalStrategy` instead", FutureWarning, ) # Go back to the underlying string or we won't be able to instantiate `IntervalStrategy` on it. @@ -873,7 +931,8 @@ class TrainingArguments: self.eval_steps = self.logging_steps else: raise ValueError( - f"evaluation strategy {self.evaluation_strategy} requires either non-zero --eval_steps or --logging_steps" + f"evaluation strategy {self.evaluation_strategy} requires either non-zero --eval_steps or" + " --logging_steps" ) # logging_steps must be non-zero for logging_strategy that is other than 'no' @@ -902,7 +961,8 @@ class TrainingArguments: if self.fp16_backend and self.fp16_backend != "auto": warnings.warn( - "`fp16_backend` is deprecated and will be removed in version 5 of 🤗 Transformers. Use `half_precision_backend` instead", + "`fp16_backend` is deprecated and will be removed in version 5 of 🤗 Transformers. Use" + " `half_precision_backend` instead", FutureWarning, ) self.half_precision_backend = self.fp16_backend @@ -915,7 +975,8 @@ class TrainingArguments: if self.bf16: if self.half_precision_backend == "apex": raise ValueError( - " `--half_precision_backend apex`: bf16 is not supported by apex. Use `--half_precision_backend amp` instead" + " `--half_precision_backend apex`: bf16 is not supported by apex. Use `--half_precision_backend" + " amp` instead" ) if not (self.sharded_ddp == "" or not self.sharded_ddp): raise ValueError("sharded_ddp is not supported with bf16") @@ -923,7 +984,8 @@ class TrainingArguments: self.optim = OptimizerNames(self.optim) if self.adafactor: warnings.warn( - "`--adafactor` is deprecated and will be removed in version 5 of 🤗 Transformers. Use `--optim adafactor` instead", + "`--adafactor` is deprecated and will be removed in version 5 of 🤗 Transformers. Use `--optim" + " adafactor` instead", FutureWarning, ) self.optim = OptimizerNames.ADAFACTOR @@ -935,7 +997,8 @@ class TrainingArguments: and (self.fp16 or self.fp16_full_eval or self.bf16 or self.bf16_full_eval) ): raise ValueError( - "Mixed precision training with AMP or APEX (`--fp16` or `--bf16`) and half precision evaluation (`--fp16_full_eval` or `--bf16_full_eval`) can only be used on CUDA devices." 
+ "Mixed precision training with AMP or APEX (`--fp16` or `--bf16`) and half precision evaluation" + " (`--fp16_full_eval` or `--bf16_full_eval`) can only be used on CUDA devices." ) if is_torch_available() and self.tf32 is not None: @@ -970,7 +1033,8 @@ class TrainingArguments: raise ValueError("warmup_ratio must lie in range [0,1]") elif self.warmup_ratio > 0 and self.warmup_steps > 0: logger.info( - "Both warmup_ratio and warmup_steps given, warmup_steps will override any effect of warmup_ratio during training" + "Both warmup_ratio and warmup_steps given, warmup_steps will override any effect of warmup_ratio" + " during training" ) if isinstance(self.sharded_ddp, bool): @@ -1004,7 +1068,8 @@ class TrainingArguments: if self.tpu_metrics_debug: warnings.warn( - "using `--tpu_metrics_debug` is deprecated and will be removed in version 5 of 🤗 Transformers. Use `--debug tpu_metrics_debug` instead", + "using `--tpu_metrics_debug` is deprecated and will be removed in version 5 of 🤗 Transformers. Use" + " `--debug tpu_metrics_debug` instead", FutureWarning, ) self.debug += " tpu_metrics_debug" diff --git a/src/transformers/training_args_seq2seq.py b/src/transformers/training_args_seq2seq.py index ef3ccdf260..026dce81bc 100644 --- a/src/transformers/training_args_seq2seq.py +++ b/src/transformers/training_args_seq2seq.py @@ -51,14 +51,18 @@ class Seq2SeqTrainingArguments(TrainingArguments): generation_max_length: Optional[int] = field( default=None, metadata={ - "help": "The `max_length` to use on each evaluation loop when `predict_with_generate=True`. Will default " - "to the `max_length` value of the model configuration." + "help": ( + "The `max_length` to use on each evaluation loop when `predict_with_generate=True`. Will default " + "to the `max_length` value of the model configuration." + ) }, ) generation_num_beams: Optional[int] = field( default=None, metadata={ - "help": "The `num_beams` to use on each evaluation loop when `predict_with_generate=True`. Will default " - "to the `num_beams` value of the model configuration." + "help": ( + "The `num_beams` to use on each evaluation loop when `predict_with_generate=True`. Will default " + "to the `num_beams` value of the model configuration." + ) }, ) diff --git a/src/transformers/utils/__init__.py b/src/transformers/utils/__init__.py index 88891365f0..2106cdb007 100644 --- a/src/transformers/utils/__init__.py +++ b/src/transformers/utils/__init__.py @@ -171,8 +171,6 @@ def check_min_version(min_version): error_message += f" but the version found is {__version__}.\n" raise ImportError( error_message - + ( - "Check out https://huggingface.co/transformers/examples.html for the examples corresponding to other " - "versions of HuggingFace Transformers." - ) + + "Check out https://huggingface.co/transformers/examples.html for the examples corresponding to other " + "versions of HuggingFace Transformers." ) diff --git a/src/transformers/utils/hub.py b/src/transformers/utils/hub.py index 7386fe34f5..7f2c8c5662 100644 --- a/src/transformers/utils/hub.py +++ b/src/transformers/utils/hub.py @@ -77,11 +77,11 @@ if ( and "TRANSFORMERS_CACHE" not in os.environ ): logger.warning( - "In Transformers v4.0.0, the default path to cache downloaded models changed from " - "'~/.cache/torch/transformers' to '~/.cache/huggingface/transformers'. 
Since you don't seem to have overridden " - "and '~/.cache/torch/transformers' is a directory that exists, we're moving it to " - "'~/.cache/huggingface/transformers' to avoid redownloading models you have already in the cache. You should " - "only see this message once." + "In Transformers v4.0.0, the default path to cache downloaded models changed from" + " '~/.cache/torch/transformers' to '~/.cache/huggingface/transformers'. Since you don't seem to have" + " overridden and '~/.cache/torch/transformers' is a directory that exists, we're moving it to" + " '~/.cache/huggingface/transformers' to avoid redownloading models you have already in the cache. You should" + " only see this message once." ) shutil.move(old_default_cache_path, default_cache_path) @@ -408,7 +408,7 @@ def _raise_for_status(request): elif error_code == "EntryNotFound": raise EntryNotFoundError(f"404 Client Error: Entry Not Found for url: {request.url}") elif error_code == "RevisionNotFound": - raise RevisionNotFoundError((f"404 Client Error: Revision Not Found for url: {request.url}")) + raise RevisionNotFoundError(f"404 Client Error: Revision Not Found for url: {request.url}") request.raise_for_status() diff --git a/src/transformers/utils/import_utils.py b/src/transformers/utils/import_utils.py index 3ee89167b2..ce1d43cc78 100644 --- a/src/transformers/utils/import_utils.py +++ b/src/transformers/utils/import_utils.py @@ -872,7 +872,8 @@ class _LazyModule(ModuleType): return importlib.import_module("." + module_name, self.__name__) except Exception as e: raise RuntimeError( - f"Failed to import {self.__name__}.{module_name} because of the following error (look up to see its traceback):\n{e}" + f"Failed to import {self.__name__}.{module_name} because of the following error (look up to see its" + f" traceback):\n{e}" ) from e def __reduce__(self): diff --git a/src/transformers/utils/model_parallel_utils.py b/src/transformers/utils/model_parallel_utils.py index abddd6c60f..bcbe808013 100644 --- a/src/transformers/utils/model_parallel_utils.py +++ b/src/transformers/utils/model_parallel_utils.py @@ -32,13 +32,15 @@ def assert_device_map(device_map, num_blocks): if len(duplicate_blocks) != 0: raise ValueError( - "Duplicate attention blocks specified in device_map. Attention blocks must be specified to one device. These " - "attention blocks were specified more than once: " + str(duplicate_blocks) + "Duplicate attention blocks specified in device_map. Attention blocks must be specified to one device." + " These attention blocks were specified more than once: " + + str(duplicate_blocks) ) if len(missing_blocks) != 0: raise ValueError( "There are attention blocks for this model that are not specified in the device_map. 
Add these attention " - "blocks to a device on the device_map: " + str(missing_blocks) + "blocks to a device on the device_map: " + + str(missing_blocks) ) if len(extra_blocks) != 0: raise ValueError( diff --git a/src/transformers/utils/notebook.py b/src/transformers/utils/notebook.py index 0ffbdc8dee..f671ad737c 100644 --- a/src/transformers/utils/notebook.py +++ b/src/transformers/utils/notebook.py @@ -174,7 +174,10 @@ class NotebookProgressBar: elif self.predicted_remaining is None: self.label = f"[{spaced_value}/{self.total} {format_time(self.elapsed_time)}" else: - self.label = f"[{spaced_value}/{self.total} {format_time(self.elapsed_time)} < {format_time(self.predicted_remaining)}" + self.label = ( + f"[{spaced_value}/{self.total} {format_time(self.elapsed_time)} <" + f" {format_time(self.predicted_remaining)}" + ) self.label += f", {1/self.average_time_per_item:.2f} it/s" self.label += "]" if self.comment is None or len(self.comment) == 0 else f", {self.comment}]" self.display() diff --git a/src/transformers/utils/sentencepiece_model_pb2.py b/src/transformers/utils/sentencepiece_model_pb2.py index 5d52b365ca..41411cee8c 100644 --- a/src/transformers/utils/sentencepiece_model_pb2.py +++ b/src/transformers/utils/sentencepiece_model_pb2.py @@ -32,7 +32,53 @@ DESCRIPTOR = _descriptor.FileDescriptor( syntax="proto2", serialized_options=b"H\003", create_key=_descriptor._internal_create_key, - serialized_pb=b'\n\x19sentencepiece_model.proto\x12\rsentencepiece"\xa1\n\n\x0bTrainerSpec\x12\r\n\x05input\x18\x01 \x03(\t\x12\x14\n\x0cinput_format\x18\x07 \x01(\t\x12\x14\n\x0cmodel_prefix\x18\x02 \x01(\t\x12\x41\n\nmodel_type\x18\x03 \x01(\x0e\x32$.sentencepiece.TrainerSpec.ModelType:\x07UNIGRAM\x12\x18\n\nvocab_size\x18\x04 \x01(\x05:\x04\x38\x30\x30\x30\x12\x17\n\x0f\x61\x63\x63\x65pt_language\x18\x05 \x03(\t\x12 \n\x15self_test_sample_size\x18\x06 \x01(\x05:\x01\x30\x12"\n\x12\x63haracter_coverage\x18\n \x01(\x02:\x06\x30.9995\x12\x1e\n\x13input_sentence_size\x18\x0b \x01(\x04:\x01\x30\x12$\n\x16shuffle_input_sentence\x18\x13 \x01(\x08:\x04true\x12 \n\x14mining_sentence_size\x18\x0c \x01(\x05\x42\x02\x18\x01\x12"\n\x16training_sentence_size\x18\r \x01(\x05\x42\x02\x18\x01\x12(\n\x17seed_sentencepiece_size\x18\x0e \x01(\x05:\x07\x31\x30\x30\x30\x30\x30\x30\x12\x1e\n\x10shrinking_factor\x18\x0f \x01(\x02:\x04\x30.75\x12!\n\x13max_sentence_length\x18\x12 \x01(\x05:\x04\x34\x31\x39\x32\x12\x17\n\x0bnum_threads\x18\x10 \x01(\x05:\x02\x31\x36\x12\x1d\n\x12num_sub_iterations\x18\x11 \x01(\x05:\x01\x32\x12$\n\x18max_sentencepiece_length\x18\x14 \x01(\x05:\x02\x31\x36\x12%\n\x17split_by_unicode_script\x18\x15 \x01(\x08:\x04true\x12\x1d\n\x0fsplit_by_number\x18\x17 \x01(\x08:\x04true\x12!\n\x13split_by_whitespace\x18\x16 \x01(\x08:\x04true\x12)\n\x1atreat_whitespace_as_suffix\x18\x18 \x01(\x08:\x05\x66\x61lse\x12\x1b\n\x0csplit_digits\x18\x19 \x01(\x08:\x05\x66\x61lse\x12\x17\n\x0f\x63ontrol_symbols\x18\x1e \x03(\t\x12\x1c\n\x14user_defined_symbols\x18\x1f \x03(\t\x12\x16\n\x0erequired_chars\x18$ \x01(\t\x12\x1c\n\rbyte_fallback\x18# \x01(\x08:\x05\x66\x61lse\x12+\n\x1dvocabulary_output_piece_score\x18 \x01(\x08:\x04true\x12\x1e\n\x10hard_vocab_limit\x18! \x01(\x08:\x04true\x12\x1c\n\ruse_all_vocab\x18" \x01(\x08:\x05\x66\x61lse\x12\x11\n\x06unk_id\x18( \x01(\x05:\x01\x30\x12\x11\n\x06\x62os_id\x18) \x01(\x05:\x01\x31\x12\x11\n\x06\x65os_id\x18* \x01(\x05:\x01\x32\x12\x12\n\x06pad_id\x18+ \x01(\x05:\x02-1\x12\x18\n\tunk_piece\x18- \x01(\t:\x05\x12\x16\n\tbos_piece\x18. 
\x01(\t:\x03\x12\x17\n\teos_piece\x18/ \x01(\t:\x04\x12\x18\n\tpad_piece\x18\x30 \x01(\t:\x05\x12\x1a\n\x0bunk_surface\x18, \x01(\t:\x05 \xe2\x81\x87 \x12+\n\x1ctrain_extremely_large_corpus\x18\x31 \x01(\x08:\x05\x66\x61lse"5\n\tModelType\x12\x0b\n\x07UNIGRAM\x10\x01\x12\x07\n\x03\x42PE\x10\x02\x12\x08\n\x04WORD\x10\x03\x12\x08\n\x04\x43HAR\x10\x04*\t\x08\xc8\x01\x10\x80\x80\x80\x80\x02"\xd1\x01\n\x0eNormalizerSpec\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x1c\n\x14precompiled_charsmap\x18\x02 \x01(\x0c\x12\x1e\n\x10\x61\x64\x64_dummy_prefix\x18\x03 \x01(\x08:\x04true\x12&\n\x18remove_extra_whitespaces\x18\x04 \x01(\x08:\x04true\x12 \n\x12\x65scape_whitespaces\x18\x05 \x01(\x08:\x04true\x12\x1e\n\x16normalization_rule_tsv\x18\x06 \x01(\t*\t\x08\xc8\x01\x10\x80\x80\x80\x80\x02"y\n\x0cSelfTestData\x12\x33\n\x07samples\x18\x01 \x03(\x0b\x32".sentencepiece.SelfTestData.Sample\x1a)\n\x06Sample\x12\r\n\x05input\x18\x01 \x01(\t\x12\x10\n\x08\x65xpected\x18\x02 \x01(\t*\t\x08\xc8\x01\x10\x80\x80\x80\x80\x02"\xfe\x03\n\nModelProto\x12\x37\n\x06pieces\x18\x01 \x03(\x0b\x32\'.sentencepiece.ModelProto.SentencePiece\x12\x30\n\x0ctrainer_spec\x18\x02 \x01(\x0b\x32\x1a.sentencepiece.TrainerSpec\x12\x36\n\x0fnormalizer_spec\x18\x03 \x01(\x0b\x32\x1d.sentencepiece.NormalizerSpec\x12\x33\n\x0eself_test_data\x18\x04 \x01(\x0b\x32\x1b.sentencepiece.SelfTestData\x12\x38\n\x11\x64\x65normalizer_spec\x18\x05 \x01(\x0b\x32\x1d.sentencepiece.NormalizerSpec\x1a\xd2\x01\n\rSentencePiece\x12\r\n\x05piece\x18\x01 \x01(\t\x12\r\n\x05score\x18\x02 \x01(\x02\x12\x42\n\x04type\x18\x03 \x01(\x0e\x32,.sentencepiece.ModelProto.SentencePiece.Type:\x06NORMAL"T\n\x04Type\x12\n\n\x06NORMAL\x10\x01\x12\x0b\n\x07UNKNOWN\x10\x02\x12\x0b\n\x07\x43ONTROL\x10\x03\x12\x10\n\x0cUSER_DEFINED\x10\x04\x12\x08\n\x04\x42YTE\x10\x06\x12\n\n\x06UNUSED\x10\x05*\t\x08\xc8\x01\x10\x80\x80\x80\x80\x02*\t\x08\xc8\x01\x10\x80\x80\x80\x80\x02\x42\x02H\x03', + serialized_pb=( + b'\n\x19sentencepiece_model.proto\x12\rsentencepiece"\xa1\n\n\x0bTrainerSpec\x12\r\n\x05input\x18\x01' + b" \x03(\t\x12\x14\n\x0cinput_format\x18\x07 \x01(\t\x12\x14\n\x0cmodel_prefix\x18\x02" + b" \x01(\t\x12\x41\n\nmodel_type\x18\x03" + b" \x01(\x0e\x32$.sentencepiece.TrainerSpec.ModelType:\x07UNIGRAM\x12\x18\n\nvocab_size\x18\x04" + b" \x01(\x05:\x04\x38\x30\x30\x30\x12\x17\n\x0f\x61\x63\x63\x65pt_language\x18\x05 \x03(\t\x12" + b' \n\x15self_test_sample_size\x18\x06 \x01(\x05:\x01\x30\x12"\n\x12\x63haracter_coverage\x18\n' + b" \x01(\x02:\x06\x30.9995\x12\x1e\n\x13input_sentence_size\x18\x0b" + b" \x01(\x04:\x01\x30\x12$\n\x16shuffle_input_sentence\x18\x13 \x01(\x08:\x04true\x12" + b' \n\x14mining_sentence_size\x18\x0c \x01(\x05\x42\x02\x18\x01\x12"\n\x16training_sentence_size\x18\r' + b" \x01(\x05\x42\x02\x18\x01\x12(\n\x17seed_sentencepiece_size\x18\x0e" + b" \x01(\x05:\x07\x31\x30\x30\x30\x30\x30\x30\x12\x1e\n\x10shrinking_factor\x18\x0f" + b" \x01(\x02:\x04\x30.75\x12!\n\x13max_sentence_length\x18\x12" + b" \x01(\x05:\x04\x34\x31\x39\x32\x12\x17\n\x0bnum_threads\x18\x10" + b" \x01(\x05:\x02\x31\x36\x12\x1d\n\x12num_sub_iterations\x18\x11" + b" \x01(\x05:\x01\x32\x12$\n\x18max_sentencepiece_length\x18\x14" + b" \x01(\x05:\x02\x31\x36\x12%\n\x17split_by_unicode_script\x18\x15" + b" \x01(\x08:\x04true\x12\x1d\n\x0fsplit_by_number\x18\x17" + b" \x01(\x08:\x04true\x12!\n\x13split_by_whitespace\x18\x16" + b" \x01(\x08:\x04true\x12)\n\x1atreat_whitespace_as_suffix\x18\x18" + b" \x01(\x08:\x05\x66\x61lse\x12\x1b\n\x0csplit_digits\x18\x19" + b" 
\x01(\x08:\x05\x66\x61lse\x12\x17\n\x0f\x63ontrol_symbols\x18\x1e" + b" \x03(\t\x12\x1c\n\x14user_defined_symbols\x18\x1f \x03(\t\x12\x16\n\x0erequired_chars\x18$" + b" \x01(\t\x12\x1c\n\rbyte_fallback\x18# \x01(\x08:\x05\x66\x61lse\x12+\n\x1dvocabulary_output_piece_score\x18" + b' \x01(\x08:\x04true\x12\x1e\n\x10hard_vocab_limit\x18! \x01(\x08:\x04true\x12\x1c\n\ruse_all_vocab\x18"' + b" \x01(\x08:\x05\x66\x61lse\x12\x11\n\x06unk_id\x18( \x01(\x05:\x01\x30\x12\x11\n\x06\x62os_id\x18)" + b" \x01(\x05:\x01\x31\x12\x11\n\x06\x65os_id\x18* \x01(\x05:\x01\x32\x12\x12\n\x06pad_id\x18+" + b" \x01(\x05:\x02-1\x12\x18\n\tunk_piece\x18- \x01(\t:\x05\x12\x16\n\tbos_piece\x18." + b" \x01(\t:\x03\x12\x17\n\teos_piece\x18/ \x01(\t:\x04\x12\x18\n\tpad_piece\x18\x30" + b" \x01(\t:\x05\x12\x1a\n\x0bunk_surface\x18, \x01(\t:\x05 \xe2\x81\x87" + b" \x12+\n\x1ctrain_extremely_large_corpus\x18\x31" + b' \x01(\x08:\x05\x66\x61lse"5\n\tModelType\x12\x0b\n\x07UNIGRAM\x10\x01\x12\x07\n\x03\x42PE\x10\x02\x12\x08\n\x04WORD\x10\x03\x12\x08\n\x04\x43HAR\x10\x04*\t\x08\xc8\x01\x10\x80\x80\x80\x80\x02"\xd1\x01\n\x0eNormalizerSpec\x12\x0c\n\x04name\x18\x01' + b" \x01(\t\x12\x1c\n\x14precompiled_charsmap\x18\x02 \x01(\x0c\x12\x1e\n\x10\x61\x64\x64_dummy_prefix\x18\x03" + b" \x01(\x08:\x04true\x12&\n\x18remove_extra_whitespaces\x18\x04 \x01(\x08:\x04true\x12" + b" \n\x12\x65scape_whitespaces\x18\x05 \x01(\x08:\x04true\x12\x1e\n\x16normalization_rule_tsv\x18\x06" + b' \x01(\t*\t\x08\xc8\x01\x10\x80\x80\x80\x80\x02"y\n\x0cSelfTestData\x12\x33\n\x07samples\x18\x01' + b' \x03(\x0b\x32".sentencepiece.SelfTestData.Sample\x1a)\n\x06Sample\x12\r\n\x05input\x18\x01' + b" \x01(\t\x12\x10\n\x08\x65xpected\x18\x02" + b' \x01(\t*\t\x08\xc8\x01\x10\x80\x80\x80\x80\x02"\xfe\x03\n\nModelProto\x12\x37\n\x06pieces\x18\x01' + b" \x03(\x0b\x32'.sentencepiece.ModelProto.SentencePiece\x12\x30\n\x0ctrainer_spec\x18\x02" + b" \x01(\x0b\x32\x1a.sentencepiece.TrainerSpec\x12\x36\n\x0fnormalizer_spec\x18\x03" + b" \x01(\x0b\x32\x1d.sentencepiece.NormalizerSpec\x12\x33\n\x0eself_test_data\x18\x04" + b" \x01(\x0b\x32\x1b.sentencepiece.SelfTestData\x12\x38\n\x11\x64\x65normalizer_spec\x18\x05" + b" \x01(\x0b\x32\x1d.sentencepiece.NormalizerSpec\x1a\xd2\x01\n\rSentencePiece\x12\r\n\x05piece\x18\x01" + b" \x01(\t\x12\r\n\x05score\x18\x02 \x01(\x02\x12\x42\n\x04type\x18\x03" + b' \x01(\x0e\x32,.sentencepiece.ModelProto.SentencePiece.Type:\x06NORMAL"T\n\x04Type\x12\n\n\x06NORMAL\x10\x01\x12\x0b\n\x07UNKNOWN\x10\x02\x12\x0b\n\x07\x43ONTROL\x10\x03\x12\x10\n\x0cUSER_DEFINED\x10\x04\x12\x08\n\x04\x42YTE\x10\x06\x12\n\n\x06UNUSED\x10\x05*\t\x08\xc8\x01\x10\x80\x80\x80\x80\x02*\t\x08\xc8\x01\x10\x80\x80\x80\x80\x02\x42\x02H\x03' + ), ) diff --git a/src/transformers/utils/versions.py b/src/transformers/utils/versions.py index 26a160f1fd..14db9b55e5 100644 --- a/src/transformers/utils/versions.py +++ b/src/transformers/utils/versions.py @@ -77,7 +77,8 @@ def require_version(requirement: str, hint: Optional[str] = None) -> None: match = re.findall(r"^([^!=<>\s]+)([\s!=<>]{1,2}.+)", requirement) if not match: raise ValueError( - f"requirement needs to be in the pip package format, .e.g., package_a==1.23, or package_b>=1.23, but got {requirement}" + "requirement needs to be in the pip package format, .e.g., package_a==1.23, or package_b>=1.23, but" + f" got {requirement}" ) pkg, want_full = match[0] want_range = want_full.split(",") # there could be multiple requirements @@ -86,7 +87,8 @@ def require_version(requirement: str, hint: Optional[str] = None) -> None: 
match = re.findall(r"^([\s!=<>]{1,2})(.+)", w) if not match: raise ValueError( - f"requirement needs to be in the pip package format, .e.g., package_a==1.23, or package_b>=1.23, but got {requirement}" + "requirement needs to be in the pip package format, e.g., package_a==1.23, or package_b>=1.23," + f" but got {requirement}" ) op, want_ver = match[0] wanted[op] = want_ver diff --git a/tests/extended/test_trainer_ext.py b/tests/extended/test_trainer_ext.py index 3d88ebda45..c555e0381e 100644 --- a/tests/extended/test_trainer_ext.py +++ b/tests/extended/test_trainer_ext.py @@ -278,7 +278,8 @@ class TestTrainerExt(TestCasePlus): self.assertGreater( gpu_total_mem_diff_bytes, bnb_saved_bytes * 0.8, # add a safety margin, if it saved slightly less - f"BNB should have saved about {bnb_saved_bytes} bytes, but the saved bytes were {gpu_total_mem_diff_bytes}", + f"BNB should have saved about {bnb_saved_bytes} bytes, but the saved bytes were" + f" {gpu_total_mem_diff_bytes}", ) def run_trainer( diff --git a/tests/generation/test_generation_beam_search.py b/tests/generation/test_generation_beam_search.py index 3971dcc79c..7ca4ac9b08 100644 --- a/tests/generation/test_generation_beam_search.py +++ b/tests/generation/test_generation_beam_search.py @@ -464,7 +464,7 @@ class ConstrainedBeamSearchTester: self.parent.assertNotEqual(sequences[2, -1].item(), self.eos_token_id) # test that the constraint is indeed fulfilled - for (output, constraint) in [(s, c) for s in sequences for c in constraints]: + for output, constraint in [(s, c) for s in sequences for c in constraints]: forced_token_ids = constraint.token_ids if isinstance(forced_token_ids[0], list): # disjunctive case diff --git a/tests/generation/test_generation_utils.py b/tests/generation/test_generation_utils.py index 6006dbe21c..707f1f84d7 100644 --- a/tests/generation/test_generation_utils.py +++ b/tests/generation/test_generation_utils.py @@ -1654,8 +1654,12 @@ class GenerationIntegrationTests(unittest.TestCase): self.assertListEqual( generated_text, [ - "The couple announced the birth of their son, Silas Randall Timberlake, in a statement. Silas was the middle name of Timberlake's maternal grandfather Bill Bomar. Randall is the musician's own middle name, as well as his father's first. It is the first baby for both of them.", - "Justin Timberlake and Jessica Biel have a son. The baby is named Silas Randall Timberlake. It is the first child for both. The couple announced the pregnancy in January. The name Silas is the middle name of Timberlake's maternal grandfather. It's also his own middle name.", + "The couple announced the birth of their son, Silas Randall Timberlake, in a statement. Silas was the" + " middle name of Timberlake's maternal grandfather Bill Bomar. Randall is the musician's own middle" + " name, as well as his father's first. It is the first baby for both of them.", + "Justin Timberlake and Jessica Biel have a son. The baby is named Silas Randall Timberlake. It is the" + " first child for both. The couple announced the pregnancy in January. The name Silas is the middle" + " name of Timberlake's maternal grandfather. It's also his own middle name.", ], ) @@ -2392,7 +2396,8 @@ class GenerationIntegrationTests(unittest.TestCase): self.assertListEqual( generated_text, [ - "The soldiers were not prepared and didn't know how big the big weapons would be, so they scared them off. They had no idea what to do", + "The soldiers were not prepared and didn't know how big the big weapons would be, so they scared them" + " off. 
They had no idea what to do", ], ) diff --git a/tests/models/bart/test_modeling_bart.py b/tests/models/bart/test_modeling_bart.py index 18fc66a4f5..279204b574 100644 --- a/tests/models/bart/test_modeling_bart.py +++ b/tests/models/bart/test_modeling_bart.py @@ -521,8 +521,47 @@ class FastIntegrationTests(unittest.TestCase): def test_xsum_1_1_generation(self): hf = self.xsum_1_1_model tok = self.tok - ARTICLE = 'The Palestinian Authority officially became the 123rd member of the International Criminal Court on Wednesday, a step that gives the court jurisdiction over alleged crimes in Palestinian territories. The formal accession was marked with a ceremony at The Hague, in the Netherlands, where the court is based. The Palestinians signed the ICC\'s founding Rome Statute in January, when they also accepted its jurisdiction over alleged crimes committed "in the occupied Palestinian territory, including East Jerusalem, since June 13, 2014." Later that month, the ICC opened a preliminary examination into the situation in Palestinian territories, paving the way for possible war crimes investigations against Israelis. As members of the court, Palestinians may be subject to counter-charges as well. Israel and the United States, neither of which is an ICC member, opposed the Palestinians\' efforts to join the body. But Palestinian Foreign Minister Riad al-Malki, speaking at Wednesday\'s ceremony, said it was a move toward greater justice. "As Palestine formally becomes a State Party to the Rome Statute today, the world is also a step closer to ending a long era of impunity and injustice," he said, according to an ICC news release. "Indeed, today brings us closer to our shared goals of justice and peace." Judge Kuniko Ozaki, a vice president of the ICC, said acceding to the treaty was just the first step for the Palestinians. "As the Rome Statute today enters into force for the State of Palestine, Palestine acquires all the rights as well as responsibilities that come with being a State Party to the Statute. These are substantive commitments, which cannot be taken lightly," she said. Rights group Human Rights Watch welcomed the development. "Governments seeking to penalize Palestine for joining the ICC should immediately end their pressure, and countries that support universal acceptance of the court\'s treaty should speak out to welcome its membership," said Balkees Jarrah, international justice counsel for the group. "What\'s objectionable is the attempts to undermine international justice, not Palestine\'s decision to join a treaty to which over 100 countries around the world are members." In January, when the preliminary ICC examination was opened, Israeli Prime Minister Benjamin Netanyahu described it as an outrage, saying the court was overstepping its boundaries. The United States also said it "strongly" disagreed with the court\'s decision. "As we have said repeatedly, we do not believe that Palestine is a state and therefore we do not believe that it is eligible to join the ICC," the State Department said in a statement. It urged the warring sides to resolve their differences through direct negotiations. "We will continue to oppose actions against Israel at the ICC as counterproductive to the cause of peace," it said. But the ICC begs to differ with the definition of a state for its purposes and refers to the territories as "Palestine." 
While a preliminary examination is not a formal investigation, it allows the court to review evidence and determine whether to investigate suspects on both sides. Prosecutor Fatou Bensouda said her office would "conduct its analysis in full independence and impartiality." The war between Israel and Hamas militants in Gaza last summer left more than 2,000 people dead. The inquiry will include alleged war crimes committed since June. The International Criminal Court was set up in 2002 to prosecute genocide, crimes against humanity and war crimes.'
-        EXPECTED = " The International Criminal Court (ICC) has announced that it has been announced by the International Criminal court."
+        ARTICLE = (
+            "The Palestinian Authority officially became the 123rd member of the International Criminal Court on"
+            " Wednesday, a step that gives the court jurisdiction over alleged crimes in Palestinian territories. The"
+            " formal accession was marked with a ceremony at The Hague, in the Netherlands, where the court is based."
+            " The Palestinians signed the ICC's founding Rome Statute in January, when they also accepted its"
+            ' jurisdiction over alleged crimes committed "in the occupied Palestinian territory, including East'
+            ' Jerusalem, since June 13, 2014." Later that month, the ICC opened a preliminary examination into the'
+            " situation in Palestinian territories, paving the way for possible war crimes investigations against"
+            " Israelis. As members of the court, Palestinians may be subject to counter-charges as well. Israel and"
+            " the United States, neither of which is an ICC member, opposed the Palestinians' efforts to join the"
+            " body. But Palestinian Foreign Minister Riad al-Malki, speaking at Wednesday's ceremony, said it was a"
+            ' move toward greater justice. "As Palestine formally becomes a State Party to the Rome Statute today, the'
+            ' world is also a step closer to ending a long era of impunity and injustice," he said, according to an'
+            ' ICC news release. "Indeed, today brings us closer to our shared goals of justice and peace." Judge'
+            " Kuniko Ozaki, a vice president of the ICC, said acceding to the treaty was just the first step for the"
+            ' Palestinians. "As the Rome Statute today enters into force for the State of Palestine, Palestine'
+            " acquires all the rights as well as responsibilities that come with being a State Party to the Statute."
+            ' These are substantive commitments, which cannot be taken lightly," she said. Rights group Human Rights'
+            ' Watch welcomed the development. "Governments seeking to penalize Palestine for joining the ICC should'
+            " immediately end their pressure, and countries that support universal acceptance of the court's treaty"
+            ' should speak out to welcome its membership," said Balkees Jarrah, international justice counsel for the'
+            " group. \"What's objectionable is the attempts to undermine international justice, not Palestine's"
+            ' decision to join a treaty to which over 100 countries around the world are members." In January, when'
+            " the preliminary ICC examination was opened, Israeli Prime Minister Benjamin Netanyahu described it as an"
+            ' outrage, saying the court was overstepping its boundaries. The United States also said it "strongly"'
+            " disagreed with the court's decision. \"As we have said repeatedly, we do not believe that Palestine is a"
+            ' state and therefore we do not believe that it is eligible to join the ICC," the State Department said in'
+            ' a statement. It urged the warring sides to resolve their differences through direct negotiations. "We'
+            ' will continue to oppose actions against Israel at the ICC as counterproductive to the cause of peace,"'
+            " it said. But the ICC begs to differ with the definition of a state for its purposes and refers to the"
+            ' territories as "Palestine." While a preliminary examination is not a formal investigation, it allows the'
+            " court to review evidence and determine whether to investigate suspects on both sides. Prosecutor Fatou"
+            ' Bensouda said her office would "conduct its analysis in full independence and impartiality." The war'
+            " between Israel and Hamas militants in Gaza last summer left more than 2,000 people dead. The inquiry"
+            " will include alleged war crimes committed since June. The International Criminal Court was set up in"
+            " 2002 to prosecute genocide, crimes against humanity and war crimes."
+        )
+        EXPECTED = (
+            " The International Criminal Court (ICC) has announced that it has been announced by the International"
+            " Criminal court."
+        )
 
         dct = tok(ARTICLE, return_tensors="pt")
         generated_ids = hf.generate(**dct, num_beams=4)
@@ -534,8 +573,116 @@ class FastIntegrationTests(unittest.TestCase):
         batch = self.tok(
             [
-                'The Palestinian Authority officially became the 123rd member of the International Criminal Court on Wednesday, a step that gives the court jurisdiction over alleged crimes in Palestinian territories. The formal accession was marked with a ceremony at The Hague, in the Netherlands, where the court is based. The Palestinians signed the ICC\'s founding Rome Statute in January, when they also accepted its jurisdiction over alleged crimes committed "in the occupied Palestinian territory, including East Jerusalem, since June 13, 2014." Later that month, the ICC opened a preliminary examination into the situation in Palestinian territories, paving the way for possible war crimes investigations against Israelis. As members of the court, Palestinians may be subject to counter-charges as well. Israel and the United States, neither of which is an ICC member, opposed the Palestinians\' efforts to join the body. But Palestinian Foreign Minister Riad al-Malki, speaking at Wednesday\'s ceremony, said it was a move toward greater justice. "As Palestine formally becomes a State Party to the Rome Statute today, the world is also a step closer to ending a long era of impunity and injustice," he said, according to an ICC news release. "Indeed, today brings us closer to our shared goals of justice and peace." Judge Kuniko Ozaki, a vice president of the ICC, said acceding to the treaty was just the first step for the Palestinians. "As the Rome Statute today enters into force for the State of Palestine, Palestine acquires all the rights as well as responsibilities that come with being a State Party to the Statute. These are substantive commitments, which cannot be taken lightly," she said. Rights group Human Rights Watch welcomed the development. "Governments seeking to penalize Palestine for joining the ICC should immediately end their pressure, and countries that support universal acceptance of the court\'s treaty should speak out to welcome its membership," said Balkees Jarrah, international justice counsel for the group. "What\'s objectionable is the attempts to undermine international justice, not Palestine\'s decision to join a treaty to which over 100 countries around the world are members." In January, when the preliminary ICC examination was opened, Israeli Prime Minister Benjamin Netanyahu described it as an outrage, saying the court was overstepping its boundaries. The United States also said it "strongly" disagreed with the court\'s decision. "As we have said repeatedly, we do not believe that Palestine is a state and therefore we do not believe that it is eligible to join the ICC," the State Department said in a statement. It urged the warring sides to resolve their differences through direct negotiations. "We will continue to oppose actions against Israel at the ICC as counterproductive to the cause of peace," it said. But the ICC begs to differ with the definition of a state for its purposes and refers to the territories as "Palestine." While a preliminary examination is not a formal investigation, it allows the court to review evidence and determine whether to investigate suspects on both sides. Prosecutor Fatou Bensouda said her office would "conduct its analysis in full independence and impartiality." The war between Israel and Hamas militants in Gaza last summer left more than 2,000 people dead. The inquiry will include alleged war crimes committed since June. The International Criminal Court was set up in 2002 to prosecute genocide, crimes against humanity and war crimes.',
-                'The French prosecutor leading an investigation into the crash of Germanwings Flight 9525 insisted Wednesday that he was not aware of any video footage from on board the plane. Marseille prosecutor Brice Robin told CNN that "so far no videos were used in the crash investigation." He added, "A person who has such a video needs to immediately give it to the investigators." Robin\'s comments follow claims by two magazines, German daily Bild and French Paris Match, of a cell phone video showing the harrowing final seconds from on board Germanwings Flight 9525 as it crashed into the French Alps. All 150 on board were killed. Paris Match and Bild reported that the video was recovered from a phone at the wreckage site. The two publications described the supposed video, but did not post it on their websites. The publications said that they watched the video, which was found by a source close to the investigation. "One can hear cries of \'My God\' in several languages," Paris Match reported. "Metallic banging can also be heard more than three times, perhaps of the pilot trying to open the cockpit door with a heavy object. Towards the end, after a heavy shake, stronger than the others, the screaming intensifies. Then nothing." "It is a very disturbing scene," said Julian Reichelt, editor-in-chief of Bild online. An official with France\'s accident investigation agency, the BEA, said the agency is not aware of any such video. Lt. Col. Jean-Marc Menichini, a French Gendarmerie spokesman in charge of communications on rescue efforts around the Germanwings crash site, told CNN that the reports were "completely wrong" and "unwarranted." Cell phones have been collected at the site, he said, but that they "hadn\'t been exploited yet." Menichini said he believed the cell phones would need to be sent to the Criminal Research Institute in Rosny sous-Bois, near Paris, in order to be analyzed by specialized technicians working hand-in-hand with investigators. But none of the cell phones found so far have been sent to the institute, Menichini said. Asked whether staff involved in the search could have leaked a memory card to the media, Menichini answered with a categorical "no." Reichelt told "Erin Burnett: Outfront" that he had watched the video and stood by the report, saying Bild and Paris Match are "very confident" that the clip is real. He noted that investigators only revealed they\'d recovered cell phones from the crash site after Bild and Paris Match published their reports. "That is something we did not know before. ... Overall we can say many things of the investigation weren\'t revealed by the investigation at the beginning," he said. What was mental state of Germanwings co-pilot? German airline Lufthansa confirmed Tuesday that co-pilot Andreas Lubitz had battled depression years before he took the controls of Germanwings Flight 9525, which he\'s accused of deliberately crashing last week in the French Alps. Lubitz told his Lufthansa flight training school in 2009 that he had a "previous episode of severe depression," the airline said Tuesday. Email correspondence between Lubitz and the school discovered in an internal investigation, Lufthansa said, included medical documents he submitted in connection with resuming his flight training. The announcement indicates that Lufthansa, the parent company of Germanwings, knew of Lubitz\'s battle with depression, allowed him to continue training and ultimately put him in the cockpit. Lufthansa, whose CEO Carsten Spohr previously said Lubitz was 100% fit to fly, described its statement Tuesday as a "swift and seamless clarification" and said it was sharing the information and documents -- including training and medical records -- with public prosecutors. Spohr traveled to the crash site Wednesday, where recovery teams have been working for the past week to recover human remains and plane debris scattered across a steep mountainside. He saw the crisis center set up in Seyne-les-Alpes, laid a wreath in the village of Le Vernet, closer to the crash site, where grieving families have left flowers at a simple stone memorial. Menichini told CNN late Tuesday that no visible human remains were left at the site but recovery teams would keep searching. French President Francois Hollande, speaking Tuesday, said that it should be possible to identify all the victims using DNA analysis by the end of the week, sooner than authorities had previously suggested. In the meantime, the recovery of the victims\' personal belongings will start Wednesday, Menichini said. Among those personal belongings could be more cell phones belonging to the 144 passengers and six crew on board. Check out the latest from our correspondents . The details about Lubitz\'s correspondence with the flight school during his training were among several developments as investigators continued to delve into what caused the crash and Lubitz\'s possible motive for downing the jet. A Lufthansa spokesperson told CNN on Tuesday that Lubitz had a valid medical certificate, had passed all his examinations and "held all the licenses required." Earlier, a spokesman for the prosecutor\'s office in Dusseldorf, Christoph Kumpa, said medical records reveal Lubitz suffered from suicidal tendencies at some point before his aviation career and underwent psychotherapy before he got his pilot\'s license. Kumpa emphasized there\'s no evidence suggesting Lubitz was suicidal or acting aggressively before the crash. Investigators are looking into whether Lubitz feared his medical condition would cause him to lose his pilot\'s license, a European government official briefed on the investigation told CNN on Tuesday. While flying was "a big part of his life," the source said, it\'s only one theory being considered. Another source, a law enforcement official briefed on the investigation, also told CNN that authorities believe the primary motive for Lubitz to bring down the plane was that he feared he would not be allowed to fly because of his medical problems. Lubitz\'s girlfriend told investigators he had seen an eye doctor and a neuropsychologist, both of whom deemed him unfit to work recently and concluded he had psychological issues, the European government official said. But no matter what details emerge about his previous mental health struggles, there\'s more to the story, said Brian Russell, a forensic psychologist. "Psychology can explain why somebody would turn rage inward on themselves about the fact that maybe they weren\'t going to keep doing their job and they\'re upset about that and so they\'re suicidal," he said. "But there is no mental illness that explains why somebody then feels entitled to also take that rage and turn it outward on 149 other people who had nothing to do with the person\'s problems." Germanwings crash compensation: What we know . Who was the captain of Germanwings Flight 9525? CNN\'s Margot Haddad reported from Marseille and Pamela Brown from Dusseldorf, while Laura Smith-Spark wrote from London. CNN\'s Frederik Pleitgen, Pamela Boykoff, Antonia Mortensen, Sandrine Amiel and Anna-Maja Rappard contributed to this report.',
+                "The Palestinian Authority officially became the 123rd member of the International Criminal Court on"
+                " Wednesday, a step that gives the court jurisdiction over alleged crimes in Palestinian territories."
+                " The formal accession was marked with a ceremony at The Hague, in the Netherlands, where the court is"
+                " based. The Palestinians signed the ICC's founding Rome Statute in January, when they also accepted"
+                ' its jurisdiction over alleged crimes committed "in the occupied Palestinian territory, including'
+                ' East Jerusalem, since June 13, 2014." Later that month, the ICC opened a preliminary examination'
+                " into the situation in Palestinian territories, paving the way for possible war crimes investigations"
+                " against Israelis. As members of the court, Palestinians may be subject to counter-charges as well."
+                " Israel and the United States, neither of which is an ICC member, opposed the Palestinians' efforts"
+                " to join the body. But Palestinian Foreign Minister Riad al-Malki, speaking at Wednesday's ceremony,"
+                ' said it was a move toward greater justice. "As Palestine formally becomes a State Party to the Rome'
+                ' Statute today, the world is also a step closer to ending a long era of impunity and injustice," he'
+                ' said, according to an ICC news release. "Indeed, today brings us closer to our shared goals of'
+                ' justice and peace." Judge Kuniko Ozaki, a vice president of the ICC, said acceding to the treaty was'
+                ' just the first step for the Palestinians. "As the Rome Statute today enters into force for the State'
+                " of Palestine, Palestine acquires all the rights as well as responsibilities that come with being a"
+                ' State Party to the Statute. These are substantive commitments, which cannot be taken lightly," she'
+                ' said. Rights group Human Rights Watch welcomed the development. "Governments seeking to penalize'
+                " Palestine for joining the ICC should immediately end their pressure, and countries that support"
+                " universal acceptance of the court's treaty should speak out to welcome its membership,\" said"
+                " Balkees Jarrah, international justice counsel for the group. \"What's objectionable is the attempts"
+                " to undermine international justice, not Palestine's decision to join a treaty to which over 100"
+                ' countries around the world are members." In January, when the preliminary ICC examination was'
+                " opened, Israeli Prime Minister Benjamin Netanyahu described it as an outrage, saying the court was"
+                ' overstepping its boundaries. The United States also said it "strongly" disagreed with the court\'s'
+                ' decision. "As we have said repeatedly, we do not believe that Palestine is a state and therefore we'
+                ' do not believe that it is eligible to join the ICC," the State Department said in a statement. It'
+                ' urged the warring sides to resolve their differences through direct negotiations. "We will continue'
+                ' to oppose actions against Israel at the ICC as counterproductive to the cause of peace," it said.'
+                " But the ICC begs to differ with the definition of a state for its purposes and refers to the"
+                ' territories as "Palestine." While a preliminary examination is not a formal investigation, it allows'
+                " the court to review evidence and determine whether to investigate suspects on both sides. Prosecutor"
+                ' Fatou Bensouda said her office would "conduct its analysis in full independence and impartiality."'
+                " The war between Israel and Hamas militants in Gaza last summer left more than 2,000 people dead. The"
+                " inquiry will include alleged war crimes committed since June. The International Criminal Court was"
+                " set up in 2002 to prosecute genocide, crimes against humanity and war crimes.",
+                "The French prosecutor leading an investigation into the crash of Germanwings Flight 9525 insisted"
+                " Wednesday that he was not aware of any video footage from on board the plane. Marseille prosecutor"
+                ' Brice Robin told CNN that "so far no videos were used in the crash investigation." He added, "A'
+                " person who has such a video needs to immediately give it to the investigators.\" Robin's comments"
+                " follow claims by two magazines, German daily Bild and French Paris Match, of a cell phone video"
+                " showing the harrowing final seconds from on board Germanwings Flight 9525 as it crashed into the"
+                " French Alps. All 150 on board were killed. Paris Match and Bild reported that the video was"
+                " recovered from a phone at the wreckage site. The two publications described the supposed video, but"
+                " did not post it on their websites. The publications said that they watched the video, which was"
+                " found by a source close to the investigation. \"One can hear cries of 'My God' in several"
+                ' languages," Paris Match reported. "Metallic banging can also be heard more than three times, perhaps'
+                " of the pilot trying to open the cockpit door with a heavy object. Towards the end, after a heavy"
+                ' shake, stronger than the others, the screaming intensifies. Then nothing." "It is a very disturbing'
+                " scene,\" said Julian Reichelt, editor-in-chief of Bild online. An official with France's accident"
+                " investigation agency, the BEA, said the agency is not aware of any such video. Lt. Col. Jean-Marc"
+                " Menichini, a French Gendarmerie spokesman in charge of communications on rescue efforts around the"
+                ' Germanwings crash site, told CNN that the reports were "completely wrong" and "unwarranted." Cell'
+                ' phones have been collected at the site, he said, but that they "hadn\'t been exploited yet."'
+                " Menichini said he believed the cell phones would need to be sent to the Criminal Research Institute"
+                " in Rosny sous-Bois, near Paris, in order to be analyzed by specialized technicians working"
+                " hand-in-hand with investigators. But none of the cell phones found so far have been sent to the"
+                " institute, Menichini said. Asked whether staff involved in the search could have leaked a memory"
+                ' card to the media, Menichini answered with a categorical "no." Reichelt told "Erin Burnett:'
+                ' Outfront" that he had watched the video and stood by the report, saying Bild and Paris Match are'
+                ' "very confident" that the clip is real. He noted that investigators only revealed they\'d recovered'
+                ' cell phones from the crash site after Bild and Paris Match published their reports. "That is'
+                " something we did not know before. ... Overall we can say many things of the investigation weren't"
+                ' revealed by the investigation at the beginning," he said. What was mental state of Germanwings'
+                " co-pilot? German airline Lufthansa confirmed Tuesday that co-pilot Andreas Lubitz had battled"
+                " depression years before he took the controls of Germanwings Flight 9525, which he's accused of"
+                " deliberately crashing last week in the French Alps. Lubitz told his Lufthansa flight training school"
+                ' in 2009 that he had a "previous episode of severe depression," the airline said Tuesday. Email'
+                " correspondence between Lubitz and the school discovered in an internal investigation, Lufthansa"
+                " said, included medical documents he submitted in connection with resuming his flight training. The"
+                " announcement indicates that Lufthansa, the parent company of Germanwings, knew of Lubitz's battle"
+                " with depression, allowed him to continue training and ultimately put him in the cockpit. Lufthansa,"
+                " whose CEO Carsten Spohr previously said Lubitz was 100% fit to fly, described its statement Tuesday"
+                ' as a "swift and seamless clarification" and said it was sharing the information and documents --'
+                " including training and medical records -- with public prosecutors. Spohr traveled to the crash site"
+                " Wednesday, where recovery teams have been working for the past week to recover human remains and"
+                " plane debris scattered across a steep mountainside. He saw the crisis center set up in"
+                " Seyne-les-Alpes, laid a wreath in the village of Le Vernet, closer to the crash site, where grieving"
+                " families have left flowers at a simple stone memorial. Menichini told CNN late Tuesday that no"
+                " visible human remains were left at the site but recovery teams would keep searching. French"
+                " President Francois Hollande, speaking Tuesday, said that it should be possible to identify all the"
+                " victims using DNA analysis by the end of the week, sooner than authorities had previously suggested."
+                " In the meantime, the recovery of the victims' personal belongings will start Wednesday, Menichini"
+                " said. Among those personal belongings could be more cell phones belonging to the 144 passengers and"
+                " six crew on board. Check out the latest from our correspondents . The details about Lubitz's"
+                " correspondence with the flight school during his training were among several developments as"
+                " investigators continued to delve into what caused the crash and Lubitz's possible motive for"
+                " downing the jet. A Lufthansa spokesperson told CNN on Tuesday that Lubitz had a valid medical"
+                ' certificate, had passed all his examinations and "held all the licenses required." Earlier, a'
+                " spokesman for the prosecutor's office in Dusseldorf, Christoph Kumpa, said medical records reveal"
+                " Lubitz suffered from suicidal tendencies at some point before his aviation career and underwent"
+                " psychotherapy before he got his pilot's license. Kumpa emphasized there's no evidence suggesting"
+                " Lubitz was suicidal or acting aggressively before the crash. Investigators are looking into whether"
+                " Lubitz feared his medical condition would cause him to lose his pilot's license, a European"
+                ' government official briefed on the investigation told CNN on Tuesday. While flying was "a big part'
+                " of his life,\" the source said, it's only one theory being considered. Another source, a law"
+                " enforcement official briefed on the investigation, also told CNN that authorities believe the"
+                " primary motive for Lubitz to bring down the plane was that he feared he would not be allowed to fly"
+                " because of his medical problems. Lubitz's girlfriend told investigators he had seen an eye doctor"
+                " and a neuropsychologist, both of whom deemed him unfit to work recently and concluded he had"
+                " psychological issues, the European government official said. But no matter what details emerge about"
+                " his previous mental health struggles, there's more to the story, said Brian Russell, a forensic"
+                ' psychologist. "Psychology can explain why somebody would turn rage inward on themselves about the'
+                " fact that maybe they weren't going to keep doing their job and they're upset about that and so"
+                ' they\'re suicidal," he said. "But there is no mental illness that explains why somebody then feels'
+                " entitled to also take that rage and turn it outward on 149 other people who had nothing to do with"
+                " the person's problems.\" Germanwings crash compensation: What we know . Who was the captain of"
+                " Germanwings Flight 9525? CNN's Margot Haddad reported from Marseille and Pamela Brown from"
+                " Dusseldorf, while Laura Smith-Spark wrote from London. CNN's Frederik Pleitgen, Pamela Boykoff,"
+                " Antonia Mortensen, Sandrine Amiel and Anna-Maja Rappard contributed to this report.",
             ],
             return_tensors="pt",
             padding="longest",
@@ -545,11 +692,13 @@ class FastIntegrationTests(unittest.TestCase):
         result = self.tok.batch_decode(generated_ids, skip_special_tokens=True)
         assert (
             result[0]
-            == " The International Criminal Court (ICC) has announced that it has been announced by the International Criminal court."
+            == " The International Criminal Court (ICC) has announced that it has been announced by the International"
+            " Criminal court."
         )
         assert (
             result[1]
-            == " An investigation into the crash that killed at least 10 people in the French capital has been released by the French police investigating the crash."
+            == " An investigation into the crash that killed at least 10 people in the French capital has been"
+            " released by the French police investigating the crash."
         )
 
     def test_encoder_equiv(self):
@@ -557,8 +706,116 @@ class FastIntegrationTests(unittest.TestCase):
         batch = self.tok(
             [
-                'The Palestinian Authority officially became the 123rd member of the International Criminal Court on Wednesday, a step that gives the court jurisdiction over alleged crimes in Palestinian territories. The formal accession was marked with a ceremony at The Hague, in the Netherlands, where the court is based. The Palestinians signed the ICC\'s founding Rome Statute in January, when they also accepted its jurisdiction over alleged crimes committed "in the occupied Palestinian territory, including East Jerusalem, since June 13, 2014." Later that month, the ICC opened a preliminary examination into the situation in Palestinian territories, paving the way for possible war crimes investigations against Israelis. As members of the court, Palestinians may be subject to counter-charges as well. Israel and the United States, neither of which is an ICC member, opposed the Palestinians\' efforts to join the body. But Palestinian Foreign Minister Riad al-Malki, speaking at Wednesday\'s ceremony, said it was a move toward greater justice. "As Palestine formally becomes a State Party to the Rome Statute today, the world is also a step closer to ending a long era of impunity and injustice," he said, according to an ICC news release. "Indeed, today brings us closer to our shared goals of justice and peace." Judge Kuniko Ozaki, a vice president of the ICC, said acceding to the treaty was just the first step for the Palestinians. "As the Rome Statute today enters into force for the State of Palestine, Palestine acquires all the rights as well as responsibilities that come with being a State Party to the Statute. These are substantive commitments, which cannot be taken lightly," she said. Rights group Human Rights Watch welcomed the development. "Governments seeking to penalize Palestine for joining the ICC should immediately end their pressure, and countries that support universal acceptance of the court\'s treaty should speak out to welcome its membership," said Balkees Jarrah, international justice counsel for the group. "What\'s objectionable is the attempts to undermine international justice, not Palestine\'s decision to join a treaty to which over 100 countries around the world are members." In January, when the preliminary ICC examination was opened, Israeli Prime Minister Benjamin Netanyahu described it as an outrage, saying the court was overstepping its boundaries. The United States also said it "strongly" disagreed with the court\'s decision. "As we have said repeatedly, we do not believe that Palestine is a state and therefore we do not believe that it is eligible to join the ICC," the State Department said in a statement. It urged the warring sides to resolve their differences through direct negotiations. "We will continue to oppose actions against Israel at the ICC as counterproductive to the cause of peace," it said. But the ICC begs to differ with the definition of a state for its purposes and refers to the territories as "Palestine." While a preliminary examination is not a formal investigation, it allows the court to review evidence and determine whether to investigate suspects on both sides. Prosecutor Fatou Bensouda said her office would "conduct its analysis in full independence and impartiality." The war between Israel and Hamas militants in Gaza last summer left more than 2,000 people dead. The inquiry will include alleged war crimes committed since June. The International Criminal Court was set up in 2002 to prosecute genocide, crimes against humanity and war crimes.',
-                'The French prosecutor leading an investigation into the crash of Germanwings Flight 9525 insisted Wednesday that he was not aware of any video footage from on board the plane. Marseille prosecutor Brice Robin told CNN that "so far no videos were used in the crash investigation." He added, "A person who has such a video needs to immediately give it to the investigators." Robin\'s comments follow claims by two magazines, German daily Bild and French Paris Match, of a cell phone video showing the harrowing final seconds from on board Germanwings Flight 9525 as it crashed into the French Alps. All 150 on board were killed. Paris Match and Bild reported that the video was recovered from a phone at the wreckage site. The two publications described the supposed video, but did not post it on their websites. The publications said that they watched the video, which was found by a source close to the investigation. "One can hear cries of \'My God\' in several languages," Paris Match reported. "Metallic banging can also be heard more than three times, perhaps of the pilot trying to open the cockpit door with a heavy object. Towards the end, after a heavy shake, stronger than the others, the screaming intensifies. Then nothing." "It is a very disturbing scene," said Julian Reichelt, editor-in-chief of Bild online. An official with France\'s accident investigation agency, the BEA, said the agency is not aware of any such video. Lt. Col. Jean-Marc Menichini, a French Gendarmerie spokesman in charge of communications on rescue efforts around the Germanwings crash site, told CNN that the reports were "completely wrong" and "unwarranted." Cell phones have been collected at the site, he said, but that they "hadn\'t been exploited yet." Menichini said he believed the cell phones would need to be sent to the Criminal Research Institute in Rosny sous-Bois, near Paris, in order to be analyzed by specialized technicians working hand-in-hand with investigators. But none of the cell phones found so far have been sent to the institute, Menichini said. Asked whether staff involved in the search could have leaked a memory card to the media, Menichini answered with a categorical "no." Reichelt told "Erin Burnett: Outfront" that he had watched the video and stood by the report, saying Bild and Paris Match are "very confident" that the clip is real. He noted that investigators only revealed they\'d recovered cell phones from the crash site after Bild and Paris Match published their reports. "That is something we did not know before. ... Overall we can say many things of the investigation weren\'t revealed by the investigation at the beginning," he said. What was mental state of Germanwings co-pilot? German airline Lufthansa confirmed Tuesday that co-pilot Andreas Lubitz had battled depression years before he took the controls of Germanwings Flight 9525, which he\'s accused of deliberately crashing last week in the French Alps. Lubitz told his Lufthansa flight training school in 2009 that he had a "previous episode of severe depression," the airline said Tuesday. Email correspondence between Lubitz and the school discovered in an internal investigation, Lufthansa said, included medical documents he submitted in connection with resuming his flight training. The announcement indicates that Lufthansa, the parent company of Germanwings, knew of Lubitz\'s battle with depression, allowed him to continue training and ultimately put him in the cockpit. Lufthansa, whose CEO Carsten Spohr previously said Lubitz was 100% fit to fly, described its statement Tuesday as a "swift and seamless clarification" and said it was sharing the information and documents -- including training and medical records -- with public prosecutors. Spohr traveled to the crash site Wednesday, where recovery teams have been working for the past week to recover human remains and plane debris scattered across a steep mountainside. He saw the crisis center set up in Seyne-les-Alpes, laid a wreath in the village of Le Vernet, closer to the crash site, where grieving families have left flowers at a simple stone memorial. Menichini told CNN late Tuesday that no visible human remains were left at the site but recovery teams would keep searching. French President Francois Hollande, speaking Tuesday, said that it should be possible to identify all the victims using DNA analysis by the end of the week, sooner than authorities had previously suggested. In the meantime, the recovery of the victims\' personal belongings will start Wednesday, Menichini said. Among those personal belongings could be more cell phones belonging to the 144 passengers and six crew on board. Check out the latest from our correspondents . The details about Lubitz\'s correspondence with the flight school during his training were among several developments as investigators continued to delve into what caused the crash and Lubitz\'s possible motive for downing the jet. A Lufthansa spokesperson told CNN on Tuesday that Lubitz had a valid medical certificate, had passed all his examinations and "held all the licenses required." Earlier, a spokesman for the prosecutor\'s office in Dusseldorf, Christoph Kumpa, said medical records reveal Lubitz suffered from suicidal tendencies at some point before his aviation career and underwent psychotherapy before he got his pilot\'s license. Kumpa emphasized there\'s no evidence suggesting Lubitz was suicidal or acting aggressively before the crash. Investigators are looking into whether Lubitz feared his medical condition would cause him to lose his pilot\'s license, a European government official briefed on the investigation told CNN on Tuesday. While flying was "a big part of his life," the source said, it\'s only one theory being considered. Another source, a law enforcement official briefed on the investigation, also told CNN that authorities believe the primary motive for Lubitz to bring down the plane was that he feared he would not be allowed to fly because of his medical problems. Lubitz\'s girlfriend told investigators he had seen an eye doctor and a neuropsychologist, both of whom deemed him unfit to work recently and concluded he had psychological issues, the European government official said. But no matter what details emerge about his previous mental health struggles, there\'s more to the story, said Brian Russell, a forensic psychologist. "Psychology can explain why somebody would turn rage inward on themselves about the fact that maybe they weren\'t going to keep doing their job and they\'re upset about that and so they\'re suicidal," he said. "But there is no mental illness that explains why somebody then feels entitled to also take that rage and turn it outward on 149 other people who had nothing to do with the person\'s problems." Germanwings crash compensation: What we know . Who was the captain of Germanwings Flight 9525? CNN\'s Margot Haddad reported from Marseille and Pamela Brown from Dusseldorf, while Laura Smith-Spark wrote from London. CNN\'s Frederik Pleitgen, Pamela Boykoff, Antonia Mortensen, Sandrine Amiel and Anna-Maja Rappard contributed to this report.',
+                "The Palestinian Authority officially became the 123rd member of the International Criminal Court on"
+                " Wednesday, a step that gives the court jurisdiction over alleged crimes in Palestinian territories."
+                " The formal accession was marked with a ceremony at The Hague, in the Netherlands, where the court is"
+                " based. The Palestinians signed the ICC's founding Rome Statute in January, when they also accepted"
+                ' its jurisdiction over alleged crimes committed "in the occupied Palestinian territory, including'
+                ' East Jerusalem, since June 13, 2014." Later that month, the ICC opened a preliminary examination'
+                " into the situation in Palestinian territories, paving the way for possible war crimes investigations"
+                " against Israelis. As members of the court, Palestinians may be subject to counter-charges as well."
+                " Israel and the United States, neither of which is an ICC member, opposed the Palestinians' efforts"
+                " to join the body. But Palestinian Foreign Minister Riad al-Malki, speaking at Wednesday's ceremony,"
+                ' said it was a move toward greater justice. "As Palestine formally becomes a State Party to the Rome'
+                ' Statute today, the world is also a step closer to ending a long era of impunity and injustice," he'
+                ' said, according to an ICC news release. "Indeed, today brings us closer to our shared goals of'
+                ' justice and peace." Judge Kuniko Ozaki, a vice president of the ICC, said acceding to the treaty was'
+                ' just the first step for the Palestinians. "As the Rome Statute today enters into force for the State'
+                " of Palestine, Palestine acquires all the rights as well as responsibilities that come with being a"
+                ' State Party to the Statute. These are substantive commitments, which cannot be taken lightly," she'
+                ' said. Rights group Human Rights Watch welcomed the development. "Governments seeking to penalize'
+                " Palestine for joining the ICC should immediately end their pressure, and countries that support"
+                " universal acceptance of the court's treaty should speak out to welcome its membership,\" said"
+                " Balkees Jarrah, international justice counsel for the group. \"What's objectionable is the attempts"
+                " to undermine international justice, not Palestine's decision to join a treaty to which over 100"
+                ' countries around the world are members." In January, when the preliminary ICC examination was'
+                " opened, Israeli Prime Minister Benjamin Netanyahu described it as an outrage, saying the court was"
+                ' overstepping its boundaries. The United States also said it "strongly" disagreed with the court\'s'
+                ' decision. "As we have said repeatedly, we do not believe that Palestine is a state and therefore we'
+                ' do not believe that it is eligible to join the ICC," the State Department said in a statement. It'
+                ' urged the warring sides to resolve their differences through direct negotiations. "We will continue'
+                ' to oppose actions against Israel at the ICC as counterproductive to the cause of peace," it said.'
+                " But the ICC begs to differ with the definition of a state for its purposes and refers to the"
+                ' territories as "Palestine." While a preliminary examination is not a formal investigation, it allows'
+                " the court to review evidence and determine whether to investigate suspects on both sides. Prosecutor"
+                ' Fatou Bensouda said her office would "conduct its analysis in full independence and impartiality."'
+                " The war between Israel and Hamas militants in Gaza last summer left more than 2,000 people dead. The"
+                " inquiry will include alleged war crimes committed since June. The International Criminal Court was"
+                " set up in 2002 to prosecute genocide, crimes against humanity and war crimes.",
+                "The French prosecutor leading an investigation into the crash of Germanwings Flight 9525 insisted"
+                " Wednesday that he was not aware of any video footage from on board the plane. Marseille prosecutor"
+                ' Brice Robin told CNN that "so far no videos were used in the crash investigation." He added, "A'
+                " person who has such a video needs to immediately give it to the investigators.\" Robin's comments"
+                " follow claims by two magazines, German daily Bild and French Paris Match, of a cell phone video"
+                " showing the harrowing final seconds from on board Germanwings Flight 9525 as it crashed into the"
+                " French Alps. All 150 on board were killed. Paris Match and Bild reported that the video was"
+                " recovered from a phone at the wreckage site. The two publications described the supposed video, but"
+                " did not post it on their websites. The publications said that they watched the video, which was"
+                " found by a source close to the investigation. \"One can hear cries of 'My God' in several"
+                ' languages," Paris Match reported. "Metallic banging can also be heard more than three times, perhaps'
+                " of the pilot trying to open the cockpit door with a heavy object. Towards the end, after a heavy"
+                ' shake, stronger than the others, the screaming intensifies. Then nothing." "It is a very disturbing'
+                " scene,\" said Julian Reichelt, editor-in-chief of Bild online. An official with France's accident"
+                " investigation agency, the BEA, said the agency is not aware of any such video. Lt. Col. Jean-Marc"
+                " Menichini, a French Gendarmerie spokesman in charge of communications on rescue efforts around the"
+                ' Germanwings crash site, told CNN that the reports were "completely wrong" and "unwarranted." Cell'
+                ' phones have been collected at the site, he said, but that they "hadn\'t been exploited yet."'
+                " Menichini said he believed the cell phones would need to be sent to the Criminal Research Institute"
+                " in Rosny sous-Bois, near Paris, in order to be analyzed by specialized technicians working"
+                " hand-in-hand with investigators. But none of the cell phones found so far have been sent to the"
+                " institute, Menichini said. Asked whether staff involved in the search could have leaked a memory"
+                ' card to the media, Menichini answered with a categorical "no." Reichelt told "Erin Burnett:'
+                ' Outfront" that he had watched the video and stood by the report, saying Bild and Paris Match are'
+                ' "very confident" that the clip is real. He noted that investigators only revealed they\'d recovered'
+                ' cell phones from the crash site after Bild and Paris Match published their reports. "That is'
+                " something we did not know before. ... Overall we can say many things of the investigation weren't"
+                ' revealed by the investigation at the beginning," he said. What was mental state of Germanwings'
+                " co-pilot? German airline Lufthansa confirmed Tuesday that co-pilot Andreas Lubitz had battled"
+                " depression years before he took the controls of Germanwings Flight 9525, which he's accused of"
+                " deliberately crashing last week in the French Alps. Lubitz told his Lufthansa flight training school"
+                ' in 2009 that he had a "previous episode of severe depression," the airline said Tuesday. Email'
+                " correspondence between Lubitz and the school discovered in an internal investigation, Lufthansa"
+                " said, included medical documents he submitted in connection with resuming his flight training. The"
+                " announcement indicates that Lufthansa, the parent company of Germanwings, knew of Lubitz's battle"
+                " with depression, allowed him to continue training and ultimately put him in the cockpit. Lufthansa,"
+                " whose CEO Carsten Spohr previously said Lubitz was 100% fit to fly, described its statement Tuesday"
+                ' as a "swift and seamless clarification" and said it was sharing the information and documents --'
+                " including training and medical records -- with public prosecutors. Spohr traveled to the crash site"
+                " Wednesday, where recovery teams have been working for the past week to recover human remains and"
+                " plane debris scattered across a steep mountainside. He saw the crisis center set up in"
+                " Seyne-les-Alpes, laid a wreath in the village of Le Vernet, closer to the crash site, where grieving"
+                " families have left flowers at a simple stone memorial. Menichini told CNN late Tuesday that no"
+                " visible human remains were left at the site but recovery teams would keep searching. French"
+                " President Francois Hollande, speaking Tuesday, said that it should be possible to identify all the"
+                " victims using DNA analysis by the end of the week, sooner than authorities had previously suggested."
+                " In the meantime, the recovery of the victims' personal belongings will start Wednesday, Menichini"
+                " said. Among those personal belongings could be more cell phones belonging to the 144 passengers and"
+                " six crew on board. Check out the latest from our correspondents . The details about Lubitz's"
+                " correspondence with the flight school during his training were among several developments as"
+                " investigators continued to delve into what caused the crash and Lubitz's possible motive for"
+                " downing the jet. A Lufthansa spokesperson told CNN on Tuesday that Lubitz had a valid medical"
+                ' certificate, had passed all his examinations and "held all the licenses required." Earlier, a'
+                " spokesman for the prosecutor's office in Dusseldorf, Christoph Kumpa, said medical records reveal"
+                " Lubitz suffered from suicidal tendencies at some point before his aviation career and underwent"
+                " psychotherapy before he got his pilot's license. Kumpa emphasized there's no evidence suggesting"
+                " Lubitz was suicidal or acting aggressively before the crash. Investigators are looking into whether"
+                " Lubitz feared his medical condition would cause him to lose his pilot's license, a European"
+                ' government official briefed on the investigation told CNN on Tuesday. While flying was "a big part'
+                " of his life,\" the source said, it's only one theory being considered. Another source, a law"
+                " enforcement official briefed on the investigation, also told CNN that authorities believe the"
+                " primary motive for Lubitz to bring down the plane was that he feared he would not be allowed to fly"
+                " because of his medical problems. Lubitz's girlfriend told investigators he had seen an eye doctor"
+                " and a neuropsychologist, both of whom deemed him unfit to work recently and concluded he had"
+                " psychological issues, the European government official said. But no matter what details emerge about"
+                " his previous mental health struggles, there's more to the story, said Brian Russell, a forensic"
+                ' psychologist. "Psychology can explain why somebody would turn rage inward on themselves about the'
+                " fact that maybe they weren't going to keep doing their job and they're upset about that and so"
+                ' they\'re suicidal," he said. "But there is no mental illness that explains why somebody then feels'
+                " entitled to also take that rage and turn it outward on 149 other people who had nothing to do with"
+                " the person's problems.\" Germanwings crash compensation: What we know . Who was the captain of"
+                " Germanwings Flight 9525? CNN's Margot Haddad reported from Marseille and Pamela Brown from"
+                " Dusseldorf, while Laura Smith-Spark wrote from London. CNN's Frederik Pleitgen, Pamela Boykoff,"
+                " Antonia Mortensen, Sandrine Amiel and Anna-Maja Rappard contributed to this report.",
             ],
             return_tensors="pt",
             padding="longest",
@@ -641,7 +898,10 @@ class BartModelIntegrationTests(unittest.TestCase):
         PGE_ARTICLE = """ PG&E stated it scheduled the blackouts in response to forecasts for high winds amid dry conditions. The aim is to reduce the risk of wildfires. Nearly 800 thousand customers were scheduled to be affected by the shutoffs which were expected to last through at least midday tomorrow."""
 
-        EXPECTED_SUMMARY = "California's largest power company has begun shutting off electricity to thousands of customers in the state."
+        EXPECTED_SUMMARY = (
+            "California's largest power company has begun shutting off electricity to thousands of customers in the"
+            " state."
+        )
         dct = tok.batch_encode_plus(
             [PGE_ARTICLE],
             max_length=1024,
@@ -679,14 +939,197 @@ class BartModelIntegrationTests(unittest.TestCase):
         hf = BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn").to(torch_device)
         tok = BartTokenizer.from_pretrained("facebook/bart-large")
 
-        FRANCE_ARTICLE = ' Marseille, France (CNN)The French prosecutor leading an investigation into the crash of Germanwings Flight 9525 insisted Wednesday that he was not aware of any video footage from on board the plane. Marseille prosecutor Brice Robin told CNN that "so far no videos were used in the crash investigation." He added, "A person who has such a video needs to immediately give it to the investigators." Robin\'s comments follow claims by two magazines, German daily Bild and French Paris Match, of a cell phone video showing the harrowing final seconds from on board Germanwings Flight 9525 as it crashed into the French Alps. All 150 on board were killed. Paris Match and Bild reported that the video was recovered from a phone at the wreckage site. The two publications described the supposed video, but did not post it on their websites. The publications said that they watched the video, which was found by a source close to the investigation. "One can hear cries of \'My God\' in several languages," Paris Match reported. "Metallic banging can also be heard more than three times, perhaps of the pilot trying to open the cockpit door with a heavy object. Towards the end, after a heavy shake, stronger than the others, the screaming intensifies. Then nothing." "It is a very disturbing scene," said Julian Reichelt, editor-in-chief of Bild online. An official with France\'s accident investigation agency, the BEA, said the agency is not aware of any such video. Lt. Col. Jean-Marc Menichini, a French Gendarmerie spokesman in charge of communications on rescue efforts around the Germanwings crash site, told CNN that the reports were "completely wrong" and "unwarranted." Cell phones have been collected at the site, he said, but that they "hadn\'t been exploited yet." Menichini said he believed the cell phones would need to be sent to the Criminal Research Institute in Rosny sous-Bois, near Paris, in order to be analyzed by specialized technicians working hand-in-hand with investigators. But none of the cell phones found so far have been sent to the institute, Menichini said. Asked whether staff involved in the search could have leaked a memory card to the media, Menichini answered with a categorical "no." Reichelt told "Erin Burnett: Outfront" that he had watched the video and stood by the report, saying Bild and Paris Match are "very confident" that the clip is real. He noted that investigators only revealed they\'d recovered cell phones from the crash site after Bild and Paris Match published their reports. "That is something we did not know before. ... Overall we can say many things of the investigation weren\'t revealed by the investigation at the beginning," he said. What was mental state of Germanwings co-pilot? German airline Lufthansa confirmed Tuesday that co-pilot Andreas Lubitz had battled depression years before he took the controls of Germanwings Flight 9525, which he\'s accused of deliberately crashing last week in the French Alps. Lubitz told his Lufthansa flight training school in 2009 that he had a "previous episode of severe depression," the airline said Tuesday. Email correspondence between Lubitz and the school discovered in an internal investigation, Lufthansa said, included medical documents he submitted in connection with resuming his flight training. The announcement indicates that Lufthansa, the parent company of Germanwings, knew of Lubitz\'s battle with depression, allowed him to continue training and ultimately put him in the cockpit. Lufthansa, whose CEO Carsten Spohr previously said Lubitz was 100% fit to fly, described its statement Tuesday as a "swift and seamless clarification" and said it was sharing the information and documents -- including training and medical records -- with public prosecutors. Spohr traveled to the crash site Wednesday, where recovery teams have been working for the past week to recover human remains and plane debris scattered across a steep mountainside. He saw the crisis center set up in Seyne-les-Alpes, laid a wreath in the village of Le Vernet, closer to the crash site, where grieving families have left flowers at a simple stone memorial. Menichini told CNN late Tuesday that no visible human remains were left at the site but recovery teams would keep searching. French President Francois Hollande, speaking Tuesday, said that it should be possible to identify all the victims using DNA analysis by the end of the week, sooner than authorities had previously suggested. In the meantime, the recovery of the victims\' personal belongings will start Wednesday, Menichini said. Among those personal belongings could be more cell phones belonging to the 144 passengers and six crew on board. Check out the latest from our correspondents . The details about Lubitz\'s correspondence with the flight school during his training were among several developments as investigators continued to delve into what caused the crash and Lubitz\'s possible motive for downing the jet. A Lufthansa spokesperson told CNN on Tuesday that Lubitz had a valid medical certificate, had passed all his examinations and "held all the licenses required." Earlier, a spokesman for the prosecutor\'s office in Dusseldorf, Christoph Kumpa, said medical records reveal Lubitz suffered from suicidal tendencies at some point before his aviation career and underwent psychotherapy before he got his pilot\'s license. Kumpa emphasized there\'s no evidence suggesting Lubitz was suicidal or acting aggressively before the crash. Investigators are looking into whether Lubitz feared his medical condition would cause him to lose his pilot\'s license, a European government official briefed on the investigation told CNN on Tuesday. While flying was "a big part of his life," the source said, it\'s only one theory being considered. Another source, a law enforcement official briefed on the investigation, also told CNN that authorities believe the primary motive for Lubitz to bring down the plane was that he feared he would not be allowed to fly because of his medical problems. Lubitz\'s girlfriend told investigators he had seen an eye doctor and a neuropsychologist, both of whom deemed him unfit to work recently and concluded he had psychological issues, the European government official said. But no matter what details emerge about his previous mental health struggles, there\'s more to the story, said Brian Russell, a forensic psychologist. "Psychology can explain why somebody would turn rage inward on themselves about the fact that maybe they weren\'t going to keep doing their job and they\'re upset about that and so they\'re suicidal," he said. "But there is no mental illness that explains why somebody then feels entitled to also take that rage and turn it outward on 149 other people who had nothing to do with the person\'s problems." Germanwings crash compensation: What we know . Who was the captain of Germanwings Flight 9525? CNN\'s Margot Haddad reported from Marseille and Pamela Brown from Dusseldorf, while Laura Smith-Spark wrote from London. CNN\'s Frederik Pleitgen, Pamela Boykoff, Antonia Mortensen, Sandrine Amiel and Anna-Maja Rappard contributed to this report.'  # @noq
+        FRANCE_ARTICLE = (  # @noq
+            " Marseille, France (CNN)The French prosecutor leading an investigation into the crash of Germanwings"
+            " Flight 9525 insisted Wednesday that he was not aware of any video footage from on board the plane."
+            ' Marseille prosecutor Brice Robin told CNN that "so far no videos were used in the crash investigation."'
+            ' He added, "A person who has such a video needs to immediately give it to the investigators." Robin\'s'
+            " comments follow claims by two magazines, German daily Bild and French Paris Match, of a cell phone video"
+            " showing the harrowing final seconds from on board Germanwings Flight 9525 as it crashed into the French"
+            " Alps. All 150 on board were killed. Paris Match and Bild reported that the video was recovered from a"
+            " phone at the wreckage site. The two publications described the supposed video, but did not post it on"
+            " their websites. The publications said that they watched the video, which was found by a source close to"
+            " the investigation. \"One can hear cries of 'My God' in several languages,\" Paris Match reported."
+            ' "Metallic banging can also be heard more than three times, perhaps of the pilot trying to open the'
+            " cockpit door with a heavy object. Towards the end, after a heavy shake, stronger than the others, the"
+            ' screaming intensifies. Then nothing." "It is a very disturbing scene," said Julian Reichelt,'
+            " editor-in-chief of Bild online. An official with France's accident investigation agency, the BEA, said"
+            " the agency is not aware of any such video. Lt. Col. Jean-Marc Menichini, a French Gendarmerie spokesman"
+            " in charge of communications on rescue efforts around the Germanwings crash site, told CNN that the"
+            ' reports were "completely wrong" and "unwarranted." Cell phones have been collected at the site, he said,'
+            ' but that they "hadn\'t been exploited yet." Menichini said he believed the cell phones would need to be'
+            " sent to the Criminal Research Institute in Rosny sous-Bois, near Paris, in order to be analyzed by"
+            " specialized technicians working hand-in-hand with investigators. But none of the cell phones found so"
+            " far have been sent to the institute, Menichini said. Asked whether staff involved in the search could"
+            ' have leaked a memory card to the media, Menichini answered with a categorical "no." Reichelt told "Erin'
+            ' Burnett: Outfront" that he had watched the video and stood by the report, saying Bild and Paris Match'
+            ' are "very confident" that the clip is real. He noted that investigators only revealed they\'d recovered'
+            ' cell phones from the crash site after Bild and Paris Match published their reports. "That is something'
+            " we did not know before. ... Overall we can say many things of the investigation weren't revealed by the"
+            ' investigation at the beginning," he said. What was mental state of Germanwings co-pilot? German airline'
+            " Lufthansa confirmed Tuesday that co-pilot Andreas Lubitz had battled depression years before he took the"
+            " controls of Germanwings Flight 9525, which he's accused of deliberately crashing last week in the"
+            ' French Alps. Lubitz told his Lufthansa flight training school in 2009 that he had a "previous episode of'
+            ' severe depression," the airline said Tuesday. Email correspondence between Lubitz and the school'
+            " discovered in an internal investigation, Lufthansa said, included medical documents he submitted in"
+            " connection with resuming his flight training. The announcement indicates that Lufthansa, the parent"
+            " company of Germanwings, knew of Lubitz's battle with depression, allowed him to continue training and"
+            " ultimately put him in the cockpit. Lufthansa, whose CEO Carsten Spohr previously said Lubitz was 100%"
+            ' fit to fly, described its statement Tuesday as a "swift and seamless clarification" and said it was'
+            " sharing the information and documents -- including training and medical records -- with public"
+            " prosecutors. Spohr traveled to the crash site Wednesday, where recovery teams have been working for the"
+            " past week to recover human remains and plane debris scattered across a steep mountainside. He saw the"
+            " crisis center set up in Seyne-les-Alpes, laid a wreath in the village of Le Vernet, closer to the crash"
+            " site, where grieving families have left flowers at a simple stone memorial. Menichini told CNN late"
+            " Tuesday that no visible human remains were left at the site but recovery teams would keep searching."
+            " French President Francois Hollande, speaking Tuesday, said that it should be possible to identify all"
+            " the victims using DNA analysis by the end of the week, sooner than authorities had previously suggested."
+            " In the meantime, the recovery of the victims' personal belongings will start Wednesday, Menichini said."
+            " Among those personal belongings could be more cell phones belonging to the 144 passengers and six crew"
+            " on board. Check out the latest from our correspondents . The details about Lubitz's correspondence with"
+            " the flight school during his training were among several developments as investigators continued to"
+            " delve into what caused the crash and Lubitz's possible motive for downing the jet. A Lufthansa"
+            " spokesperson told CNN on Tuesday that Lubitz had a valid medical certificate, had passed all his"
+            ' examinations and "held all the licenses required." Earlier, a spokesman for the prosecutor\'s office in'
+            " Dusseldorf, Christoph Kumpa, said medical records reveal Lubitz suffered from suicidal tendencies at"
+            " some point before his aviation career and underwent psychotherapy before he got his pilot's license."
+            " Kumpa emphasized there's no evidence suggesting Lubitz was suicidal or acting aggressively before the"
+            " crash. Investigators are looking into whether Lubitz feared his medical condition would cause him to"
+            " lose his pilot's license, a European government official briefed on the investigation told CNN on"
+            ' Tuesday. While flying was "a big part of his life," the source said, it\'s only one theory being'
+            " considered. Another source, a law enforcement official briefed on the investigation, also told CNN that"
+            " authorities believe the primary motive for Lubitz to bring down the plane was that he feared he would"
+            " not be allowed to fly because of his medical problems. Lubitz's girlfriend told investigators he had"
+            " seen an eye doctor and a neuropsychologist, both of whom deemed him unfit to work recently and concluded"
+            " he had psychological issues, the European government official said. But no matter what details emerge"
+            " about his previous mental health struggles, there's more to the story, said Brian Russell, a forensic"
+            ' psychologist. "Psychology can explain why somebody would turn rage inward on themselves about the fact'
+            " that maybe they weren't going to keep doing their job and they're upset about that and so they're"
+            ' suicidal," he said. "But there is no mental illness that explains why somebody then feels entitled to'
+            " also take that rage and turn it outward on 149 other people who had nothing to do with the person's"
+            ' problems." Germanwings crash compensation: What we know . Who was the captain of Germanwings Flight'
+            " 9525? CNN's Margot Haddad reported from Marseille and Pamela Brown from Dusseldorf, while Laura"
+            " Smith-Spark wrote from London. CNN's Frederik Pleitgen, Pamela Boykoff, Antonia Mortensen, Sandrine"
+            " Amiel and Anna-Maja Rappard contributed to this report."
+        )
 
-        SHORTER_ARTICLE = ' (CNN)The Palestinian Authority officially became the 123rd member of the International Criminal Court on Wednesday, a step that gives the court jurisdiction over alleged crimes in Palestinian territories. The formal accession was marked with a ceremony at The Hague, in the Netherlands, where the court is based. The Palestinians signed the ICC\'s founding Rome Statute in January, when they also accepted its jurisdiction over alleged crimes committed "in the occupied Palestinian territory, including East Jerusalem, since June 13, 2014." Later that month, the ICC opened a preliminary examination into the situation in Palestinian territories, paving the way for possible war crimes investigations against Israelis. As members of the court, Palestinians may be subject to counter-charges as well. Israel and the United States, neither of which is an ICC member, opposed the Palestinians\' efforts to join the body. But Palestinian Foreign Minister Riad al-Malki, speaking at Wednesday\'s ceremony, said it was a move toward greater justice. "As Palestine formally becomes a State Party to the Rome Statute today, the world is also a step closer to ending a long era of impunity and injustice," he said, according to an ICC news release. "Indeed, today brings us closer to our shared goals of justice and peace." Judge Kuniko Ozaki, a vice president of the ICC, said acceding to the treaty was just the first step for the Palestinians. "As the Rome Statute today enters into force for the State of Palestine, Palestine acquires all the rights as well as responsibilities that come with being a State Party to the Statute. These are substantive commitments, which cannot be taken lightly," she said. Rights group Human Rights Watch welcomed the development. "Governments seeking to penalize Palestine for joining the ICC should immediately end their pressure, and countries that support universal acceptance of the court\'s treaty should speak out to welcome its membership," said Balkees Jarrah, international justice counsel for the group. "What\'s objectionable is the attempts to undermine international justice, not Palestine\'s decision to join a treaty to which over 100 countries around the world are members." In January, when the preliminary ICC examination was opened, Israeli Prime Minister Benjamin Netanyahu described it as an outrage, saying the court was overstepping its boundaries. The United States also said it "strongly" disagreed with the court\'s decision. "As we have said repeatedly, we do not believe that Palestine is a state and therefore we do not believe that it is eligible to join the ICC," the State Department said in a statement. It urged the warring sides to resolve their differences through direct negotiations. "We will continue to oppose actions against Israel at the ICC as counterproductive to the cause of peace," it said. But the ICC begs to differ with the definition of a state for its purposes and refers to the territories as "Palestine." While a preliminary examination is not a formal investigation, it allows the court to review evidence and determine whether to investigate suspects on both sides. Prosecutor Fatou Bensouda said her office would "conduct its analysis in full independence and impartiality." The war between Israel and Hamas militants in Gaza last summer left more than 2,000 people dead. The inquiry will include alleged war crimes committed since June. The International Criminal Court was set up in 2002 to prosecute genocide, crimes against humanity and war crimes. CNN\'s Vasco Cotovio, Kareem Khadder and Faith Karimi contributed to this report.'
+        SHORTER_ARTICLE = (
+            " (CNN)The Palestinian Authority officially became the 123rd member of the International Criminal Court on"
+            " Wednesday, a step that gives the court jurisdiction over alleged crimes in Palestinian territories. The"
+            " formal accession was marked with a ceremony at The Hague, in the Netherlands, where the court is based."
+            " The Palestinians signed the ICC's founding Rome Statute in January, when they also accepted its"
+            ' jurisdiction over alleged crimes committed "in the occupied Palestinian territory, including East'
+            ' Jerusalem, since June 13, 2014." Later that month, the ICC opened a preliminary examination into the'
+            " situation in Palestinian territories, paving the way for possible war crimes investigations against"
+            " Israelis. As members of the court, Palestinians may be subject to counter-charges as well. Israel and"
+            " the United States, neither of which is an ICC member, opposed the Palestinians' efforts to join the"
+            " body. But Palestinian Foreign Minister Riad al-Malki, speaking at Wednesday's ceremony, said it was a"
+            ' move toward greater justice. "As Palestine formally becomes a State Party to the Rome Statute today, the'
+            ' world is also a step closer to ending a long era of impunity and injustice," he said, according to an'
+            ' ICC news release. "Indeed, today brings us closer to our shared goals of justice and peace." Judge'
+            " Kuniko Ozaki, a vice president of the ICC, said acceding to the treaty was just the first step for the"
+            ' Palestinians. "As the Rome Statute today enters into force for the State of Palestine, Palestine'
+            " acquires all the rights as well as responsibilities that come with being a State Party to the Statute."
+            ' These are substantive commitments, which cannot be taken lightly," she said. Rights group Human Rights'
+            ' Watch welcomed the development. "Governments seeking to penalize Palestine for joining the ICC should'
+            " immediately end their pressure, and countries that support universal acceptance of the court's treaty"
+            ' should speak out to welcome its membership," said Balkees Jarrah, international justice counsel for the'
+            " group. \"What's objectionable is the attempts to undermine international justice, not Palestine's"
+            ' decision to join a treaty to which over 100 countries around the world are members." In January, when'
+            " the preliminary ICC examination was opened, Israeli Prime Minister Benjamin Netanyahu described it as an"
+            ' outrage, saying the court was overstepping its boundaries. The United States also said it "strongly"'
+            " disagreed with the court's decision. \"As we have said repeatedly, we do not believe that Palestine is a"
+            ' state and therefore we do not believe that it is eligible to join the ICC," the State Department said in'
+            ' a statement. It urged the warring sides to resolve their differences through direct negotiations. "We'
+            ' will continue to oppose actions against Israel at the ICC as counterproductive to the cause of peace,"'
+            " it said. But the ICC begs to differ with the definition of a state for its purposes and refers to the"
+            ' territories as "Palestine." While a preliminary examination is not a formal investigation, it allows the'
+            " court to review evidence and determine whether to investigate suspects on both sides. Prosecutor Fatou"
+            ' Bensouda said her office would "conduct its analysis in full independence and impartiality." The war'
+            " between Israel and Hamas militants in Gaza last summer left more than 2,000 people dead. The inquiry"
+            " will include alleged war crimes committed since June. The International Criminal Court was set up in"
+            " 2002 to prosecute genocide, crimes against humanity and war crimes.
CNN's Vasco Cotovio, Kareem Khadder" + " and Faith Karimi contributed to this report." + ) # The below article tests that we don't add any hypotheses outside of the top n_beams - IRAN_ARTICLE = " (CNN)The United States and its negotiating partners reached a very strong framework agreement with Iran in Lausanne, Switzerland, on Thursday that limits Iran's nuclear program in such a way as to effectively block it from building a nuclear weapon. Expect pushback anyway, if the recent past is any harbinger. Just last month, in an attempt to head off such an agreement, House Speaker John Boehner invited Israeli Prime Minister Benjamin Netanyahu to preemptively blast it before Congress, and 47 senators sent a letter to the Iranian leadership warning them away from a deal. The debate that has already begun since the announcement of the new framework will likely result in more heat than light. It will not be helped by the gathering swirl of dubious assumptions and doubtful assertions. Let us address some of these: . The most misleading assertion, despite universal rejection by experts, is that the negotiations' objective at the outset was the total elimination of any nuclear program in Iran. That is the position of Netanyahu and his acolytes in the U.S. Congress. But that is not and never was the objective. If it had been, there would have been no Iranian team at the negotiating table. Rather, the objective has always been to structure an agreement or series of agreements so that Iran could not covertly develop a nuclear arsenal before the United States and its allies could respond. The new framework has exceeded expectations in achieving that goal. It would reduce Iran's low-enriched uranium stockpile, cut by two-thirds its number of installed centrifuges and implement a rigorous inspection regime. Another dubious assumption of opponents is that the Iranian nuclear program is a covert weapons program. Despite sharp accusations by some in the United States and its allies, Iran denies having such a program, and U.S. intelligence contends that Iran has not yet made the decision to build a nuclear weapon. Iran's continued cooperation with International Atomic Energy Agency inspections is further evidence on this point, and we'll know even more about Iran's program in the coming months and years because of the deal. In fact, the inspections provisions that are part of this agreement are designed to protect against any covert action by the Iranians. What's more, the rhetoric of some members of Congress has implied that the negotiations have been between only the United States and Iran (i.e., the 47 senators' letter warning that a deal might be killed by Congress or a future president). This of course is not the case. The talks were between Iran and the five permanent members of the U.N. Security Council (United States, United Kingdom, France, China and Russia) plus Germany, dubbed the P5+1. While the United States has played a leading role in the effort, it negotiated the terms alongside its partners. If the agreement reached by the P5+1 is rejected by Congress, it could result in an unraveling of the sanctions on Iran and threaten NATO cohesion in other areas. Another questionable assertion is that this agreement contains a sunset clause, after which Iran will be free to do as it pleases. Again, this is not the case. Some of the restrictions on Iran's nuclear activities, such as uranium enrichment, will be eased or eliminated over time, as long as 15 years. 
But most importantly, the framework agreement includes Iran's ratification of the Additional Protocol, which allows IAEA inspectors expanded access to nuclear sites both declared and nondeclared. This provision will be permanent. It does not sunset. Thus, going forward, if Iran decides to enrich uranium to weapons-grade levels, monitors will be able to detect such a move in a matter of days and alert the U.N. Security Council. Many in Congress have said that the agreement should be a formal treaty requiring the Senate to \"advise and consent.\" But the issue is not suited for a treaty. Treaties impose equivalent obligations on all signatories. For example, the New START treaty limits Russia and the United States to 1,550 deployed strategic warheads. But any agreement with Iran will not be so balanced. The restrictions and obligations in the final framework agreement will be imposed almost exclusively on Iran. The P5+1 are obligated only to ease and eventually remove most but not all economic sanctions, which were imposed as leverage to gain this final deal. Finally some insist that any agreement must address Iranian missile programs, human rights violations or support for Hamas or Hezbollah. As important as these issues are, and they must indeed be addressed, they are unrelated to the most important aim of a nuclear deal: preventing a nuclear Iran. To include them in the negotiations would be a poison pill. This agreement should be judged on its merits and on how it affects the security of our negotiating partners and allies, including Israel. Those judgments should be fact-based, not based on questionable assertions or dubious assumptions." + IRAN_ARTICLE = ( + " (CNN)The United States and its negotiating partners reached a very strong framework agreement with Iran" + " in Lausanne, Switzerland, on Thursday that limits Iran's nuclear program in such a way as to effectively" + " block it from building a nuclear weapon. Expect pushback anyway, if the recent past is any harbinger." + " Just last month, in an attempt to head off such an agreement, House Speaker John Boehner invited Israeli" + " Prime Minister Benjamin Netanyahu to preemptively blast it before Congress, and 47 senators sent a" + " letter to the Iranian leadership warning them away from a deal. The debate that has already begun since" + " the announcement of the new framework will likely result in more heat than light. It will not be helped" + " by the gathering swirl of dubious assumptions and doubtful assertions. Let us address some of these: ." + " The most misleading assertion, despite universal rejection by experts, is that the negotiations'" + " objective at the outset was the total elimination of any nuclear program in Iran. That is the position" + " of Netanyahu and his acolytes in the U.S. Congress. But that is not and never was the objective. If it" + " had been, there would have been no Iranian team at the negotiating table. Rather, the objective has" + " always been to structure an agreement or series of agreements so that Iran could not covertly develop a" + " nuclear arsenal before the United States and its allies could respond. The new framework has exceeded" + " expectations in achieving that goal. It would reduce Iran's low-enriched uranium stockpile, cut by" + " two-thirds its number of installed centrifuges and implement a rigorous inspection regime. Another" + " dubious assumption of opponents is that the Iranian nuclear program is a covert weapons program. 
Despite" + " sharp accusations by some in the United States and its allies, Iran denies having such a program, and" + " U.S. intelligence contends that Iran has not yet made the decision to build a nuclear weapon. Iran's" + " continued cooperation with International Atomic Energy Agency inspections is further evidence on this" + " point, and we'll know even more about Iran's program in the coming months and years because of the deal." + " In fact, the inspections provisions that are part of this agreement are designed to protect against any" + " covert action by the Iranians. What's more, the rhetoric of some members of Congress has implied that" + " the negotiations have been between only the United States and Iran (i.e., the 47 senators' letter" + " warning that a deal might be killed by Congress or a future president). This of course is not the case." + " The talks were between Iran and the five permanent members of the U.N. Security Council (United States," + " United Kingdom, France, China and Russia) plus Germany, dubbed the P5+1. While the United States has" + " played a leading role in the effort, it negotiated the terms alongside its partners. If the agreement" + " reached by the P5+1 is rejected by Congress, it could result in an unraveling of the sanctions on Iran" + " and threaten NATO cohesion in other areas. Another questionable assertion is that this agreement" + " contains a sunset clause, after which Iran will be free to do as it pleases. Again, this is not the" + " case. Some of the restrictions on Iran's nuclear activities, such as uranium enrichment, will be eased" + " or eliminated over time, as long as 15 years. But most importantly, the framework agreement includes" + " Iran's ratification of the Additional Protocol, which allows IAEA inspectors expanded access to nuclear" + " sites both declared and nondeclared. This provision will be permanent. It does not sunset. Thus, going" + " forward, if Iran decides to enrich uranium to weapons-grade levels, monitors will be able to detect such" + " a move in a matter of days and alert the U.N. Security Council. Many in Congress have said that the" + ' agreement should be a formal treaty requiring the Senate to "advise and consent." But the issue is not' + " suited for a treaty. Treaties impose equivalent obligations on all signatories. For example, the New" + " START treaty limits Russia and the United States to 1,550 deployed strategic warheads. But any agreement" + " with Iran will not be so balanced. The restrictions and obligations in the final framework agreement" + " will be imposed almost exclusively on Iran. The P5+1 are obligated only to ease and eventually remove" + " most but not all economic sanctions, which were imposed as leverage to gain this final deal. Finally" + " some insist that any agreement must address Iranian missile programs, human rights violations or support" + " for Hamas or Hezbollah. As important as these issues are, and they must indeed be addressed, they are" + " unrelated to the most important aim of a nuclear deal: preventing a nuclear Iran. To include them in" + " the negotiations would be a poison pill. This agreement should be judged on its merits and on how it" + " affects the security of our negotiating partners and allies, including Israel. Those judgments should be" + " fact-based, not based on questionable assertions or dubious assumptions." + ) - ARTICLE_SUBWAY = ' New York (CNN)When Liana Barrientos was 23 years old, she got married in Westchester County, New York. 
A year later, she got married again in Westchester County, but to a different man and without divorcing her first husband. Only 18 days after that marriage, she got hitched yet again. Then, Barrientos declared "I do" five more times, sometimes only within two weeks of each other. In 2010, she married once more, this time in the Bronx. In an application for a marriage license, she stated it was her "first and only" marriage. Barrientos, now 39, is facing two criminal counts of "offering a false instrument for filing in the first degree," referring to her false statements on the 2010 marriage license application, according to court documents. Prosecutors said the marriages were part of an immigration scam. On Friday, she pleaded not guilty at State Supreme Court in the Bronx, according to her attorney, Christopher Wright, who declined to comment further. After leaving court, Barrientos was arrested and charged with theft of service and criminal trespass for allegedly sneaking into the New York subway through an emergency exit, said Detective Annette Markowski, a police spokeswoman. In total, Barrientos has been married 10 times, with nine of her marriages occurring between 1999 and 2002. All occurred either in Westchester County, Long Island, New Jersey or the Bronx. She is believed to still be married to four men, and at one time, she was married to eight men at once, prosecutors say. Prosecutors said the immigration scam involved some of her husbands, who filed for permanent residence status shortly after the marriages. Any divorces happened only after such filings were approved. It was unclear whether any of the men will be prosecuted. The case was referred to the Bronx District Attorney\'s Office by Immigration and Customs Enforcement and the Department of Homeland Security\'s Investigation Division. Seven of the men are from so-called "red-flagged" countries, including Egypt, Turkey, Georgia, Pakistan and Mali. Her eighth husband, Rashid Rajput, was deported in 2006 to his native Pakistan after an investigation by the Joint Terrorism Task Force. If convicted, Barrientos faces up to four years in prison. Her next court appearance is scheduled for May 18.' + ARTICLE_SUBWAY = ( + " New York (CNN)When Liana Barrientos was 23 years old, she got married in Westchester County, New York. A" + " year later, she got married again in Westchester County, but to a different man and without divorcing" + " her first husband. Only 18 days after that marriage, she got hitched yet again. Then, Barrientos" + ' declared "I do" five more times, sometimes only within two weeks of each other. In 2010, she married' + " once more, this time in the Bronx. In an application for a marriage license, she stated it was her" + ' "first and only" marriage. Barrientos, now 39, is facing two criminal counts of "offering a false' + ' instrument for filing in the first degree," referring to her false statements on the 2010 marriage' + " license application, according to court documents. Prosecutors said the marriages were part of an" + " immigration scam. On Friday, she pleaded not guilty at State Supreme Court in the Bronx, according to" + " her attorney, Christopher Wright, who declined to comment further. After leaving court, Barrientos was" + " arrested and charged with theft of service and criminal trespass for allegedly sneaking into the New" + " York subway through an emergency exit, said Detective Annette Markowski, a police spokeswoman. 
In total," + " Barrientos has been married 10 times, with nine of her marriages occurring between 1999 and 2002. All" + " occurred either in Westchester County, Long Island, New Jersey or the Bronx. She is believed to still be" + " married to four men, and at one time, she was married to eight men at once, prosecutors say. Prosecutors" + " said the immigration scam involved some of her husbands, who filed for permanent residence status" + " shortly after the marriages. Any divorces happened only after such filings were approved. It was" + " unclear whether any of the men will be prosecuted. The case was referred to the Bronx District" + " Attorney's Office by Immigration and Customs Enforcement and the Department of Homeland Security's" + ' Investigation Division. Seven of the men are from so-called "red-flagged" countries, including Egypt,' + " Turkey, Georgia, Pakistan and Mali. Her eighth husband, Rashid Rajput, was deported in 2006 to his" + " native Pakistan after an investigation by the Joint Terrorism Task Force. If convicted, Barrientos faces" + " up to four years in prison. Her next court appearance is scheduled for May 18." + ) dct = tok.batch_encode_plus( [FRANCE_ARTICLE, SHORTER_ARTICLE, IRAN_ARTICLE, ARTICLE_SUBWAY], diff --git a/tests/models/bart/test_modeling_flax_bart.py b/tests/models/bart/test_modeling_flax_bart.py index ef4f9d3852..54a6ff4534 100644 --- a/tests/models/bart/test_modeling_flax_bart.py +++ b/tests/models/bart/test_modeling_flax_bart.py @@ -420,7 +420,10 @@ class FlaxBartModelTest(FlaxModelTesterMixin, unittest.TestCase, FlaxGenerationT model = FlaxBartForConditionalGeneration.from_pretrained("sshleifer/distilbart-cnn-6-6") tokenizer = BartTokenizer.from_pretrained("sshleifer/distilbart-cnn-6-6") - input_str = "This sentence is made of three parts. Each part is important on its own. One part is about animals, the other part about planes, and the last part about housing." + input_str = ( + "This sentence is made of three parts. Each part is important on its own. One part is about animals, the" + " other part about planes, and the last part about housing." + ) input_ids = tokenizer(input_str, return_tensors="np").input_ids sequences = model.generate(input_ids, num_beams=2, max_length=20).sequences @@ -436,14 +439,197 @@ class FlaxBartModelTest(FlaxModelTesterMixin, unittest.TestCase, FlaxGenerationT model = FlaxBartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn") tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn") - FRANCE_ARTICLE = ' Marseille, France (CNN)The French prosecutor leading an investigation into the crash of Germanwings Flight 9525 insisted Wednesday that he was not aware of any video footage from on board the plane. Marseille prosecutor Brice Robin told CNN that "so far no videos were used in the crash investigation." He added, "A person who has such a video needs to immediately give it to the investigators." Robin\'s comments follow claims by two magazines, German daily Bild and French Paris Match, of a cell phone video showing the harrowing final seconds from on board Germanwings Flight 9525 as it crashed into the French Alps. All 150 on board were killed. Paris Match and Bild reported that the video was recovered from a phone at the wreckage site. The two publications described the supposed video, but did not post it on their websites. The publications said that they watched the video, which was found by a source close to the investigation. 
"One can hear cries of \'My God\' in several languages," Paris Match reported. "Metallic banging can also be heard more than three times, perhaps of the pilot trying to open the cockpit door with a heavy object. Towards the end, after a heavy shake, stronger than the others, the screaming intensifies. Then nothing." "It is a very disturbing scene," said Julian Reichelt, editor-in-chief of Bild online. An official with France\'s accident investigation agency, the BEA, said the agency is not aware of any such video. Lt. Col. Jean-Marc Menichini, a French Gendarmerie spokesman in charge of communications on rescue efforts around the Germanwings crash site, told CNN that the reports were "completely wrong" and "unwarranted." Cell phones have been collected at the site, he said, but that they "hadn\'t been exploited yet." Menichini said he believed the cell phones would need to be sent to the Criminal Research Institute in Rosny sous-Bois, near Paris, in order to be analyzed by specialized technicians working hand-in-hand with investigators. But none of the cell phones found so far have been sent to the institute, Menichini said. Asked whether staff involved in the search could have leaked a memory card to the media, Menichini answered with a categorical "no." Reichelt told "Erin Burnett: Outfront" that he had watched the video and stood by the report, saying Bild and Paris Match are "very confident" that the clip is real. He noted that investigators only revealed they\'d recovered cell phones from the crash site after Bild and Paris Match published their reports. "That is something we did not know before. ... Overall we can say many things of the investigation weren\'t revealed by the investigation at the beginning," he said. What was mental state of Germanwings co-pilot? German airline Lufthansa confirmed Tuesday that co-pilot Andreas Lubitz had battled depression years before he took the controls of Germanwings Flight 9525, which he\'s accused of deliberately crashing last week in the French Alps. Lubitz told his Lufthansa flight training school in 2009 that he had a "previous episode of severe depression," the airline said Tuesday. Email correspondence between Lubitz and the school discovered in an internal investigation, Lufthansa said, included medical documents he submitted in connection with resuming his flight training. The announcement indicates that Lufthansa, the parent company of Germanwings, knew of Lubitz\'s battle with depression, allowed him to continue training and ultimately put him in the cockpit. Lufthansa, whose CEO Carsten Spohr previously said Lubitz was 100% fit to fly, described its statement Tuesday as a "swift and seamless clarification" and said it was sharing the information and documents -- including training and medical records -- with public prosecutors. Spohr traveled to the crash site Wednesday, where recovery teams have been working for the past week to recover human remains and plane debris scattered across a steep mountainside. He saw the crisis center set up in Seyne-les-Alpes, laid a wreath in the village of Le Vernet, closer to the crash site, where grieving families have left flowers at a simple stone memorial. Menichini told CNN late Tuesday that no visible human remains were left at the site but recovery teams would keep searching. French President Francois Hollande, speaking Tuesday, said that it should be possible to identify all the victims using DNA analysis by the end of the week, sooner than authorities had previously suggested. 
In the meantime, the recovery of the victims\' personal belongings will start Wednesday, Menichini said. Among those personal belongings could be more cell phones belonging to the 144 passengers and six crew on board. Check out the latest from our correspondents . The details about Lubitz\'s correspondence with the flight school during his training were among several developments as investigators continued to delve into what caused the crash and Lubitz\'s possible motive for downing the jet. A Lufthansa spokesperson told CNN on Tuesday that Lubitz had a valid medical certificate, had passed all his examinations and "held all the licenses required." Earlier, a spokesman for the prosecutor\'s office in Dusseldorf, Christoph Kumpa, said medical records reveal Lubitz suffered from suicidal tendencies at some point before his aviation career and underwent psychotherapy before he got his pilot\'s license. Kumpa emphasized there\'s no evidence suggesting Lubitz was suicidal or acting aggressively before the crash. Investigators are looking into whether Lubitz feared his medical condition would cause him to lose his pilot\'s license, a European government official briefed on the investigation told CNN on Tuesday. While flying was "a big part of his life," the source said, it\'s only one theory being considered. Another source, a law enforcement official briefed on the investigation, also told CNN that authorities believe the primary motive for Lubitz to bring down the plane was that he feared he would not be allowed to fly because of his medical problems. Lubitz\'s girlfriend told investigators he had seen an eye doctor and a neuropsychologist, both of whom deemed him unfit to work recently and concluded he had psychological issues, the European government official said. But no matter what details emerge about his previous mental health struggles, there\'s more to the story, said Brian Russell, a forensic psychologist. "Psychology can explain why somebody would turn rage inward on themselves about the fact that maybe they weren\'t going to keep doing their job and they\'re upset about that and so they\'re suicidal," he said. "But there is no mental illness that explains why somebody then feels entitled to also take that rage and turn it outward on 149 other people who had nothing to do with the person\'s problems." Germanwings crash compensation: What we know . Who was the captain of Germanwings Flight 9525? CNN\'s Margot Haddad reported from Marseille and Pamela Brown from Dusseldorf, while Laura Smith-Spark wrote from London. CNN\'s Frederik Pleitgen, Pamela Boykoff, Antonia Mortensen, Sandrine Amiel and Anna-Maja Rappard contributed to this report.' # @noq + FRANCE_ARTICLE = ( # @noq + " Marseille, France (CNN)The French prosecutor leading an investigation into the crash of Germanwings" + " Flight 9525 insisted Wednesday that he was not aware of any video footage from on board the plane." + ' Marseille prosecutor Brice Robin told CNN that "so far no videos were used in the crash investigation."' + ' He added, "A person who has such a video needs to immediately give it to the investigators." Robin\'s' + " comments follow claims by two magazines, German daily Bild and French Paris Match, of a cell phone video" + " showing the harrowing final seconds from on board Germanwings Flight 9525 as it crashed into the French" + " Alps. All 150 on board were killed. Paris Match and Bild reported that the video was recovered from a" + " phone at the wreckage site. 
The two publications described the supposed video, but did not post it on" + " their websites. The publications said that they watched the video, which was found by a source close to" + " the investigation. \"One can hear cries of 'My God' in several languages,\" Paris Match reported." + ' "Metallic banging can also be heard more than three times, perhaps of the pilot trying to open the' + " cockpit door with a heavy object. Towards the end, after a heavy shake, stronger than the others, the" + ' screaming intensifies. Then nothing." "It is a very disturbing scene," said Julian Reichelt,' + " editor-in-chief of Bild online. An official with France's accident investigation agency, the BEA, said" + " the agency is not aware of any such video. Lt. Col. Jean-Marc Menichini, a French Gendarmerie spokesman" + " in charge of communications on rescue efforts around the Germanwings crash site, told CNN that the" + ' reports were "completely wrong" and "unwarranted." Cell phones have been collected at the site, he said,' + ' but that they "hadn\'t been exploited yet." Menichini said he believed the cell phones would need to be' + " sent to the Criminal Research Institute in Rosny sous-Bois, near Paris, in order to be analyzed by" + " specialized technicians working hand-in-hand with investigators. But none of the cell phones found so" + " far have been sent to the institute, Menichini said. Asked whether staff involved in the search could" + ' have leaked a memory card to the media, Menichini answered with a categorical "no." Reichelt told "Erin' + ' Burnett: Outfront" that he had watched the video and stood by the report, saying Bild and Paris Match' + ' are "very confident" that the clip is real. He noted that investigators only revealed they\'d recovered' + ' cell phones from the crash site after Bild and Paris Match published their reports. "That is something' + " we did not know before. ... Overall we can say many things of the investigation weren't revealed by the" + ' investigation at the beginning," he said. What was mental state of Germanwings co-pilot? German airline' + " Lufthansa confirmed Tuesday that co-pilot Andreas Lubitz had battled depression years before he took the" + " controls of Germanwings Flight 9525, which he's accused of deliberately crashing last week in the" + ' French Alps. Lubitz told his Lufthansa flight training school in 2009 that he had a "previous episode of' + ' severe depression," the airline said Tuesday. Email correspondence between Lubitz and the school' + " discovered in an internal investigation, Lufthansa said, included medical documents he submitted in" + " connection with resuming his flight training. The announcement indicates that Lufthansa, the parent" + " company of Germanwings, knew of Lubitz's battle with depression, allowed him to continue training and" + " ultimately put him in the cockpit. Lufthansa, whose CEO Carsten Spohr previously said Lubitz was 100%" + ' fit to fly, described its statement Tuesday as a "swift and seamless clarification" and said it was' + " sharing the information and documents -- including training and medical records -- with public" + " prosecutors. Spohr traveled to the crash site Wednesday, where recovery teams have been working for the" + " past week to recover human remains and plane debris scattered across a steep mountainside. 
He saw the" + " crisis center set up in Seyne-les-Alpes, laid a wreath in the village of Le Vernet, closer to the crash" + " site, where grieving families have left flowers at a simple stone memorial. Menichini told CNN late" + " Tuesday that no visible human remains were left at the site but recovery teams would keep searching." + " French President Francois Hollande, speaking Tuesday, said that it should be possible to identify all" + " the victims using DNA analysis by the end of the week, sooner than authorities had previously suggested." + " In the meantime, the recovery of the victims' personal belongings will start Wednesday, Menichini said." + " Among those personal belongings could be more cell phones belonging to the 144 passengers and six crew" + " on board. Check out the latest from our correspondents . The details about Lubitz's correspondence with" + " the flight school during his training were among several developments as investigators continued to" + " delve into what caused the crash and Lubitz's possible motive for downing the jet. A Lufthansa" + " spokesperson told CNN on Tuesday that Lubitz had a valid medical certificate, had passed all his" + ' examinations and "held all the licenses required." Earlier, a spokesman for the prosecutor\'s office in' + " Dusseldorf, Christoph Kumpa, said medical records reveal Lubitz suffered from suicidal tendencies at" + " some point before his aviation career and underwent psychotherapy before he got his pilot's license." + " Kumpa emphasized there's no evidence suggesting Lubitz was suicidal or acting aggressively before the" + " crash. Investigators are looking into whether Lubitz feared his medical condition would cause him to" + " lose his pilot's license, a European government official briefed on the investigation told CNN on" + ' Tuesday. While flying was "a big part of his life," the source said, it\'s only one theory being' + " considered. Another source, a law enforcement official briefed on the investigation, also told CNN that" + " authorities believe the primary motive for Lubitz to bring down the plane was that he feared he would" + " not be allowed to fly because of his medical problems. Lubitz's girlfriend told investigators he had" + " seen an eye doctor and a neuropsychologist, both of whom deemed him unfit to work recently and concluded" + " he had psychological issues, the European government official said. But no matter what details emerge" + " about his previous mental health struggles, there's more to the story, said Brian Russell, a forensic" + ' psychologist. "Psychology can explain why somebody would turn rage inward on themselves about the fact' + " that maybe they weren't going to keep doing their job and they're upset about that and so they're" + ' suicidal," he said. "But there is no mental illness that explains why somebody then feels entitled to' + " also take that rage and turn it outward on 149 other people who had nothing to do with the person's" + ' problems." Germanwings crash compensation: What we know . Who was the captain of Germanwings Flight' + " 9525? CNN's Margot Haddad reported from Marseille and Pamela Brown from Dusseldorf, while Laura" + " Smith-Spark wrote from London. CNN's Frederik Pleitgen, Pamela Boykoff, Antonia Mortensen, Sandrine" + " Amiel and Anna-Maja Rappard contributed to this report." 
+ ) - SHORTER_ARTICLE = ' (CNN)The Palestinian Authority officially became the 123rd member of the International Criminal Court on Wednesday, a step that gives the court jurisdiction over alleged crimes in Palestinian territories. The formal accession was marked with a ceremony at The Hague, in the Netherlands, where the court is based. The Palestinians signed the ICC\'s founding Rome Statute in January, when they also accepted its jurisdiction over alleged crimes committed "in the occupied Palestinian territory, including East Jerusalem, since June 13, 2014." Later that month, the ICC opened a preliminary examination into the situation in Palestinian territories, paving the way for possible war crimes investigations against Israelis. As members of the court, Palestinians may be subject to counter-charges as well. Israel and the United States, neither of which is an ICC member, opposed the Palestinians\' efforts to join the body. But Palestinian Foreign Minister Riad al-Malki, speaking at Wednesday\'s ceremony, said it was a move toward greater justice. "As Palestine formally becomes a State Party to the Rome Statute today, the world is also a step closer to ending a long era of impunity and injustice," he said, according to an ICC news release. "Indeed, today brings us closer to our shared goals of justice and peace." Judge Kuniko Ozaki, a vice president of the ICC, said acceding to the treaty was just the first step for the Palestinians. "As the Rome Statute today enters into force for the State of Palestine, Palestine acquires all the rights as well as responsibilities that come with being a State Party to the Statute. These are substantive commitments, which cannot be taken lightly," she said. Rights group Human Rights Watch welcomed the development. "Governments seeking to penalize Palestine for joining the ICC should immediately end their pressure, and countries that support universal acceptance of the court\'s treaty should speak out to welcome its membership," said Balkees Jarrah, international justice counsel for the group. "What\'s objectionable is the attempts to undermine international justice, not Palestine\'s decision to join a treaty to which over 100 countries around the world are members." In January, when the preliminary ICC examination was opened, Israeli Prime Minister Benjamin Netanyahu described it as an outrage, saying the court was overstepping its boundaries. The United States also said it "strongly" disagreed with the court\'s decision. "As we have said repeatedly, we do not believe that Palestine is a state and therefore we do not believe that it is eligible to join the ICC," the State Department said in a statement. It urged the warring sides to resolve their differences through direct negotiations. "We will continue to oppose actions against Israel at the ICC as counterproductive to the cause of peace," it said. But the ICC begs to differ with the definition of a state for its purposes and refers to the territories as "Palestine." While a preliminary examination is not a formal investigation, it allows the court to review evidence and determine whether to investigate suspects on both sides. Prosecutor Fatou Bensouda said her office would "conduct its analysis in full independence and impartiality." The war between Israel and Hamas militants in Gaza last summer left more than 2,000 people dead. The inquiry will include alleged war crimes committed since June. 
The International Criminal Court was set up in 2002 to prosecute genocide, crimes against humanity and war crimes. CNN\'s Vasco Cotovio, Kareem Khadder and Faith Karimi contributed to this report.' + SHORTER_ARTICLE = ( + " (CNN)The Palestinian Authority officially became the 123rd member of the International Criminal Court on" + " Wednesday, a step that gives the court jurisdiction over alleged crimes in Palestinian territories. The" + " formal accession was marked with a ceremony at The Hague, in the Netherlands, where the court is based." + " The Palestinians signed the ICC's founding Rome Statute in January, when they also accepted its" + ' jurisdiction over alleged crimes committed "in the occupied Palestinian territory, including East' + ' Jerusalem, since June 13, 2014." Later that month, the ICC opened a preliminary examination into the' + " situation in Palestinian territories, paving the way for possible war crimes investigations against" + " Israelis. As members of the court, Palestinians may be subject to counter-charges as well. Israel and" + " the United States, neither of which is an ICC member, opposed the Palestinians' efforts to join the" + " body. But Palestinian Foreign Minister Riad al-Malki, speaking at Wednesday's ceremony, said it was a" + ' move toward greater justice. "As Palestine formally becomes a State Party to the Rome Statute today, the' + ' world is also a step closer to ending a long era of impunity and injustice," he said, according to an' + ' ICC news release. "Indeed, today brings us closer to our shared goals of justice and peace." Judge' + " Kuniko Ozaki, a vice president of the ICC, said acceding to the treaty was just the first step for the" + ' Palestinians. "As the Rome Statute today enters into force for the State of Palestine, Palestine' + " acquires all the rights as well as responsibilities that come with being a State Party to the Statute." + ' These are substantive commitments, which cannot be taken lightly," she said. Rights group Human Rights' + ' Watch welcomed the development. "Governments seeking to penalize Palestine for joining the ICC should' + " immediately end their pressure, and countries that support universal acceptance of the court's treaty" + ' should speak out to welcome its membership," said Balkees Jarrah, international justice counsel for the' + " group. \"What's objectionable is the attempts to undermine international justice, not Palestine's" + ' decision to join a treaty to which over 100 countries around the world are members." In January, when' + " the preliminary ICC examination was opened, Israeli Prime Minister Benjamin Netanyahu described it as an" + ' outrage, saying the court was overstepping its boundaries. The United States also said it "strongly"' + " disagreed with the court's decision. \"As we have said repeatedly, we do not believe that Palestine is a" + ' state and therefore we do not believe that it is eligible to join the ICC," the State Department said in' + ' a statement. It urged the warring sides to resolve their differences through direct negotiations. "We' + ' will continue to oppose actions against Israel at the ICC as counterproductive to the cause of peace,"' + " it said. But the ICC begs to differ with the definition of a state for its purposes and refers to the" + ' territories as "Palestine." While a preliminary examination is not a formal investigation, it allows the' + " court to review evidence and determine whether to investigate suspects on both sides. 
Prosecutor Fatou" + ' Bensouda said her office would "conduct its analysis in full independence and impartiality." The war' + " between Israel and Hamas militants in Gaza last summer left more than 2,000 people dead. The inquiry" + " will include alleged war crimes committed since June. The International Criminal Court was set up in" + " 2002 to prosecute genocide, crimes against humanity and war crimes. CNN's Vasco Cotovio, Kareem Khadder" + " and Faith Karimi contributed to this report." + ) # The below article tests that we don't add any hypotheses outside of the top n_beams - IRAN_ARTICLE = " (CNN)The United States and its negotiating partners reached a very strong framework agreement with Iran in Lausanne, Switzerland, on Thursday that limits Iran's nuclear program in such a way as to effectively block it from building a nuclear weapon. Expect pushback anyway, if the recent past is any harbinger. Just last month, in an attempt to head off such an agreement, House Speaker John Boehner invited Israeli Prime Minister Benjamin Netanyahu to preemptively blast it before Congress, and 47 senators sent a letter to the Iranian leadership warning them away from a deal. The debate that has already begun since the announcement of the new framework will likely result in more heat than light. It will not be helped by the gathering swirl of dubious assumptions and doubtful assertions. Let us address some of these: . The most misleading assertion, despite universal rejection by experts, is that the negotiations' objective at the outset was the total elimination of any nuclear program in Iran. That is the position of Netanyahu and his acolytes in the U.S. Congress. But that is not and never was the objective. If it had been, there would have been no Iranian team at the negotiating table. Rather, the objective has always been to structure an agreement or series of agreements so that Iran could not covertly develop a nuclear arsenal before the United States and its allies could respond. The new framework has exceeded expectations in achieving that goal. It would reduce Iran's low-enriched uranium stockpile, cut by two-thirds its number of installed centrifuges and implement a rigorous inspection regime. Another dubious assumption of opponents is that the Iranian nuclear program is a covert weapons program. Despite sharp accusations by some in the United States and its allies, Iran denies having such a program, and U.S. intelligence contends that Iran has not yet made the decision to build a nuclear weapon. Iran's continued cooperation with International Atomic Energy Agency inspections is further evidence on this point, and we'll know even more about Iran's program in the coming months and years because of the deal. In fact, the inspections provisions that are part of this agreement are designed to protect against any covert action by the Iranians. What's more, the rhetoric of some members of Congress has implied that the negotiations have been between only the United States and Iran (i.e., the 47 senators' letter warning that a deal might be killed by Congress or a future president). This of course is not the case. The talks were between Iran and the five permanent members of the U.N. Security Council (United States, United Kingdom, France, China and Russia) plus Germany, dubbed the P5+1. While the United States has played a leading role in the effort, it negotiated the terms alongside its partners. 
If the agreement reached by the P5+1 is rejected by Congress, it could result in an unraveling of the sanctions on Iran and threaten NATO cohesion in other areas. Another questionable assertion is that this agreement contains a sunset clause, after which Iran will be free to do as it pleases. Again, this is not the case. Some of the restrictions on Iran's nuclear activities, such as uranium enrichment, will be eased or eliminated over time, as long as 15 years. But most importantly, the framework agreement includes Iran's ratification of the Additional Protocol, which allows IAEA inspectors expanded access to nuclear sites both declared and nondeclared. This provision will be permanent. It does not sunset. Thus, going forward, if Iran decides to enrich uranium to weapons-grade levels, monitors will be able to detect such a move in a matter of days and alert the U.N. Security Council. Many in Congress have said that the agreement should be a formal treaty requiring the Senate to \"advise and consent.\" But the issue is not suited for a treaty. Treaties impose equivalent obligations on all signatories. For example, the New START treaty limits Russia and the United States to 1,550 deployed strategic warheads. But any agreement with Iran will not be so balanced. The restrictions and obligations in the final framework agreement will be imposed almost exclusively on Iran. The P5+1 are obligated only to ease and eventually remove most but not all economic sanctions, which were imposed as leverage to gain this final deal. Finally some insist that any agreement must address Iranian missile programs, human rights violations or support for Hamas or Hezbollah. As important as these issues are, and they must indeed be addressed, they are unrelated to the most important aim of a nuclear deal: preventing a nuclear Iran. To include them in the negotiations would be a poison pill. This agreement should be judged on its merits and on how it affects the security of our negotiating partners and allies, including Israel. Those judgments should be fact-based, not based on questionable assertions or dubious assumptions." + IRAN_ARTICLE = ( + " (CNN)The United States and its negotiating partners reached a very strong framework agreement with Iran" + " in Lausanne, Switzerland, on Thursday that limits Iran's nuclear program in such a way as to effectively" + " block it from building a nuclear weapon. Expect pushback anyway, if the recent past is any harbinger." + " Just last month, in an attempt to head off such an agreement, House Speaker John Boehner invited Israeli" + " Prime Minister Benjamin Netanyahu to preemptively blast it before Congress, and 47 senators sent a" + " letter to the Iranian leadership warning them away from a deal. The debate that has already begun since" + " the announcement of the new framework will likely result in more heat than light. It will not be helped" + " by the gathering swirl of dubious assumptions and doubtful assertions. Let us address some of these: ." + " The most misleading assertion, despite universal rejection by experts, is that the negotiations'" + " objective at the outset was the total elimination of any nuclear program in Iran. That is the position" + " of Netanyahu and his acolytes in the U.S. Congress. But that is not and never was the objective. If it" + " had been, there would have been no Iranian team at the negotiating table. 
Rather, the objective has"
+ " always been to structure an agreement or series of agreements so that Iran could not covertly develop a"
+ " nuclear arsenal before the United States and its allies could respond. The new framework has exceeded"
+ " expectations in achieving that goal. It would reduce Iran's low-enriched uranium stockpile, cut by"
+ " two-thirds its number of installed centrifuges and implement a rigorous inspection regime. Another"
+ " dubious assumption of opponents is that the Iranian nuclear program is a covert weapons program. Despite"
+ " sharp accusations by some in the United States and its allies, Iran denies having such a program, and"
+ " U.S. intelligence contends that Iran has not yet made the decision to build a nuclear weapon. Iran's"
+ " continued cooperation with International Atomic Energy Agency inspections is further evidence on this"
+ " point, and we'll know even more about Iran's program in the coming months and years because of the deal."
+ " In fact, the inspections provisions that are part of this agreement are designed to protect against any"
+ " covert action by the Iranians. What's more, the rhetoric of some members of Congress has implied that"
+ " the negotiations have been between only the United States and Iran (i.e., the 47 senators' letter"
+ " warning that a deal might be killed by Congress or a future president). This of course is not the case."
+ " The talks were between Iran and the five permanent members of the U.N. Security Council (United States,"
+ " United Kingdom, France, China and Russia) plus Germany, dubbed the P5+1. While the United States has"
+ " played a leading role in the effort, it negotiated the terms alongside its partners. If the agreement"
+ " reached by the P5+1 is rejected by Congress, it could result in an unraveling of the sanctions on Iran"
+ " and threaten NATO cohesion in other areas. Another questionable assertion is that this agreement"
+ " contains a sunset clause, after which Iran will be free to do as it pleases. Again, this is not the"
+ " case. Some of the restrictions on Iran's nuclear activities, such as uranium enrichment, will be eased"
+ " or eliminated over time, as long as 15 years. But most importantly, the framework agreement includes"
+ " Iran's ratification of the Additional Protocol, which allows IAEA inspectors expanded access to nuclear"
+ " sites both declared and nondeclared. This provision will be permanent. It does not sunset. Thus, going"
+ " forward, if Iran decides to enrich uranium to weapons-grade levels, monitors will be able to detect such"
+ " a move in a matter of days and alert the U.N. Security Council. Many in Congress have said that the"
+ ' agreement should be a formal treaty requiring the Senate to "advise and consent." But the issue is not'
+ " suited for a treaty. Treaties impose equivalent obligations on all signatories. For example, the New"
+ " START treaty limits Russia and the United States to 1,550 deployed strategic warheads. But any agreement"
+ " with Iran will not be so balanced. The restrictions and obligations in the final framework agreement"
+ " will be imposed almost exclusively on Iran. The P5+1 are obligated only to ease and eventually remove"
+ " most but not all economic sanctions, which were imposed as leverage to gain this final deal. Finally"
+ " some insist that any agreement must address Iranian missile programs, human rights violations or support"
+ " for Hamas or Hezbollah. As important as these issues are, and they must indeed be addressed, they are"
+ " unrelated to the most important aim of a nuclear deal: preventing a nuclear Iran. To include them in"
+ " the negotiations would be a poison pill. This agreement should be judged on its merits and on how it"
+ " affects the security of our negotiating partners and allies, including Israel. Those judgments should be"
+ " fact-based, not based on questionable assertions or dubious assumptions."
+ )
- ARTICLE_SUBWAY = ' New York (CNN)When Liana Barrientos was 23 years old, she got married in Westchester County, New York. A year later, she got married again in Westchester County, but to a different man and without divorcing her first husband. Only 18 days after that marriage, she got hitched yet again. Then, Barrientos declared "I do" five more times, sometimes only within two weeks of each other. In 2010, she married once more, this time in the Bronx. In an application for a marriage license, she stated it was her "first and only" marriage. Barrientos, now 39, is facing two criminal counts of "offering a false instrument for filing in the first degree," referring to her false statements on the 2010 marriage license application, according to court documents. Prosecutors said the marriages were part of an immigration scam. On Friday, she pleaded not guilty at State Supreme Court in the Bronx, according to her attorney, Christopher Wright, who declined to comment further. After leaving court, Barrientos was arrested and charged with theft of service and criminal trespass for allegedly sneaking into the New York subway through an emergency exit, said Detective Annette Markowski, a police spokeswoman. In total, Barrientos has been married 10 times, with nine of her marriages occurring between 1999 and 2002. All occurred either in Westchester County, Long Island, New Jersey or the Bronx. She is believed to still be married to four men, and at one time, she was married to eight men at once, prosecutors say. Prosecutors said the immigration scam involved some of her husbands, who filed for permanent residence status shortly after the marriages. Any divorces happened only after such filings were approved. It was unclear whether any of the men will be prosecuted. The case was referred to the Bronx District Attorney\'s Office by Immigration and Customs Enforcement and the Department of Homeland Security\'s Investigation Division. Seven of the men are from so-called "red-flagged" countries, including Egypt, Turkey, Georgia, Pakistan and Mali. Her eighth husband, Rashid Rajput, was deported in 2006 to his native Pakistan after an investigation by the Joint Terrorism Task Force. If convicted, Barrientos faces up to four years in prison. Her next court appearance is scheduled for May 18.'
+ ARTICLE_SUBWAY = (
+ " New York (CNN)When Liana Barrientos was 23 years old, she got married in Westchester County, New York. A"
+ " year later, she got married again in Westchester County, but to a different man and without divorcing"
+ " her first husband. Only 18 days after that marriage, she got hitched yet again. Then, Barrientos"
+ ' declared "I do" five more times, sometimes only within two weeks of each other. In 2010, she married'
+ " once more, this time in the Bronx. In an application for a marriage license, she stated it was her"
+ ' "first and only" marriage. Barrientos, now 39, is facing two criminal counts of "offering a false'
+ ' instrument for filing in the first degree," referring to her false statements on the 2010 marriage'
+ " license application, according to court documents. Prosecutors said the marriages were part of an"
+ " immigration scam. On Friday, she pleaded not guilty at State Supreme Court in the Bronx, according to"
+ " her attorney, Christopher Wright, who declined to comment further. After leaving court, Barrientos was"
+ " arrested and charged with theft of service and criminal trespass for allegedly sneaking into the New"
+ " York subway through an emergency exit, said Detective Annette Markowski, a police spokeswoman. In total,"
+ " Barrientos has been married 10 times, with nine of her marriages occurring between 1999 and 2002. All"
+ " occurred either in Westchester County, Long Island, New Jersey or the Bronx. She is believed to still be"
+ " married to four men, and at one time, she was married to eight men at once, prosecutors say. Prosecutors"
+ " said the immigration scam involved some of her husbands, who filed for permanent residence status"
+ " shortly after the marriages. Any divorces happened only after such filings were approved. It was"
+ " unclear whether any of the men will be prosecuted. The case was referred to the Bronx District"
+ " Attorney's Office by Immigration and Customs Enforcement and the Department of Homeland Security's"
+ ' Investigation Division. Seven of the men are from so-called "red-flagged" countries, including Egypt,'
+ " Turkey, Georgia, Pakistan and Mali. Her eighth husband, Rashid Rajput, was deported in 2006 to his"
+ " native Pakistan after an investigation by the Joint Terrorism Task Force. If convicted, Barrientos faces"
+ " up to four years in prison. Her next court appearance is scheduled for May 18."
+ )
dct = tokenizer.batch_encode_plus(
[FRANCE_ARTICLE, SHORTER_ARTICLE, IRAN_ARTICLE, ARTICLE_SUBWAY],
@@ -463,10 +649,21 @@ class FlaxBartModelTest(FlaxModelTesterMixin, unittest.TestCase, FlaxGenerationT
assert (hypotheses_batch[:, 1] == 0).all().item()
EXPECTED = [
- "A French prosecutor says he is not aware of any video footage from on board the plane. Two German magazines claim to have found a cell phone video showing the crash. The publications say they watched the video, which was found by a source close to the investigation. All 150 on board the Germanwings flight were killed.",
- "Palestinian Authority becomes 123rd member of the International Criminal Court. The move gives the court jurisdiction over alleged crimes in Palestinian territories. Israel and the United States opposed the Palestinians' efforts to join the body. But Palestinian Foreign Minister Riad al-Malki said it was a move toward greater justice.",
- "U.S. and its negotiating partners reached a strong framework agreement with Iran. Peter Bergen: The debate that has already begun will likely result in more heat than light. Bergen: The most misleading assertion is that the negotiations' objective at the outset was the total elimination of any nuclear program.",
- "Liana Barrientos, 39, has been married 10 times, sometimes within two weeks of each other. Prosecutors say the marriages were part of an immigration scam. She pleaded not guilty at State Supreme Court in the Bronx on Friday. If convicted, Barrientos faces up to four years in prison.",
+ "A French prosecutor says he is not aware of any video footage from on board the plane. Two German"
+ " magazines claim to have found a cell phone video showing the crash. The publications say they watched"
+ " the video, which was found by a source close to the investigation. All 150 on board the Germanwings"
+ " flight were killed.",
+ "Palestinian Authority becomes 123rd member of the International Criminal Court. The move gives the court"
+ " jurisdiction over alleged crimes in Palestinian territories. Israel and the United States opposed the"
+ " Palestinians' efforts to join the body. But Palestinian Foreign Minister Riad al-Malki said it was a"
+ " move toward greater justice.",
+ "U.S. and its negotiating partners reached a strong framework agreement with Iran. Peter Bergen: The"
+ " debate that has already begun will likely result in more heat than light. Bergen: The most misleading"
+ " assertion is that the negotiations' objective at the outset was the total elimination of any nuclear"
+ " program.",
+ "Liana Barrientos, 39, has been married 10 times, sometimes within two weeks of each other. Prosecutors"
+ " say the marriages were part of an immigration scam. She pleaded not guilty at State Supreme Court in the"
+ " Bronx on Friday. If convicted, Barrientos faces up to four years in prison.",
]
generated_summaries = tokenizer.batch_decode(
diff --git a/tests/models/bart/test_modeling_tf_bart.py b/tests/models/bart/test_modeling_tf_bart.py
index 29c61a1e40..1e599c6b1b 100644
--- a/tests/models/bart/test_modeling_tf_bart.py
+++ b/tests/models/bart/test_modeling_tf_bart.py
@@ -375,18 +375,221 @@ class TFBartModelIntegrationTest(unittest.TestCase):
hf = TFBartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn")
tok = self.tok
- FRANCE_ARTICLE = ' Marseille, France (CNN)The French prosecutor leading an investigation into the crash of Germanwings Flight 9525 insisted Wednesday that he was not aware of any video footage from on board the plane. Marseille prosecutor Brice Robin told CNN that "so far no videos were used in the crash investigation." He added, "A person who has such a video needs to immediately give it to the investigators." Robin\'s comments follow claims by two magazines, German daily Bild and French Paris Match, of a cell phone video showing the harrowing final seconds from on board Germanwings Flight 9525 as it crashed into the French Alps. All 150 on board were killed. Paris Match and Bild reported that the video was recovered from a phone at the wreckage site. The two publications described the supposed video, but did not post it on their websites. The publications said that they watched the video, which was found by a source close to the investigation. "One can hear cries of \'My God\' in several languages," Paris Match reported. "Metallic banging can also be heard more than three times, perhaps of the pilot trying to open the cockpit door with a heavy object. Towards the end, after a heavy shake, stronger than the others, the screaming intensifies. Then nothing." "It is a very disturbing scene," said Julian Reichelt, editor-in-chief of Bild online. An official with France\'s accident investigation agency, the BEA, said the agency is not aware of any such video. Lt. Col. Jean-Marc Menichini, a French Gendarmerie spokesman in charge of communications on rescue efforts around the Germanwings crash site, told CNN that the reports were "completely wrong" and "unwarranted." Cell phones have been collected at the site, he said, but that they "hadn\'t been exploited yet." Menichini said he believed the cell phones would need to be sent to the Criminal Research Institute in Rosny sous-Bois, near Paris, in order to be analyzed by specialized technicians working hand-in-hand with investigators. But none of the cell phones found so far have been sent to the institute, Menichini said. Asked whether staff involved in the search could have leaked a memory card to the media, Menichini answered with a categorical "no." Reichelt told "Erin Burnett: Outfront" that he had watched the video and stood by the report, saying Bild and Paris Match are "very confident" that the clip is real. He noted that investigators only revealed they\'d recovered cell phones from the crash site after Bild and Paris Match published their reports. "That is something we did not know before. ... Overall we can say many things of the investigation weren\'t revealed by the investigation at the beginning," he said. What was mental state of Germanwings co-pilot? German airline Lufthansa confirmed Tuesday that co-pilot Andreas Lubitz had battled depression years before he took the controls of Germanwings Flight 9525, which he\'s accused of deliberately crashing last week in the French Alps. Lubitz told his Lufthansa flight training school in 2009 that he had a "previous episode of severe depression," the airline said Tuesday. Email correspondence between Lubitz and the school discovered in an internal investigation, Lufthansa said, included medical documents he submitted in connection with resuming his flight training. The announcement indicates that Lufthansa, the parent company of Germanwings, knew of Lubitz\'s battle with depression, allowed him to continue training and ultimately put him in the cockpit. Lufthansa, whose CEO Carsten Spohr previously said Lubitz was 100% fit to fly, described its statement Tuesday as a "swift and seamless clarification" and said it was sharing the information and documents -- including training and medical records -- with public prosecutors. Spohr traveled to the crash site Wednesday, where recovery teams have been working for the past week to recover human remains and plane debris scattered across a steep mountainside. He saw the crisis center set up in Seyne-les-Alpes, laid a wreath in the village of Le Vernet, closer to the crash site, where grieving families have left flowers at a simple stone memorial. Menichini told CNN late Tuesday that no visible human remains were left at the site but recovery teams would keep searching. French President Francois Hollande, speaking Tuesday, said that it should be possible to identify all the victims using DNA analysis by the end of the week, sooner than authorities had previously suggested. In the meantime, the recovery of the victims\' personal belongings will start Wednesday, Menichini said. Among those personal belongings could be more cell phones belonging to the 144 passengers and six crew on board. Check out the latest from our correspondents . The details about Lubitz\'s correspondence with the flight school during his training were among several developments as investigators continued to delve into what caused the crash and Lubitz\'s possible motive for downing the jet. A Lufthansa spokesperson told CNN on Tuesday that Lubitz had a valid medical certificate, had passed all his examinations and "held all the licenses required." Earlier, a spokesman for the prosecutor\'s office in Dusseldorf, Christoph Kumpa, said medical records reveal Lubitz suffered from suicidal tendencies at some point before his aviation career and underwent psychotherapy before he got his pilot\'s license. Kumpa emphasized there\'s no evidence suggesting Lubitz was suicidal or acting aggressively before the crash. Investigators are looking into whether Lubitz feared his medical condition would cause him to lose his pilot\'s license, a European government official briefed on the investigation told CNN on Tuesday. While flying was "a big part of his life," the source said, it\'s only one theory being considered. Another source, a law enforcement official briefed on the investigation, also told CNN that authorities believe the primary motive for Lubitz to bring down the plane was that he feared he would not be allowed to fly because of his medical problems. Lubitz\'s girlfriend told investigators he had seen an eye doctor and a neuropsychologist, both of whom deemed him unfit to work recently and concluded he had psychological issues, the European government official said. But no matter what details emerge about his previous mental health struggles, there\'s more to the story, said Brian Russell, a forensic psychologist. "Psychology can explain why somebody would turn rage inward on themselves about the fact that maybe they weren\'t going to keep doing their job and they\'re upset about that and so they\'re suicidal," he said. "But there is no mental illness that explains why somebody then feels entitled to also take that rage and turn it outward on 149 other people who had nothing to do with the person\'s problems." Germanwings crash compensation: What we know . Who was the captain of Germanwings Flight 9525? CNN\'s Margot Haddad reported from Marseille and Pamela Brown from Dusseldorf, while Laura Smith-Spark wrote from London. CNN\'s Frederik Pleitgen, Pamela Boykoff, Antonia Mortensen, Sandrine Amiel and Anna-Maja Rappard contributed to this report.' # @noqa
- EXPECTED_SUMMARY_FRANCE = 'French prosecutor says he\'s not aware of any video footage from on board the plane. German daily Bild and French Paris Match claim to have found a cell phone video of the crash. A French Gendarmerie spokesman calls the reports "completely wrong" and "unwarranted" German airline Lufthansa confirms co-pilot Andreas Lubitz had battled depression.'
+ FRANCE_ARTICLE = ( # @noqa
+ " Marseille, France (CNN)The French prosecutor leading an investigation into the crash of Germanwings"
+ " Flight 9525 insisted Wednesday that he was not aware of any video footage from on board the plane."
+ ' Marseille prosecutor Brice Robin told CNN that "so far no videos were used in the crash investigation."'
+ ' He added, "A person who has such a video needs to immediately give it to the investigators." Robin\'s'
+ " comments follow claims by two magazines, German daily Bild and French Paris Match, of a cell phone video"
+ " showing the harrowing final seconds from on board Germanwings Flight 9525 as it crashed into the French"
+ " Alps. All 150 on board were killed. Paris Match and Bild reported that the video was recovered from a"
+ " phone at the wreckage site. The two publications described the supposed video, but did not post it on"
+ " their websites. The publications said that they watched the video, which was found by a source close to"
+ " the investigation. \"One can hear cries of 'My God' in several languages,\" Paris Match reported."
+ ' "Metallic banging can also be heard more than three times, perhaps of the pilot trying to open the'
+ " cockpit door with a heavy object. Towards the end, after a heavy shake, stronger than the others, the"
+ ' screaming intensifies. Then nothing." "It is a very disturbing scene," said Julian Reichelt,'
+ " editor-in-chief of Bild online. An official with France's accident investigation agency, the BEA, said"
+ " the agency is not aware of any such video. Lt. Col. Jean-Marc Menichini, a French Gendarmerie spokesman"
+ " in charge of communications on rescue efforts around the Germanwings crash site, told CNN that the"
+ ' reports were "completely wrong" and "unwarranted." Cell phones have been collected at the site, he said,'
+ ' but that they "hadn\'t been exploited yet." Menichini said he believed the cell phones would need to be'
+ " sent to the Criminal Research Institute in Rosny sous-Bois, near Paris, in order to be analyzed by"
+ " specialized technicians working hand-in-hand with investigators. But none of the cell phones found so"
+ " far have been sent to the institute, Menichini said. Asked whether staff involved in the search could"
+ ' have leaked a memory card to the media, Menichini answered with a categorical "no." Reichelt told "Erin'
+ ' Burnett: Outfront" that he had watched the video and stood by the report, saying Bild and Paris Match'
+ ' are "very confident" that the clip is real. He noted that investigators only revealed they\'d recovered'
+ ' cell phones from the crash site after Bild and Paris Match published their reports. "That is something'
+ " we did not know before. ... Overall we can say many things of the investigation weren't revealed by the"
+ ' investigation at the beginning," he said. What was mental state of Germanwings co-pilot? German airline'
+ " Lufthansa confirmed Tuesday that co-pilot Andreas Lubitz had battled depression years before he took the"
+ " controls of Germanwings Flight 9525, which he's accused of deliberately crashing last week in the"
+ ' French Alps. Lubitz told his Lufthansa flight training school in 2009 that he had a "previous episode of'
+ ' severe depression," the airline said Tuesday. Email correspondence between Lubitz and the school'
+ " discovered in an internal investigation, Lufthansa said, included medical documents he submitted in"
+ " connection with resuming his flight training. The announcement indicates that Lufthansa, the parent"
+ " company of Germanwings, knew of Lubitz's battle with depression, allowed him to continue training and"
+ " ultimately put him in the cockpit. Lufthansa, whose CEO Carsten Spohr previously said Lubitz was 100%"
+ ' fit to fly, described its statement Tuesday as a "swift and seamless clarification" and said it was'
+ " sharing the information and documents -- including training and medical records -- with public"
+ " prosecutors. Spohr traveled to the crash site Wednesday, where recovery teams have been working for the"
+ " past week to recover human remains and plane debris scattered across a steep mountainside. He saw the"
+ " crisis center set up in Seyne-les-Alpes, laid a wreath in the village of Le Vernet, closer to the crash"
+ " site, where grieving families have left flowers at a simple stone memorial. Menichini told CNN late"
+ " Tuesday that no visible human remains were left at the site but recovery teams would keep searching."
+ " French President Francois Hollande, speaking Tuesday, said that it should be possible to identify all"
+ " the victims using DNA analysis by the end of the week, sooner than authorities had previously suggested."
+ " In the meantime, the recovery of the victims' personal belongings will start Wednesday, Menichini said."
+ " Among those personal belongings could be more cell phones belonging to the 144 passengers and six crew"
+ " on board. Check out the latest from our correspondents . The details about Lubitz's correspondence with"
+ " the flight school during his training were among several developments as investigators continued to"
+ " delve into what caused the crash and Lubitz's possible motive for downing the jet. A Lufthansa"
+ " spokesperson told CNN on Tuesday that Lubitz had a valid medical certificate, had passed all his"
+ ' examinations and "held all the licenses required." Earlier, a spokesman for the prosecutor\'s office in'
+ " Dusseldorf, Christoph Kumpa, said medical records reveal Lubitz suffered from suicidal tendencies at"
+ " some point before his aviation career and underwent psychotherapy before he got his pilot's license."
+ " Kumpa emphasized there's no evidence suggesting Lubitz was suicidal or acting aggressively before the"
+ " crash. Investigators are looking into whether Lubitz feared his medical condition would cause him to"
+ " lose his pilot's license, a European government official briefed on the investigation told CNN on"
+ ' Tuesday. While flying was "a big part of his life," the source said, it\'s only one theory being'
+ " considered. Another source, a law enforcement official briefed on the investigation, also told CNN that"
+ " authorities believe the primary motive for Lubitz to bring down the plane was that he feared he would"
+ " not be allowed to fly because of his medical problems. Lubitz's girlfriend told investigators he had"
+ " seen an eye doctor and a neuropsychologist, both of whom deemed him unfit to work recently and concluded"
+ " he had psychological issues, the European government official said. But no matter what details emerge"
+ " about his previous mental health struggles, there's more to the story, said Brian Russell, a forensic"
+ ' psychologist. "Psychology can explain why somebody would turn rage inward on themselves about the fact'
+ " that maybe they weren't going to keep doing their job and they're upset about that and so they're"
+ ' suicidal," he said. "But there is no mental illness that explains why somebody then feels entitled to'
+ " also take that rage and turn it outward on 149 other people who had nothing to do with the person's"
+ ' problems." Germanwings crash compensation: What we know . Who was the captain of Germanwings Flight'
+ " 9525? CNN's Margot Haddad reported from Marseille and Pamela Brown from Dusseldorf, while Laura"
+ " Smith-Spark wrote from London. CNN's Frederik Pleitgen, Pamela Boykoff, Antonia Mortensen, Sandrine"
+ " Amiel and Anna-Maja Rappard contributed to this report."
+ )
+ EXPECTED_SUMMARY_FRANCE = (
+ "French prosecutor says he's not aware of any video footage from on board the plane. German daily Bild"
+ " and French Paris Match claim to have found a cell phone video of the crash. A French Gendarmerie"
+ ' spokesman calls the reports "completely wrong" and "unwarranted" German airline Lufthansa confirms'
+ " co-pilot Andreas Lubitz had battled depression."
+ )
- SHORTER_ARTICLE = ' (CNN)The Palestinian Authority officially became the 123rd member of the International Criminal Court on Wednesday, a step that gives the court jurisdiction over alleged crimes in Palestinian territories. The formal accession was marked with a ceremony at The Hague, in the Netherlands, where the court is based. The Palestinians signed the ICC\'s founding Rome Statute in January, when they also accepted its jurisdiction over alleged crimes committed "in the occupied Palestinian territory, including East Jerusalem, since June 13, 2014." Later that month, the ICC opened a preliminary examination into the situation in Palestinian territories, paving the way for possible war crimes investigations against Israelis. As members of the court, Palestinians may be subject to counter-charges as well. Israel and the United States, neither of which is an ICC member, opposed the Palestinians\' efforts to join the body. But Palestinian Foreign Minister Riad al-Malki, speaking at Wednesday\'s ceremony, said it was a move toward greater justice. "As Palestine formally becomes a State Party to the Rome Statute today, the world is also a step closer to ending a long era of impunity and injustice," he said, according to an ICC news release. "Indeed, today brings us closer to our shared goals of justice and peace." Judge Kuniko Ozaki, a vice president of the ICC, said acceding to the treaty was just the first step for the Palestinians. "As the Rome Statute today enters into force for the State of Palestine, Palestine acquires all the rights as well as responsibilities that come with being a State Party to the Statute. These are substantive commitments, which cannot be taken lightly," she said. Rights group Human Rights Watch welcomed the development. "Governments seeking to penalize Palestine for joining the ICC should immediately end their pressure, and countries that support universal acceptance of the court\'s treaty should speak out to welcome its membership," said Balkees Jarrah, international justice counsel for the group. "What\'s objectionable is the attempts to undermine international justice, not Palestine\'s decision to join a treaty to which over 100 countries around the world are members." In January, when the preliminary ICC examination was opened, Israeli Prime Minister Benjamin Netanyahu described it as an outrage, saying the court was overstepping its boundaries. The United States also said it "strongly" disagreed with the court\'s decision. "As we have said repeatedly, we do not believe that Palestine is a state and therefore we do not believe that it is eligible to join the ICC," the State Department said in a statement. It urged the warring sides to resolve their differences through direct negotiations. "We will continue to oppose actions against Israel at the ICC as counterproductive to the cause of peace," it said. But the ICC begs to differ with the definition of a state for its purposes and refers to the territories as "Palestine." While a preliminary examination is not a formal investigation, it allows the court to review evidence and determine whether to investigate suspects on both sides. Prosecutor Fatou Bensouda said her office would "conduct its analysis in full independence and impartiality." The war between Israel and Hamas militants in Gaza last summer left more than 2,000 people dead. The inquiry will include alleged war crimes committed since June. The International Criminal Court was set up in 2002 to prosecute genocide, crimes against humanity and war crimes. CNN\'s Vasco Cotovio, Kareem Khadder and Faith Karimi contributed to this report.'
- EXPECTED_SUMMARY_SHORTER = "The Palestinian Authority becomes the 123rd member of the International Criminal Court. The move gives the court jurisdiction over alleged crimes in Palestinian territories. Israel and the United States opposed the Palestinians' efforts to join the body. But Palestinian Foreign Minister Riad al-Malki said it was a move toward greater justice."
+ SHORTER_ARTICLE = (
+ " (CNN)The Palestinian Authority officially became the 123rd member of the International Criminal Court on"
+ " Wednesday, a step that gives the court jurisdiction over alleged crimes in Palestinian territories. The"
+ " formal accession was marked with a ceremony at The Hague, in the Netherlands, where the court is based."
+ " The Palestinians signed the ICC's founding Rome Statute in January, when they also accepted its"
+ ' jurisdiction over alleged crimes committed "in the occupied Palestinian territory, including East'
+ ' Jerusalem, since June 13, 2014." Later that month, the ICC opened a preliminary examination into the'
+ " situation in Palestinian territories, paving the way for possible war crimes investigations against"
+ " Israelis. As members of the court, Palestinians may be subject to counter-charges as well. Israel and"
+ " the United States, neither of which is an ICC member, opposed the Palestinians' efforts to join the"
+ " body. But Palestinian Foreign Minister Riad al-Malki, speaking at Wednesday's ceremony, said it was a"
+ ' move toward greater justice. "As Palestine formally becomes a State Party to the Rome Statute today, the'
+ ' world is also a step closer to ending a long era of impunity and injustice," he said, according to an'
+ ' ICC news release. "Indeed, today brings us closer to our shared goals of justice and peace." Judge'
+ " Kuniko Ozaki, a vice president of the ICC, said acceding to the treaty was just the first step for the"
+ ' Palestinians. "As the Rome Statute today enters into force for the State of Palestine, Palestine'
+ " acquires all the rights as well as responsibilities that come with being a State Party to the Statute."
+ ' These are substantive commitments, which cannot be taken lightly," she said. Rights group Human Rights'
+ ' Watch welcomed the development. "Governments seeking to penalize Palestine for joining the ICC should'
+ " immediately end their pressure, and countries that support universal acceptance of the court's treaty"
+ ' should speak out to welcome its membership," said Balkees Jarrah, international justice counsel for the'
+ " group. \"What's objectionable is the attempts to undermine international justice, not Palestine's"
+ ' decision to join a treaty to which over 100 countries around the world are members." In January, when'
+ " the preliminary ICC examination was opened, Israeli Prime Minister Benjamin Netanyahu described it as an"
+ ' outrage, saying the court was overstepping its boundaries. The United States also said it "strongly"'
+ " disagreed with the court's decision. \"As we have said repeatedly, we do not believe that Palestine is a"
+ ' state and therefore we do not believe that it is eligible to join the ICC," the State Department said in'
+ ' a statement. It urged the warring sides to resolve their differences through direct negotiations. "We'
+ ' will continue to oppose actions against Israel at the ICC as counterproductive to the cause of peace,"'
+ " it said. But the ICC begs to differ with the definition of a state for its purposes and refers to the"
+ ' territories as "Palestine." While a preliminary examination is not a formal investigation, it allows the'
+ " court to review evidence and determine whether to investigate suspects on both sides. Prosecutor Fatou"
+ ' Bensouda said her office would "conduct its analysis in full independence and impartiality." The war'
+ " between Israel and Hamas militants in Gaza last summer left more than 2,000 people dead. The inquiry"
+ " will include alleged war crimes committed since June. The International Criminal Court was set up in"
+ " 2002 to prosecute genocide, crimes against humanity and war crimes. CNN's Vasco Cotovio, Kareem Khadder"
+ " and Faith Karimi contributed to this report."
+ )
+ EXPECTED_SUMMARY_SHORTER = (
+ "The Palestinian Authority becomes the 123rd member of the International Criminal Court. The move gives"
+ " the court jurisdiction over alleged crimes in Palestinian territories. Israel and the United States"
+ " opposed the Palestinians' efforts to join the body. But Palestinian Foreign Minister Riad al-Malki said"
+ " it was a move toward greater justice."
+ )
# The below article tests that we don't add any hypotheses outside of the top n_beams
- IRAN_ARTICLE = " (CNN)The United States and its negotiating partners reached a very strong framework agreement with Iran in Lausanne, Switzerland, on Thursday that limits Iran's nuclear program in such a way as to effectively block it from building a nuclear weapon. Expect pushback anyway, if the recent past is any harbinger. Just last month, in an attempt to head off such an agreement, House Speaker John Boehner invited Israeli Prime Minister Benjamin Netanyahu to preemptively blast it before Congress, and 47 senators sent a letter to the Iranian leadership warning them away from a deal. The debate that has already begun since the announcement of the new framework will likely result in more heat than light. It will not be helped by the gathering swirl of dubious assumptions and doubtful assertions. Let us address some of these: . The most misleading assertion, despite universal rejection by experts, is that the negotiations' objective at the outset was the total elimination of any nuclear program in Iran. That is the position of Netanyahu and his acolytes in the U.S. Congress. But that is not and never was the objective. If it had been, there would have been no Iranian team at the negotiating table. Rather, the objective has always been to structure an agreement or series of agreements so that Iran could not covertly develop a nuclear arsenal before the United States and its allies could respond. The new framework has exceeded expectations in achieving that goal. It would reduce Iran's low-enriched uranium stockpile, cut by two-thirds its number of installed centrifuges and implement a rigorous inspection regime. Another dubious assumption of opponents is that the Iranian nuclear program is a covert weapons program. Despite sharp accusations by some in the United States and its allies, Iran denies having such a program, and U.S. intelligence contends that Iran has not yet made the decision to build a nuclear weapon. Iran's continued cooperation with International Atomic Energy Agency inspections is further evidence on this point, and we'll know even more about Iran's program in the coming months and years because of the deal. In fact, the inspections provisions that are part of this agreement are designed to protect against any covert action by the Iranians. What's more, the rhetoric of some members of Congress has implied that the negotiations have been between only the United States and Iran (i.e., the 47 senators' letter warning that a deal might be killed by Congress or a future president). This of course is not the case. The talks were between Iran and the five permanent members of the U.N. Security Council (United States, United Kingdom, France, China and Russia) plus Germany, dubbed the P5+1. While the United States has played a leading role in the effort, it negotiated the terms alongside its partners. If the agreement reached by the P5+1 is rejected by Congress, it could result in an unraveling of the sanctions on Iran and threaten NATO cohesion in other areas. Another questionable assertion is that this agreement contains a sunset clause, after which Iran will be free to do as it pleases. Again, this is not the case. Some of the restrictions on Iran's nuclear activities, such as uranium enrichment, will be eased or eliminated over time, as long as 15 years. But most importantly, the framework agreement includes Iran's ratification of the Additional Protocol, which allows IAEA inspectors expanded access to nuclear sites both declared and nondeclared. This provision will be permanent. It does not sunset. Thus, going forward, if Iran decides to enrich uranium to weapons-grade levels, monitors will be able to detect such a move in a matter of days and alert the U.N. Security Council. Many in Congress have said that the agreement should be a formal treaty requiring the Senate to \"advise and consent.\" But the issue is not suited for a treaty. Treaties impose equivalent obligations on all signatories. For example, the New START treaty limits Russia and the United States to 1,550 deployed strategic warheads. But any agreement with Iran will not be so balanced. The restrictions and obligations in the final framework agreement will be imposed almost exclusively on Iran. The P5+1 are obligated only to ease and eventually remove most but not all economic sanctions, which were imposed as leverage to gain this final deal. Finally some insist that any agreement must address Iranian missile programs, human rights violations or support for Hamas or Hezbollah. As important as these issues are, and they must indeed be addressed, they are unrelated to the most important aim of a nuclear deal: preventing a nuclear Iran. To include them in the negotiations would be a poison pill. This agreement should be judged on its merits and on how it affects the security of our negotiating partners and allies, including Israel. Those judgments should be fact-based, not based on questionable assertions or dubious assumptions."
- EXPECTED_SUMMARY_IRAN = "The U.S. and its negotiating partners reached a very strong framework agreement with Iran. Peter Bergen: The debate that has already begun will likely result in more heat than light. He says the agreement limits Iran's nuclear program in such a way as to effectively block it from building a nuclear weapon. Bergen says the most important aim of a nuclear deal is preventing a nuclear Iran."
+ IRAN_ARTICLE = (
+ " (CNN)The United States and its negotiating partners reached a very strong framework agreement with Iran"
+ " in Lausanne, Switzerland, on Thursday that limits Iran's nuclear program in such a way as to effectively"
+ " block it from building a nuclear weapon. Expect pushback anyway, if the recent past is any harbinger."
+ " Just last month, in an attempt to head off such an agreement, House Speaker John Boehner invited Israeli"
+ " Prime Minister Benjamin Netanyahu to preemptively blast it before Congress, and 47 senators sent a"
+ " letter to the Iranian leadership warning them away from a deal. The debate that has already begun since"
+ " the announcement of the new framework will likely result in more heat than light. It will not be helped"
+ " by the gathering swirl of dubious assumptions and doubtful assertions. Let us address some of these: ."
+ " The most misleading assertion, despite universal rejection by experts, is that the negotiations'"
+ " objective at the outset was the total elimination of any nuclear program in Iran. That is the position"
+ " of Netanyahu and his acolytes in the U.S. Congress. But that is not and never was the objective. If it"
+ " had been, there would have been no Iranian team at the negotiating table. Rather, the objective has"
+ " always been to structure an agreement or series of agreements so that Iran could not covertly develop a"
+ " nuclear arsenal before the United States and its allies could respond. The new framework has exceeded"
+ " expectations in achieving that goal. It would reduce Iran's low-enriched uranium stockpile, cut by"
+ " two-thirds its number of installed centrifuges and implement a rigorous inspection regime. Another"
+ " dubious assumption of opponents is that the Iranian nuclear program is a covert weapons program. Despite"
+ " sharp accusations by some in the United States and its allies, Iran denies having such a program, and"
+ " U.S. intelligence contends that Iran has not yet made the decision to build a nuclear weapon. Iran's"
+ " continued cooperation with International Atomic Energy Agency inspections is further evidence on this"
+ " point, and we'll know even more about Iran's program in the coming months and years because of the deal."
+ " In fact, the inspections provisions that are part of this agreement are designed to protect against any"
+ " covert action by the Iranians. What's more, the rhetoric of some members of Congress has implied that"
+ " the negotiations have been between only the United States and Iran (i.e., the 47 senators' letter"
+ " warning that a deal might be killed by Congress or a future president). This of course is not the case."
+ " The talks were between Iran and the five permanent members of the U.N. Security Council (United States,"
+ " United Kingdom, France, China and Russia) plus Germany, dubbed the P5+1. While the United States has"
+ " played a leading role in the effort, it negotiated the terms alongside its partners. If the agreement"
+ " reached by the P5+1 is rejected by Congress, it could result in an unraveling of the sanctions on Iran"
+ " and threaten NATO cohesion in other areas. Another questionable assertion is that this agreement"
+ " contains a sunset clause, after which Iran will be free to do as it pleases. Again, this is not the"
+ " case. Some of the restrictions on Iran's nuclear activities, such as uranium enrichment, will be eased"
+ " or eliminated over time, as long as 15 years. But most importantly, the framework agreement includes"
+ " Iran's ratification of the Additional Protocol, which allows IAEA inspectors expanded access to nuclear"
+ " sites both declared and nondeclared. This provision will be permanent. It does not sunset. Thus, going"
+ " forward, if Iran decides to enrich uranium to weapons-grade levels, monitors will be able to detect such"
+ " a move in a matter of days and alert the U.N. Security Council. Many in Congress have said that the"
+ ' agreement should be a formal treaty requiring the Senate to "advise and consent." But the issue is not'
+ " suited for a treaty. Treaties impose equivalent obligations on all signatories. For example, the New"
+ " START treaty limits Russia and the United States to 1,550 deployed strategic warheads. But any agreement"
+ " with Iran will not be so balanced. The restrictions and obligations in the final framework agreement"
+ " will be imposed almost exclusively on Iran. The P5+1 are obligated only to ease and eventually remove"
+ " most but not all economic sanctions, which were imposed as leverage to gain this final deal. Finally"
+ " some insist that any agreement must address Iranian missile programs, human rights violations or support"
+ " for Hamas or Hezbollah. As important as these issues are, and they must indeed be addressed, they are"
+ " unrelated to the most important aim of a nuclear deal: preventing a nuclear Iran. To include them in"
+ " the negotiations would be a poison pill. This agreement should be judged on its merits and on how it"
+ " affects the security of our negotiating partners and allies, including Israel. Those judgments should be"
+ " fact-based, not based on questionable assertions or dubious assumptions."
+ )
+ EXPECTED_SUMMARY_IRAN = (
+ "The U.S. and its negotiating partners reached a very strong framework agreement with Iran. Peter Bergen:"
+ " The debate that has already begun will likely result in more heat than light. He says the agreement"
+ " limits Iran's nuclear program in such a way as to effectively block it from building a nuclear weapon."
+ " Bergen says the most important aim of a nuclear deal is preventing a nuclear Iran."
+ )
- ARTICLE_SUBWAY = ' New York (CNN)When Liana Barrientos was 23 years old, she got married in Westchester County, New York. A year later, she got married again in Westchester County, but to a different man and without divorcing her first husband. Only 18 days after that marriage, she got hitched yet again. Then, Barrientos declared "I do" five more times, sometimes only within two weeks of each other. In 2010, she married once more, this time in the Bronx. In an application for a marriage license, she stated it was her "first and only" marriage. Barrientos, now 39, is facing two criminal counts of "offering a false instrument for filing in the first degree," referring to her false statements on the 2010 marriage license application, according to court documents. Prosecutors said the marriages were part of an immigration scam. On Friday, she pleaded not guilty at State Supreme Court in the Bronx, according to her attorney, Christopher Wright, who declined to comment further. After leaving court, Barrientos was arrested and charged with theft of service and criminal trespass for allegedly sneaking into the New York subway through an emergency exit, said Detective Annette Markowski, a police spokeswoman. In total, Barrientos has been married 10 times, with nine of her marriages occurring between 1999 and 2002. All occurred either in Westchester County, Long Island, New Jersey or the Bronx. She is believed to still be married to four men, and at one time, she was married to eight men at once, prosecutors say. Prosecutors said the immigration scam involved some of her husbands, who filed for permanent residence status shortly after the marriages. Any divorces happened only after such filings were approved. It was unclear whether any of the men will be prosecuted. The case was referred to the Bronx District Attorney\'s Office by Immigration and Customs Enforcement and the Department of Homeland Security\'s Investigation Division. Seven of the men are from so-called "red-flagged" countries, including Egypt, Turkey, Georgia, Pakistan and Mali. Her eighth husband, Rashid Rajput, was deported in 2006 to his native Pakistan after an investigation by the Joint Terrorism Task Force. If convicted, Barrientos faces up to four years in prison. Her next court appearance is scheduled for May 18.'
- EXPECTED_SUMMARY_SUBWAY = "Liana Barrientos has been married 10 times, sometimes within two weeks of each other. Prosecutors say the marriages were part of an immigration scam. On Friday, she pleaded not guilty at State Supreme Court in the Bronx. She was arrested and charged with theft of service and criminal trespass for allegedly sneaking into the subway."
+ ARTICLE_SUBWAY = (
+ " New York (CNN)When Liana Barrientos was 23 years old, she got married in Westchester County, New York. A"
+ " year later, she got married again in Westchester County, but to a different man and without divorcing"
+ " her first husband. Only 18 days after that marriage, she got hitched yet again. Then, Barrientos"
+ ' declared "I do" five more times, sometimes only within two weeks of each other. In 2010, she married'
+ " once more, this time in the Bronx. In an application for a marriage license, she stated it was her"
+ ' "first and only" marriage. Barrientos, now 39, is facing two criminal counts of "offering a false'
+ ' instrument for filing in the first degree," referring to her false statements on the 2010 marriage'
+ " license application, according to court documents. Prosecutors said the marriages were part of an"
+ " immigration scam. On Friday, she pleaded not guilty at State Supreme Court in the Bronx, according to"
+ " her attorney, Christopher Wright, who declined to comment further. After leaving court, Barrientos was"
+ " arrested and charged with theft of service and criminal trespass for allegedly sneaking into the New"
+ " York subway through an emergency exit, said Detective Annette Markowski, a police spokeswoman. In total,"
+ " Barrientos has been married 10 times, with nine of her marriages occurring between 1999 and 2002. All"
+ " occurred either in Westchester County, Long Island, New Jersey or the Bronx. She is believed to still be"
+ " married to four men, and at one time, she was married to eight men at once, prosecutors say. Prosecutors"
+ " said the immigration scam involved some of her husbands, who filed for permanent residence status"
+ " shortly after the marriages. Any divorces happened only after such filings were approved. It was"
+ " unclear whether any of the men will be prosecuted. The case was referred to the Bronx District"
+ " Attorney's Office by Immigration and Customs Enforcement and the Department of Homeland Security's"
+ ' Investigation Division. Seven of the men are from so-called "red-flagged" countries, including Egypt,'
+ " Turkey, Georgia, Pakistan and Mali. Her eighth husband, Rashid Rajput, was deported in 2006 to his"
+ " native Pakistan after an investigation by the Joint Terrorism Task Force. If convicted, Barrientos faces"
+ " up to four years in prison. Her next court appearance is scheduled for May 18."
+ )
+ EXPECTED_SUMMARY_SUBWAY = (
+ "Liana Barrientos has been married 10 times, sometimes within two weeks of each other. Prosecutors say the"
+ " marriages were part of an immigration scam. On Friday, she pleaded not guilty at State Supreme Court in"
+ " the Bronx. She was arrested and charged with theft of service and criminal trespass for allegedly"
+ " sneaking into the subway."
+ )
dct = tok(
[FRANCE_ARTICLE, SHORTER_ARTICLE, IRAN_ARTICLE, ARTICLE_SUBWAY],
@@ -433,8 +636,47 @@ class FasterTFBartModelIntegrationTests(unittest.TestCase):
def test_xsum_1_1_generation(self):
model = self.xsum_1_1_model
assert model.model.decoder.embed_tokens._layer == model.model.shared
- ARTICLE = 'The Palestinian Authority officially became the 123rd member of the International Criminal Court on Wednesday, a step that gives the court jurisdiction over alleged crimes in Palestinian territories. The formal accession was marked with a ceremony at The Hague, in the Netherlands, where the court is based. The Palestinians signed the ICC\'s founding Rome Statute in January, when they also accepted its jurisdiction over alleged crimes committed "in the occupied Palestinian territory, including East Jerusalem, since June 13, 2014." Later that month, the ICC opened a preliminary examination into the situation in Palestinian territories, paving the way for possible war crimes investigations against Israelis. As members of the court, Palestinians may be subject to counter-charges as well. Israel and the United States, neither of which is an ICC member, opposed the Palestinians\' efforts to join the body. But Palestinian Foreign Minister Riad al-Malki, speaking at Wednesday\'s ceremony, said it was a move toward greater justice. "As Palestine formally becomes a State Party to the Rome Statute today, the world is also a step closer to ending a long era of impunity and injustice," he said, according to an ICC news release. "Indeed, today brings us closer to our shared goals of justice and peace." Judge Kuniko Ozaki, a vice president of the ICC, said acceding to the treaty was just the first step for the Palestinians. "As the Rome Statute today enters into force for the State of Palestine, Palestine acquires all the rights as well as responsibilities that come with being a State Party to the Statute. These are substantive commitments, which cannot be taken lightly," she said. Rights group Human Rights Watch welcomed the development. "Governments seeking to penalize Palestine for joining the ICC should immediately end their pressure, and countries that support universal acceptance of the court\'s treaty should speak out to welcome its membership," said Balkees Jarrah, international justice counsel for the group. "What\'s objectionable is the attempts to undermine international justice, not Palestine\'s decision to join a treaty to which over 100 countries around the world are members." In January, when the preliminary ICC examination was opened, Israeli Prime Minister Benjamin Netanyahu described it as an outrage, saying the court was overstepping its boundaries. The United States also said it "strongly" disagreed with the court\'s decision. "As we have said repeatedly, we do not believe that Palestine is a state and therefore we do not believe that it is eligible to join the ICC," the State Department said in a statement. It urged the warring sides to resolve their differences through direct negotiations. "We will continue to oppose actions against Israel at the ICC as counterproductive to the cause of peace," it said. But the ICC begs to differ with the definition of a state for its purposes and refers to the territories as "Palestine." While a preliminary examination is not a formal investigation, it allows the court to review evidence and determine whether to investigate suspects on both sides. Prosecutor Fatou Bensouda said her office would "conduct its analysis in full independence and impartiality." The war between Israel and Hamas militants in Gaza last summer left more than 2,000 people dead. The inquiry will include alleged war crimes committed since June. The International Criminal Court was set up in 2002 to prosecute genocide, crimes against humanity and war crimes.'
- EXPECTED = " The International Criminal Court (ICC) has announced that it has been announced by the International Criminal court."
+ ARTICLE = (
+ "The Palestinian Authority officially became the 123rd member of the International Criminal Court on"
+ " Wednesday, a step that gives the court jurisdiction over alleged crimes in Palestinian territories. The"
+ " formal accession was marked with a ceremony at The Hague, in the Netherlands, where the court is based."
+ " The Palestinians signed the ICC's founding Rome Statute in January, when they also accepted its"
+ ' jurisdiction over alleged crimes committed "in the occupied Palestinian territory, including East'
+ ' Jerusalem, since June 13, 2014." Later that month, the ICC opened a preliminary examination into the'
+ " situation in Palestinian territories, paving the way for possible war crimes investigations against"
+ " Israelis. As members of the court, Palestinians may be subject to counter-charges as well. Israel and"
+ " the United States, neither of which is an ICC member, opposed the Palestinians' efforts to join the"
+ " body. But Palestinian Foreign Minister Riad al-Malki, speaking at Wednesday's ceremony, said it was a"
+ ' move toward greater justice. "As Palestine formally becomes a State Party to the Rome Statute today, the'
+ ' world is also a step closer to ending a long era of impunity and injustice," he said, according to an'
+ ' ICC news release. "Indeed, today brings us closer to our shared goals of justice and peace." Judge'
+ " Kuniko Ozaki, a vice president of the ICC, said acceding to the treaty was just the first step for the"
+ ' Palestinians. "As the Rome Statute today enters into force for the State of Palestine, Palestine'
+ " acquires all the rights as well as responsibilities that come with being a State Party to the Statute."
+ ' These are substantive commitments, which cannot be taken lightly," she said. Rights group Human Rights'
+ ' Watch welcomed the development. "Governments seeking to penalize Palestine for joining the ICC should'
+ " immediately end their pressure, and countries that support universal acceptance of the court's treaty"
+ ' should speak out to welcome its membership," said Balkees Jarrah, international justice counsel for the'
+ " group. \"What's objectionable is the attempts to undermine international justice, not Palestine's"
+ ' decision to join a treaty to which over 100 countries around the world are members." In January, when'
+ " the preliminary ICC examination was opened, Israeli Prime Minister Benjamin Netanyahu described it as an"
+ ' outrage, saying the court was overstepping its boundaries. The United States also said it "strongly"'
+ " disagreed with the court's decision. \"As we have said repeatedly, we do not believe that Palestine is a"
+ ' state and therefore we do not believe that it is eligible to join the ICC," the State Department said in'
+ ' a statement. It urged the warring sides to resolve their differences through direct negotiations. "We'
+ ' will continue to oppose actions against Israel at the ICC as counterproductive to the cause of peace,"'
+ " it said. But the ICC begs to differ with the definition of a state for its purposes and refers to the"
+ ' territories as "Palestine." While a preliminary examination is not a formal investigation, it allows the'
+ " court to review evidence and determine whether to investigate suspects on both sides. Prosecutor Fatou"
+ ' Bensouda said her office would "conduct its analysis in full independence and impartiality." The war'
+ " between Israel and Hamas militants in Gaza last summer left more than 2,000 people dead. The inquiry"
+ " will include alleged war crimes committed since June. The International Criminal Court was set up in"
+ " 2002 to prosecute genocide, crimes against humanity and war crimes."
+ )
+ EXPECTED = (
+ " The International Criminal Court (ICC) has announced that it has been announced by the International"
+ " Criminal court."
+ )
dct = self.tok(ARTICLE, return_tensors="tf")
generated_ids = model.generate(**dct, num_beams=4)
result = self.tok.batch_decode(generated_ids, skip_special_tokens=True)[0]
@@ -443,8 +685,116 @@
def test_xsum_1_1_batch_generation(self):
batch = self.tok(
[
- 'The Palestinian Authority officially became the 123rd member of the International Criminal Court on Wednesday, a step that gives the court jurisdiction over alleged crimes in Palestinian territories. The formal accession was marked with a ceremony at The Hague, in the Netherlands, where the court is based. The Palestinians signed the ICC\'s founding Rome Statute in January, when they also accepted its jurisdiction over alleged crimes committed "in the occupied Palestinian territory, including East Jerusalem, since June 13, 2014." Later that month, the ICC opened a preliminary examination into the situation in Palestinian territories, paving the way for possible war crimes investigations against Israelis. As members of the court, Palestinians may be subject to counter-charges as well. Israel and the United States, neither of which is an ICC member, opposed the Palestinians\' efforts to join the body. But Palestinian Foreign Minister Riad al-Malki, speaking at Wednesday\'s ceremony, said it was a move toward greater justice. "As Palestine formally becomes a State Party to the Rome Statute today, the world is also a step closer to ending a long era of impunity and injustice," he said, according to an ICC news release. "Indeed, today brings us closer to our shared goals of justice and peace." Judge Kuniko Ozaki, a vice president of the ICC, said acceding to the treaty was just the first step for the Palestinians. "As the Rome Statute today enters into force for the State of Palestine, Palestine acquires all the rights as well as responsibilities that come with being a State Party to the Statute. These are substantive commitments, which cannot be taken lightly," she said. Rights group Human Rights Watch welcomed the development. "Governments seeking to penalize Palestine for joining the ICC should immediately end their pressure, and countries that support universal acceptance of the court\'s treaty should speak out to welcome its membership," said Balkees Jarrah, international justice counsel for the group. "What\'s objectionable is the attempts to undermine international justice, not Palestine\'s decision to join a treaty to which over 100 countries around the world are members." In January, when the preliminary ICC examination was opened, Israeli Prime Minister Benjamin Netanyahu described it as an outrage, saying the court was overstepping its boundaries. The United States also said it "strongly" disagreed with the court\'s decision. "As we have said repeatedly, we do not believe that Palestine is a state and therefore we do not believe that it is eligible to join the ICC," the State Department said in a statement. It urged the warring sides to resolve their differences through direct negotiations. "We will continue to oppose actions against Israel at the ICC as counterproductive to the cause of peace," it said. But the ICC begs to differ with the definition of a state for its purposes and refers to the territories as "Palestine." While a preliminary examination is not a formal investigation, it allows the court to review evidence and determine whether to investigate suspects on both sides. Prosecutor Fatou Bensouda said her office would "conduct its analysis in full independence and impartiality." The war between Israel and Hamas militants in Gaza last summer left more than 2,000 people dead. The inquiry will include alleged war crimes committed since June. The International Criminal Court was set up in 2002 to prosecute genocide, crimes against humanity and war crimes.',
- 'The French prosecutor leading an investigation into the crash of Germanwings Flight 9525 insisted Wednesday that he was not aware of any video footage from on board the plane. Marseille prosecutor Brice Robin told CNN that "so far no videos were used in the crash investigation." He added, "A person who has such a video needs to immediately give it to the investigators." Robin\'s comments follow claims by two magazines, German daily Bild and French Paris Match, of a cell phone video showing the harrowing final seconds from on board Germanwings Flight 9525 as it crashed into the French Alps. All 150 on board were killed. Paris Match and Bild reported that the video was recovered from a phone at the wreckage site. The two publications described the supposed video, but did not post it on their websites. The publications said that they watched the video, which was found by a source close to the investigation. "One can hear cries of \'My God\' in several languages," Paris Match reported. "Metallic banging can also be heard more than three times, perhaps of the pilot trying to open the cockpit door with a heavy object. Towards the end, after a heavy shake, stronger than the others, the screaming intensifies. Then nothing." "It is a very disturbing scene," said Julian Reichelt, editor-in-chief of Bild online. An official with France\'s accident investigation agency, the BEA, said the agency is not aware of any such video. Lt. Col. Jean-Marc Menichini, a French Gendarmerie spokesman in charge of communications on rescue efforts around the Germanwings crash site, told CNN that the reports were "completely wrong" and "unwarranted." Cell phones have been collected at the site, he said, but that they "hadn\'t been exploited yet." Menichini said he believed the cell phones would need to be sent to the Criminal Research Institute in Rosny sous-Bois, near Paris, in order to be analyzed by specialized technicians working hand-in-hand with investigators. But none of the cell phones found so far have been sent to the institute, Menichini said. Asked whether staff involved in the search could have leaked a memory card to the media, Menichini answered with a categorical "no." Reichelt told "Erin Burnett: Outfront" that he had watched the video and stood by the report, saying Bild and Paris Match are "very confident" that the clip is real. He noted that investigators only revealed they\'d recovered cell phones from the crash site after Bild and Paris Match published their reports. "That is something we did not know before. ... Overall we can say many things of the investigation weren\'t revealed by the investigation at the beginning," he said. What was mental state of Germanwings co-pilot? German airline Lufthansa confirmed Tuesday that co-pilot Andreas Lubitz had battled depression years before he took the controls of Germanwings Flight 9525, which he\'s accused of deliberately crashing last week in the French Alps. Lubitz told his Lufthansa flight training school in 2009 that he had a "previous episode of severe depression," the airline said Tuesday. Email correspondence between Lubitz and the school discovered in an internal investigation, Lufthansa said, included medical documents he submitted in connection with resuming his flight training. The announcement indicates that Lufthansa, the parent company of Germanwings, knew of Lubitz\'s battle with depression, allowed him to continue training and ultimately put him in the cockpit. Lufthansa, whose CEO Carsten Spohr previously said Lubitz was 100% fit to fly, described its statement Tuesday as a "swift and seamless clarification" and said it was sharing the information and documents -- including training and medical records -- with public prosecutors. Spohr traveled to the crash site Wednesday, where recovery teams have been working for the past week to recover human remains and plane debris scattered across a steep mountainside. He saw the crisis center set up in Seyne-les-Alpes, laid a wreath in the village of Le Vernet, closer to the crash site, where grieving families have left flowers at a simple stone memorial. Menichini told CNN late Tuesday that no visible human remains were left at the site but recovery teams would keep searching. French President Francois Hollande, speaking Tuesday, said that it should be possible to identify all the victims using DNA analysis by the end of the week, sooner than authorities had previously suggested. In the meantime, the recovery of the victims\' personal belongings will start Wednesday, Menichini said. Among those personal belongings could be more cell phones belonging to the 144 passengers and six crew on board. Check out the latest from our correspondents . The details about Lubitz\'s correspondence with the flight school during his training were among several developments as investigators continued to delve into what caused the crash and Lubitz\'s possible motive for downing the jet. A Lufthansa spokesperson told CNN on Tuesday that Lubitz had a valid medical certificate, had passed all his examinations and "held all the licenses required." Earlier, a spokesman for the prosecutor\'s office in Dusseldorf, Christoph Kumpa, said medical records reveal Lubitz suffered from suicidal tendencies at some point before his aviation career and underwent psychotherapy before he got his pilot\'s license. Kumpa emphasized there\'s no evidence suggesting Lubitz was suicidal or acting aggressively before the crash. Investigators are looking into whether Lubitz feared his medical condition would cause him to lose his pilot\'s license, a European government official briefed on the investigation told CNN on Tuesday. While flying was "a big part of his life," the source said, it\'s only one theory being considered. Another source, a law enforcement official briefed on the investigation, also told CNN that authorities believe the primary motive for Lubitz to bring down the plane was that he feared he would not be allowed to fly because of his medical problems. Lubitz\'s girlfriend told investigators he had seen an eye doctor and a neuropsychologist, both of whom deemed him unfit to work recently and concluded he had psychological issues, the European government official said. But no matter what details emerge about his previous mental health struggles, there\'s more to the story, said Brian Russell, a forensic psychologist. "Psychology can explain why somebody would turn rage inward on themselves about the fact that maybe they weren\'t going to keep doing their job and they\'re upset about that and so they\'re suicidal," he said. "But there is no mental illness that explains why somebody then feels entitled to also take that rage and turn it outward on 149 other people who had nothing to do with the person\'s problems." Germanwings crash compensation: What we know . Who was the captain of Germanwings Flight 9525? CNN\'s Margot Haddad reported from Marseille and Pamela Brown from Dusseldorf, while Laura Smith-Spark wrote from London. CNN\'s Frederik Pleitgen, Pamela Boykoff, Antonia Mortensen, Sandrine Amiel and Anna-Maja Rappard contributed to this report.',
+ "The Palestinian Authority officially became the 123rd member of the International Criminal Court on"
+ " Wednesday, a step that gives the court jurisdiction over alleged crimes in Palestinian territories."
+ " The formal accession was marked with a ceremony at The Hague, in the Netherlands, where the court is"
+ " based. The Palestinians signed the ICC's founding Rome Statute in January, when they also accepted"
+ ' its jurisdiction over alleged crimes committed "in the occupied Palestinian territory, including'
+ ' East Jerusalem, since June 13, 2014." Later that month, the ICC opened a preliminary examination'
+ " into the situation in Palestinian territories, paving the way for possible war crimes investigations"
+ " against Israelis. As members of the court, Palestinians may be subject to counter-charges as well."
+ " Israel and the United States, neither of which is an ICC member, opposed the Palestinians' efforts"
+ " to join the body. But Palestinian Foreign Minister Riad al-Malki, speaking at Wednesday's ceremony,"
+ ' said it was a move toward greater justice. "As Palestine formally becomes a State Party to the Rome'
+ ' Statute today, the world is also a step closer to ending a long era of impunity and injustice," he'
+ ' said, according to an ICC news release. "Indeed, today brings us closer to our shared goals of'
+ ' justice and peace." Judge Kuniko Ozaki, a vice president of the ICC, said acceding to the treaty was'
+ ' just the first step for the Palestinians.
"As the Rome Statute today enters into force for the State' + " of Palestine, Palestine acquires all the rights as well as responsibilities that come with being a" + ' State Party to the Statute. These are substantive commitments, which cannot be taken lightly," she' + ' said. Rights group Human Rights Watch welcomed the development. "Governments seeking to penalize' + " Palestine for joining the ICC should immediately end their pressure, and countries that support" + " universal acceptance of the court's treaty should speak out to welcome its membership,\" said" + " Balkees Jarrah, international justice counsel for the group. \"What's objectionable is the attempts" + " to undermine international justice, not Palestine's decision to join a treaty to which over 100" + ' countries around the world are members." In January, when the preliminary ICC examination was' + " opened, Israeli Prime Minister Benjamin Netanyahu described it as an outrage, saying the court was" + ' overstepping its boundaries. The United States also said it "strongly" disagreed with the court\'s' + ' decision. "As we have said repeatedly, we do not believe that Palestine is a state and therefore we' + ' do not believe that it is eligible to join the ICC," the State Department said in a statement. It' + ' urged the warring sides to resolve their differences through direct negotiations. "We will continue' + ' to oppose actions against Israel at the ICC as counterproductive to the cause of peace," it said.' + " But the ICC begs to differ with the definition of a state for its purposes and refers to the" + ' territories as "Palestine." While a preliminary examination is not a formal investigation, it allows' + " the court to review evidence and determine whether to investigate suspects on both sides. Prosecutor" + ' Fatou Bensouda said her office would "conduct its analysis in full independence and impartiality."' + " The war between Israel and Hamas militants in Gaza last summer left more than 2,000 people dead. The" + " inquiry will include alleged war crimes committed since June. The International Criminal Court was" + " set up in 2002 to prosecute genocide, crimes against humanity and war crimes.", + "The French prosecutor leading an investigation into the crash of Germanwings Flight 9525 insisted" + " Wednesday that he was not aware of any video footage from on board the plane. Marseille prosecutor" + ' Brice Robin told CNN that "so far no videos were used in the crash investigation." He added, "A' + " person who has such a video needs to immediately give it to the investigators.\" Robin's comments" + " follow claims by two magazines, German daily Bild and French Paris Match, of a cell phone video" + " showing the harrowing final seconds from on board Germanwings Flight 9525 as it crashed into the" + " French Alps. All 150 on board were killed. Paris Match and Bild reported that the video was" + " recovered from a phone at the wreckage site. The two publications described the supposed video, but" + " did not post it on their websites. The publications said that they watched the video, which was" + " found by a source close to the investigation. \"One can hear cries of 'My God' in several" + ' languages," Paris Match reported. "Metallic banging can also be heard more than three times, perhaps' + " of the pilot trying to open the cockpit door with a heavy object. Towards the end, after a heavy" + ' shake, stronger than the others, the screaming intensifies. Then nothing." 
"It is a very disturbing' + " scene,\" said Julian Reichelt, editor-in-chief of Bild online. An official with France's accident" + " investigation agency, the BEA, said the agency is not aware of any such video. Lt. Col. Jean-Marc" + " Menichini, a French Gendarmerie spokesman in charge of communications on rescue efforts around the" + ' Germanwings crash site, told CNN that the reports were "completely wrong" and "unwarranted." Cell' + ' phones have been collected at the site, he said, but that they "hadn\'t been exploited yet."' + " Menichini said he believed the cell phones would need to be sent to the Criminal Research Institute" + " in Rosny sous-Bois, near Paris, in order to be analyzed by specialized technicians working" + " hand-in-hand with investigators. But none of the cell phones found so far have been sent to the" + " institute, Menichini said. Asked whether staff involved in the search could have leaked a memory" + ' card to the media, Menichini answered with a categorical "no." Reichelt told "Erin Burnett:' + ' Outfront" that he had watched the video and stood by the report, saying Bild and Paris Match are' + ' "very confident" that the clip is real. He noted that investigators only revealed they\'d recovered' + ' cell phones from the crash site after Bild and Paris Match published their reports. "That is' + " something we did not know before. ... Overall we can say many things of the investigation weren't" + ' revealed by the investigation at the beginning," he said. What was mental state of Germanwings' + " co-pilot? German airline Lufthansa confirmed Tuesday that co-pilot Andreas Lubitz had battled" + " depression years before he took the controls of Germanwings Flight 9525, which he's accused of" + " deliberately crashing last week in the French Alps. Lubitz told his Lufthansa flight training school" + ' in 2009 that he had a "previous episode of severe depression," the airline said Tuesday. Email' + " correspondence between Lubitz and the school discovered in an internal investigation, Lufthansa" + " said, included medical documents he submitted in connection with resuming his flight training. The" + " announcement indicates that Lufthansa, the parent company of Germanwings, knew of Lubitz's battle" + " with depression, allowed him to continue training and ultimately put him in the cockpit. Lufthansa," + " whose CEO Carsten Spohr previously said Lubitz was 100% fit to fly, described its statement Tuesday" + ' as a "swift and seamless clarification" and said it was sharing the information and documents --' + " including training and medical records -- with public prosecutors. Spohr traveled to the crash site" + " Wednesday, where recovery teams have been working for the past week to recover human remains and" + " plane debris scattered across a steep mountainside. He saw the crisis center set up in" + " Seyne-les-Alpes, laid a wreath in the village of Le Vernet, closer to the crash site, where grieving" + " families have left flowers at a simple stone memorial. Menichini told CNN late Tuesday that no" + " visible human remains were left at the site but recovery teams would keep searching. French" + " President Francois Hollande, speaking Tuesday, said that it should be possible to identify all the" + " victims using DNA analysis by the end of the week, sooner than authorities had previously suggested." + " In the meantime, the recovery of the victims' personal belongings will start Wednesday, Menichini" + " said. 
Among those personal belongings could be more cell phones belonging to the 144 passengers and" + " six crew on board. Check out the latest from our correspondents . The details about Lubitz's" + " correspondence with the flight school during his training were among several developments as" + " investigators continued to delve into what caused the crash and Lubitz's possible motive for" + " downing the jet. A Lufthansa spokesperson told CNN on Tuesday that Lubitz had a valid medical" + ' certificate, had passed all his examinations and "held all the licenses required." Earlier, a' + " spokesman for the prosecutor's office in Dusseldorf, Christoph Kumpa, said medical records reveal" + " Lubitz suffered from suicidal tendencies at some point before his aviation career and underwent" + " psychotherapy before he got his pilot's license. Kumpa emphasized there's no evidence suggesting" + " Lubitz was suicidal or acting aggressively before the crash. Investigators are looking into whether" + " Lubitz feared his medical condition would cause him to lose his pilot's license, a European" + ' government official briefed on the investigation told CNN on Tuesday. While flying was "a big part' + " of his life,\" the source said, it's only one theory being considered. Another source, a law" + " enforcement official briefed on the investigation, also told CNN that authorities believe the" + " primary motive for Lubitz to bring down the plane was that he feared he would not be allowed to fly" + " because of his medical problems. Lubitz's girlfriend told investigators he had seen an eye doctor" + " and a neuropsychologist, both of whom deemed him unfit to work recently and concluded he had" + " psychological issues, the European government official said. But no matter what details emerge about" + " his previous mental health struggles, there's more to the story, said Brian Russell, a forensic" + ' psychologist. "Psychology can explain why somebody would turn rage inward on themselves about the' + " fact that maybe they weren't going to keep doing their job and they're upset about that and so" + ' they\'re suicidal," he said. "But there is no mental illness that explains why somebody then feels' + " entitled to also take that rage and turn it outward on 149 other people who had nothing to do with" + " the person's problems.\" Germanwings crash compensation: What we know . Who was the captain of" + " Germanwings Flight 9525? CNN's Margot Haddad reported from Marseille and Pamela Brown from" + " Dusseldorf, while Laura Smith-Spark wrote from London. CNN's Frederik Pleitgen, Pamela Boykoff," + " Antonia Mortensen, Sandrine Amiel and Anna-Maja Rappard contributed to this report.", ], return_tensors="tf", padding="longest", @@ -454,18 +804,128 @@ class FasterTFBartModelIntegrationTests(unittest.TestCase): result = self.tok.batch_decode(generated_ids, skip_special_tokens=True) assert ( result[0] - == " The International Criminal Court (ICC) has announced that it has been announced by the International Criminal court." + == " The International Criminal Court (ICC) has announced that it has been announced by the International" + " Criminal court." ) assert ( result[1] - == " An investigation into the crash that killed at least 10 people in the French capital has been released by the French police investigating the crash." + == " An investigation into the crash that killed at least 10 people in the French capital has been" + " released by the French police investigating the crash." 
)
def test_encoder_equiv(self):
batch = self.tok(
[
- 'The Palestinian Authority officially became the 123rd member of the International Criminal Court on Wednesday, a step that gives the court jurisdiction over alleged crimes in Palestinian territories. The formal accession was marked with a ceremony at The Hague, in the Netherlands, where the court is based. The Palestinians signed the ICC\'s founding Rome Statute in January, when they also accepted its jurisdiction over alleged crimes committed "in the occupied Palestinian territory, including East Jerusalem, since June 13, 2014." Later that month, the ICC opened a preliminary examination into the situation in Palestinian territories, paving the way for possible war crimes investigations against Israelis. As members of the court, Palestinians may be subject to counter-charges as well. Israel and the United States, neither of which is an ICC member, opposed the Palestinians\' efforts to join the body. But Palestinian Foreign Minister Riad al-Malki, speaking at Wednesday\'s ceremony, said it was a move toward greater justice. "As Palestine formally becomes a State Party to the Rome Statute today, the world is also a step closer to ending a long era of impunity and injustice," he said, according to an ICC news release. "Indeed, today brings us closer to our shared goals of justice and peace." Judge Kuniko Ozaki, a vice president of the ICC, said acceding to the treaty was just the first step for the Palestinians. "As the Rome Statute today enters into force for the State of Palestine, Palestine acquires all the rights as well as responsibilities that come with being a State Party to the Statute. These are substantive commitments, which cannot be taken lightly," she said. Rights group Human Rights Watch welcomed the development. "Governments seeking to penalize Palestine for joining the ICC should immediately end their pressure, and countries that support universal acceptance of the court\'s treaty should speak out to welcome its membership," said Balkees Jarrah, international justice counsel for the group. "What\'s objectionable is the attempts to undermine international justice, not Palestine\'s decision to join a treaty to which over 100 countries around the world are members." In January, when the preliminary ICC examination was opened, Israeli Prime Minister Benjamin Netanyahu described it as an outrage, saying the court was overstepping its boundaries. The United States also said it "strongly" disagreed with the court\'s decision. "As we have said repeatedly, we do not believe that Palestine is a state and therefore we do not believe that it is eligible to join the ICC," the State Department said in a statement. It urged the warring sides to resolve their differences through direct negotiations. "We will continue to oppose actions against Israel at the ICC as counterproductive to the cause of peace," it said. But the ICC begs to differ with the definition of a state for its purposes and refers to the territories as "Palestine." While a preliminary examination is not a formal investigation, it allows the court to review evidence and determine whether to investigate suspects on both sides. Prosecutor Fatou Bensouda said her office would "conduct its analysis in full independence and impartiality." The war between Israel and Hamas militants in Gaza last summer left more than 2,000 people dead. The inquiry will include alleged war crimes committed since June. The International Criminal Court was set up in 2002 to prosecute genocide, crimes against humanity and war crimes.',
- 'The French prosecutor leading an investigation into the crash of Germanwings Flight 9525 insisted Wednesday that he was not aware of any video footage from on board the plane. Marseille prosecutor Brice Robin told CNN that "so far no videos were used in the crash investigation." He added, "A person who has such a video needs to immediately give it to the investigators." Robin\'s comments follow claims by two magazines, German daily Bild and French Paris Match, of a cell phone video showing the harrowing final seconds from on board Germanwings Flight 9525 as it crashed into the French Alps. All 150 on board were killed. Paris Match and Bild reported that the video was recovered from a phone at the wreckage site. The two publications described the supposed video, but did not post it on their websites. The publications said that they watched the video, which was found by a source close to the investigation. "One can hear cries of \'My God\' in several languages," Paris Match reported. "Metallic banging can also be heard more than three times, perhaps of the pilot trying to open the cockpit door with a heavy object. Towards the end, after a heavy shake, stronger than the others, the screaming intensifies. Then nothing." "It is a very disturbing scene," said Julian Reichelt, editor-in-chief of Bild online. An official with France\'s accident investigation agency, the BEA, said the agency is not aware of any such video. Lt. Col. Jean-Marc Menichini, a French Gendarmerie spokesman in charge of communications on rescue efforts around the Germanwings crash site, told CNN that the reports were "completely wrong" and "unwarranted." Cell phones have been collected at the site, he said, but that they "hadn\'t been exploited yet." Menichini said he believed the cell phones would need to be sent to the Criminal Research Institute in Rosny sous-Bois, near Paris, in order to be analyzed by specialized technicians working hand-in-hand with investigators. But none of the cell phones found so far have been sent to the institute, Menichini said. Asked whether staff involved in the search could have leaked a memory card to the media, Menichini answered with a categorical "no." Reichelt told "Erin Burnett: Outfront" that he had watched the video and stood by the report, saying Bild and Paris Match are "very confident" that the clip is real. He noted that investigators only revealed they\'d recovered cell phones from the crash site after Bild and Paris Match published their reports. "That is something we did not know before. ... Overall we can say many things of the investigation weren\'t revealed by the investigation at the beginning," he said. What was mental state of Germanwings co-pilot? German airline Lufthansa confirmed Tuesday that co-pilot Andreas Lubitz had battled depression years before he took the controls of Germanwings Flight 9525, which he\'s accused of deliberately crashing last week in the French Alps. Lubitz told his Lufthansa flight training school in 2009 that he had a "previous episode of severe depression," the airline said Tuesday. Email correspondence between Lubitz and the school discovered in an internal investigation, Lufthansa said, included medical documents he submitted in connection with resuming his flight training. The announcement indicates that Lufthansa, the parent company of Germanwings, knew of Lubitz\'s battle with depression, allowed him to continue training and ultimately put him in the cockpit. Lufthansa, whose CEO Carsten Spohr previously said Lubitz was 100% fit to fly, described its statement Tuesday as a "swift and seamless clarification" and said it was sharing the information and documents -- including training and medical records -- with public prosecutors. Spohr traveled to the crash site Wednesday, where recovery teams have been working for the past week to recover human remains and plane debris scattered across a steep mountainside. He saw the crisis center set up in Seyne-les-Alpes, laid a wreath in the village of Le Vernet, closer to the crash site, where grieving families have left flowers at a simple stone memorial. Menichini told CNN late Tuesday that no visible human remains were left at the site but recovery teams would keep searching. French President Francois Hollande, speaking Tuesday, said that it should be possible to identify all the victims using DNA analysis by the end of the week, sooner than authorities had previously suggested. In the meantime, the recovery of the victims\' personal belongings will start Wednesday, Menichini said. Among those personal belongings could be more cell phones belonging to the 144 passengers and six crew on board. Check out the latest from our correspondents . The details about Lubitz\'s correspondence with the flight school during his training were among several developments as investigators continued to delve into what caused the crash and Lubitz\'s possible motive for downing the jet. A Lufthansa spokesperson told CNN on Tuesday that Lubitz had a valid medical certificate, had passed all his examinations and "held all the licenses required." Earlier, a spokesman for the prosecutor\'s office in Dusseldorf, Christoph Kumpa, said medical records reveal Lubitz suffered from suicidal tendencies at some point before his aviation career and underwent psychotherapy before he got his pilot\'s license. Kumpa emphasized there\'s no evidence suggesting Lubitz was suicidal or acting aggressively before the crash. Investigators are looking into whether Lubitz feared his medical condition would cause him to lose his pilot\'s license, a European government official briefed on the investigation told CNN on Tuesday. While flying was "a big part of his life," the source said, it\'s only one theory being considered. Another source, a law enforcement official briefed on the investigation, also told CNN that authorities believe the primary motive for Lubitz to bring down the plane was that he feared he would not be allowed to fly because of his medical problems. Lubitz\'s girlfriend told investigators he had seen an eye doctor and a neuropsychologist, both of whom deemed him unfit to work recently and concluded he had psychological issues, the European government official said. But no matter what details emerge about his previous mental health struggles, there\'s more to the story, said Brian Russell, a forensic psychologist. "Psychology can explain why somebody would turn rage inward on themselves about the fact that maybe they weren\'t going to keep doing their job and they\'re upset about that and so they\'re suicidal," he said. "But there is no mental illness that explains why somebody then feels entitled to also take that rage and turn it outward on 149 other people who had nothing to do with the person\'s problems." Germanwings crash compensation: What we know . Who was the captain of Germanwings Flight 9525? CNN\'s Margot Haddad reported from Marseille and Pamela Brown from Dusseldorf, while Laura Smith-Spark wrote from London. CNN\'s Frederik Pleitgen, Pamela Boykoff, Antonia Mortensen, Sandrine Amiel and Anna-Maja Rappard contributed to this report.',
+ "The Palestinian Authority officially became the 123rd member of the International Criminal Court on"
+ " Wednesday, a step that gives the court jurisdiction over alleged crimes in Palestinian territories."
+ " The formal accession was marked with a ceremony at The Hague, in the Netherlands, where the court is"
+ " based. The Palestinians signed the ICC's founding Rome Statute in January, when they also accepted"
+ ' its jurisdiction over alleged crimes committed "in the occupied Palestinian territory, including'
+ ' East Jerusalem, since June 13, 2014." Later that month, the ICC opened a preliminary examination'
+ " into the situation in Palestinian territories, paving the way for possible war crimes investigations"
+ " against Israelis. As members of the court, Palestinians may be subject to counter-charges as well."
+ " Israel and the United States, neither of which is an ICC member, opposed the Palestinians' efforts"
+ " to join the body. But Palestinian Foreign Minister Riad al-Malki, speaking at Wednesday's ceremony,"
+ ' said it was a move toward greater justice. "As Palestine formally becomes a State Party to the Rome'
+ ' Statute today, the world is also a step closer to ending a long era of impunity and injustice," he'
+ ' said, according to an ICC news release. "Indeed, today brings us closer to our shared goals of'
+ ' justice and peace." Judge Kuniko Ozaki, a vice president of the ICC, said acceding to the treaty was'
+ ' just the first step for the Palestinians. "As the Rome Statute today enters into force for the State'
+ " of Palestine, Palestine acquires all the rights as well as responsibilities that come with being a"
+ ' State Party to the Statute. These are substantive commitments, which cannot be taken lightly," she'
+ ' said. Rights group Human Rights Watch welcomed the development. "Governments seeking to penalize'
+ " Palestine for joining the ICC should immediately end their pressure, and countries that support"
+ " universal acceptance of the court's treaty should speak out to welcome its membership,\" said"
+ " Balkees Jarrah, international justice counsel for the group. \"What's objectionable is the attempts"
+ " to undermine international justice, not Palestine's decision to join a treaty to which over 100"
+ ' countries around the world are members." In January, when the preliminary ICC examination was'
+ " opened, Israeli Prime Minister Benjamin Netanyahu described it as an outrage, saying the court was"
+ ' overstepping its boundaries. The United States also said it "strongly" disagreed with the court\'s'
+ ' decision. "As we have said repeatedly, we do not believe that Palestine is a state and therefore we'
+ ' do not believe that it is eligible to join the ICC," the State Department said in a statement. It'
+ ' urged the warring sides to resolve their differences through direct negotiations. "We will continue'
+ ' to oppose actions against Israel at the ICC as counterproductive to the cause of peace," it said.'
+ " But the ICC begs to differ with the definition of a state for its purposes and refers to the"
+ ' territories as "Palestine." While a preliminary examination is not a formal investigation, it allows'
+ " the court to review evidence and determine whether to investigate suspects on both sides. Prosecutor"
+ ' Fatou Bensouda said her office would "conduct its analysis in full independence and impartiality."'
+ " The war between Israel and Hamas militants in Gaza last summer left more than 2,000 people dead. The"
+ " inquiry will include alleged war crimes committed since June. The International Criminal Court was"
+ " set up in 2002 to prosecute genocide, crimes against humanity and war crimes.",
+ "The French prosecutor leading an investigation into the crash of Germanwings Flight 9525 insisted"
+ " Wednesday that he was not aware of any video footage from on board the plane. Marseille prosecutor"
+ ' Brice Robin told CNN that "so far no videos were used in the crash investigation." He added, "A'
+ " person who has such a video needs to immediately give it to the investigators.\" Robin's comments"
+ " follow claims by two magazines, German daily Bild and French Paris Match, of a cell phone video"
+ " showing the harrowing final seconds from on board Germanwings Flight 9525 as it crashed into the"
+ " French Alps. All 150 on board were killed. Paris Match and Bild reported that the video was"
+ " recovered from a phone at the wreckage site. The two publications described the supposed video, but"
+ " did not post it on their websites. The publications said that they watched the video, which was"
+ " found by a source close to the investigation. \"One can hear cries of 'My God' in several"
+ ' languages," Paris Match reported. "Metallic banging can also be heard more than three times, perhaps'
+ " of the pilot trying to open the cockpit door with a heavy object. Towards the end, after a heavy"
+ ' shake, stronger than the others, the screaming intensifies. Then nothing." "It is a very disturbing'
+ " scene,\" said Julian Reichelt, editor-in-chief of Bild online. An official with France's accident"
+ " investigation agency, the BEA, said the agency is not aware of any such video. Lt. Col. Jean-Marc"
+ " Menichini, a French Gendarmerie spokesman in charge of communications on rescue efforts around the"
+ ' Germanwings crash site, told CNN that the reports were "completely wrong" and "unwarranted." Cell'
+ ' phones have been collected at the site, he said, but that they "hadn\'t been exploited yet."'
+ " Menichini said he believed the cell phones would need to be sent to the Criminal Research Institute"
+ " in Rosny sous-Bois, near Paris, in order to be analyzed by specialized technicians working"
+ " hand-in-hand with investigators. But none of the cell phones found so far have been sent to the"
+ " institute, Menichini said. Asked whether staff involved in the search could have leaked a memory"
+ ' card to the media, Menichini answered with a categorical "no." Reichelt told "Erin Burnett:'
+ ' Outfront" that he had watched the video and stood by the report, saying Bild and Paris Match are'
+ ' "very confident" that the clip is real. He noted that investigators only revealed they\'d recovered'
+ ' cell phones from the crash site after Bild and Paris Match published their reports. "That is'
+ " something we did not know before. ... Overall we can say many things of the investigation weren't"
+ ' revealed by the investigation at the beginning," he said. What was mental state of Germanwings'
+ " co-pilot? German airline Lufthansa confirmed Tuesday that co-pilot Andreas Lubitz had battled"
+ " depression years before he took the controls of Germanwings Flight 9525, which he's accused of"
+ " deliberately crashing last week in the French Alps. Lubitz told his Lufthansa flight training school"
+ ' in 2009 that he had a "previous episode of severe depression," the airline said Tuesday. Email'
+ " correspondence between Lubitz and the school discovered in an internal investigation, Lufthansa"
+ " said, included medical documents he submitted in connection with resuming his flight training. The"
+ " announcement indicates that Lufthansa, the parent company of Germanwings, knew of Lubitz's battle"
+ " with depression, allowed him to continue training and ultimately put him in the cockpit. Lufthansa,"
+ " whose CEO Carsten Spohr previously said Lubitz was 100% fit to fly, described its statement Tuesday"
+ ' as a "swift and seamless clarification" and said it was sharing the information and documents --'
+ " including training and medical records -- with public prosecutors. Spohr traveled to the crash site"
+ " Wednesday, where recovery teams have been working for the past week to recover human remains and"
+ " plane debris scattered across a steep mountainside. He saw the crisis center set up in"
+ " Seyne-les-Alpes, laid a wreath in the village of Le Vernet, closer to the crash site, where grieving"
+ " families have left flowers at a simple stone memorial. Menichini told CNN late Tuesday that no"
+ " visible human remains were left at the site but recovery teams would keep searching. French"
+ " President Francois Hollande, speaking Tuesday, said that it should be possible to identify all the"
+ " victims using DNA analysis by the end of the week, sooner than authorities had previously suggested."
+ " In the meantime, the recovery of the victims' personal belongings will start Wednesday, Menichini"
+ " said. Among those personal belongings could be more cell phones belonging to the 144 passengers and"
+ " six crew on board. Check out the latest from our correspondents . The details about Lubitz's"
+ " correspondence with the flight school during his training were among several developments as"
+ " investigators continued to delve into what caused the crash and Lubitz's possible motive for"
+ " downing the jet. A Lufthansa spokesperson told CNN on Tuesday that Lubitz had a valid medical"
+ ' certificate, had passed all his examinations and "held all the licenses required." Earlier, a'
+ " spokesman for the prosecutor's office in Dusseldorf, Christoph Kumpa, said medical records reveal"
+ " Lubitz suffered from suicidal tendencies at some point before his aviation career and underwent"
+ " psychotherapy before he got his pilot's license. Kumpa emphasized there's no evidence suggesting"
+ " Lubitz was suicidal or acting aggressively before the crash. Investigators are looking into whether"
+ " Lubitz feared his medical condition would cause him to lose his pilot's license, a European"
+ ' government official briefed on the investigation told CNN on Tuesday. While flying was "a big part'
+ " of his life,\" the source said, it's only one theory being considered. Another source, a law"
+ " enforcement official briefed on the investigation, also told CNN that authorities believe the"
+ " primary motive for Lubitz to bring down the plane was that he feared he would not be allowed to fly"
+ " because of his medical problems. Lubitz's girlfriend told investigators he had seen an eye doctor"
+ " and a neuropsychologist, both of whom deemed him unfit to work recently and concluded he had"
+ " psychological issues, the European government official said. But no matter what details emerge about"
+ " his previous mental health struggles, there's more to the story, said Brian Russell, a forensic"
+ ' psychologist. "Psychology can explain why somebody would turn rage inward on themselves about the'
+ " fact that maybe they weren't going to keep doing their job and they're upset about that and so"
+ ' they\'re suicidal," he said. "But there is no mental illness that explains why somebody then feels'
+ " entitled to also take that rage and turn it outward on 149 other people who had nothing to do with"
+ " the person's problems.\" Germanwings crash compensation: What we know . Who was the captain of"
+ " Germanwings Flight 9525? CNN's Margot Haddad reported from Marseille and Pamela Brown from"
+ " Dusseldorf, while Laura Smith-Spark wrote from London. CNN's Frederik Pleitgen, Pamela Boykoff,"
+ " Antonia Mortensen, Sandrine Amiel and Anna-Maja Rappard contributed to this report.",
],
return_tensors="tf",
padding="longest",
diff --git a/tests/models/bert/test_tokenization_bert.py b/tests/models/bert/test_tokenization_bert.py
index fcb69914b9..dfbcd266c4 100644
--- a/tests/models/bert/test_tokenization_bert.py
+++ b/tests/models/bert/test_tokenization_bert.py
@@ -187,7 +187,7 @@ class BertTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
vocab_tokens = ["[UNK]", "[CLS]", "[SEP]", "want", "##want", "##ed", "wa", "un", "runn", "##ing"]
vocab = {}
- for (i, token) in enumerate(vocab_tokens):
+ for i, token in enumerate(vocab_tokens):
vocab[token] = i
tokenizer = WordpieceTokenizer(vocab=vocab, unk_token="[UNK]")
diff --git a/tests/models/bert_generation/test_tokenization_bert_generation.py b/tests/models/bert_generation/test_tokenization_bert_generation.py
index 155f383a46..581f249db0 100644
--- a/tests/models/bert_generation/test_tokenization_bert_generation.py
+++ b/tests/models/bert_generation/test_tokenization_bert_generation.py
@@ -144,7 +144,10 @@ class BertGenerationTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
@slow
def test_tokenization_base_hard_symbols(self):
- symbols = 'This is a very long text with a lot of weird characters, such as: . , ~ ? ( ) " [ ] ! : - . Also we will add words that should not exsist and be tokenized to , such as saoneuhaoesuth'
+ symbols = (
+ 'This is a very long text with a lot of weird characters, such as: . , ~ ? ( ) " [ ] ! : - . Also we will'
+ " add words that should not exsist and be tokenized to , such as saoneuhaoesuth"
+ )
original_tokenizer_encodings = [
871,
419,
diff --git a/tests/models/bert_japanese/test_tokenization_bert_japanese.py b/tests/models/bert_japanese/test_tokenization_bert_japanese.py
index 59605bac14..86b3f16f10 100644
--- a/tests/models/bert_japanese/test_tokenization_bert_japanese.py
+++ b/tests/models/bert_japanese/test_tokenization_bert_japanese.py
@@ -176,7 +176,7 @@ class BertJapaneseTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
vocab_tokens = ["[UNK]", "[CLS]", "[SEP]", "こんにちは", "こん", "にちは", "ばんは", "##こん", "##にちは", "##ばんは"]
vocab = {}
- for (i, token) in enumerate(vocab_tokens):
+ for i, token in enumerate(vocab_tokens):
vocab[token] = i
tokenizer = WordpieceTokenizer(vocab=vocab, unk_token="[UNK]")
@@ -249,7 +249,7 @@ class BertJapaneseCharacterTokenizationTest(TokenizerTesterMixin, unittest.TestC
vocab_tokens = ["[UNK]", "[CLS]", "[SEP]", "こ", "ん", "に", "ち", "は", "ば", "世", "界", "、", "。"]
vocab = {}
- for (i, token) in enumerate(vocab_tokens):
+ for i, token in enumerate(vocab_tokens):
vocab[token] = i
tokenizer = CharacterTokenizer(vocab=vocab, unk_token="[UNK]")
@@ -288,7 +288,8 @@ class BertTokenizerMismatchTest(unittest.TestCase):
BertTokenizer.from_pretrained(EXAMPLE_BERT_JAPANESE_ID)
self.assertTrue(
cm.records[0].message.startswith(
- "The tokenizer class you load from this checkpoint is not the same type as the class this function is called from."
+ "The tokenizer class you load from this checkpoint is not the same type as the class this function"
+ " is called from."
)
)
EXAMPLE_BERT_ID = "bert-base-cased"
@@ -296,6 +297,7 @@
BertJapaneseTokenizer.from_pretrained(EXAMPLE_BERT_ID)
self.assertTrue(
cm.records[0].message.startswith(
- "The tokenizer class you load from this checkpoint is not the same type as the class this function is called from."
+ "The tokenizer class you load from this checkpoint is not the same type as the class this function"
+ " is called from."
)
)
diff --git a/tests/models/big_bird/test_modeling_big_bird.py b/tests/models/big_bird/test_modeling_big_bird.py
index 90e6bbb90e..ba09241af9 100644
--- a/tests/models/big_bird/test_modeling_big_bird.py
+++ b/tests/models/big_bird/test_modeling_big_bird.py
@@ -799,7 +799,16 @@ class BigBirdModelIntegrationTest(unittest.TestCase):
model.to(torch_device)
text = [
- "Transformer-based models are unable to process long sequences due to their self-attention operation, which scales quadratically with the sequence length. To address this limitation, we introduce the Longformer with an attention mechanism that scales linearly with sequence length, making it easy to process documents of thousands of tokens or longer. Longformer’s attention mechanism is a drop-in replacement for the standard self-attention and combines a local windowed attention with a task motivated global attention. Following prior work on long-sequence transformers, we evaluate Longformer on character-level language modeling and achieve state-of-the-art results on text8 and enwik8. In contrast to most prior work, we also pretrain Longformer and finetune it on a variety of downstream tasks. Our pretrained Longformer consistently outperforms RoBERTa on long document tasks and sets new state-of-the-art results on WikiHop and TriviaQA."
+ "Transformer-based models are unable to process long sequences due to their self-attention operation," + " which scales quadratically with the sequence length. To address this limitation, we introduce the" + " Longformer with an attention mechanism that scales linearly with sequence length, making it easy to" + " process documents of thousands of tokens or longer. Longformer’s attention mechanism is a drop-in" + " replacement for the standard self-attention and combines a local windowed attention with a task" + " motivated global attention. Following prior work on long-sequence transformers, we evaluate Longformer" + " on character-level language modeling and achieve state-of-the-art results on text8 and enwik8. In" + " contrast to most prior work, we also pretrain Longformer and finetune it on a variety of downstream" + " tasks. Our pretrained Longformer consistently outperforms RoBERTa on long document tasks and sets new" + " state-of-the-art results on WikiHop and TriviaQA." ] inputs = tokenizer(text) @@ -837,7 +846,18 @@ class BigBirdModelIntegrationTest(unittest.TestCase): ) model.to(torch_device) - context = "The BigBird model was proposed in Big Bird: Transformers for Longer Sequences by Zaheer, Manzil and Guruganesh, Guru and Dubey, Kumar Avinava and Ainslie, Joshua and Alberti, Chris and Ontanon, Santiago and Pham, Philip and Ravula, Anirudh and Wang, Qifan and Yang, Li and others. BigBird, is a sparse-attention based transformer which extends Transformer based models, such as BERT to much longer sequences. In addition to sparse attention, BigBird also applies global attention as well as random attention to the input sequence. Theoretically, it has been shown that applying sparse, global, and random attention approximates full attention, while being computationally much more efficient for longer sequences. As a consequence of the capability to handle longer context, BigBird has shown improved performance on various long document NLP tasks, such as question answering and summarization, compared to BERT or RoBERTa." + context = ( + "The BigBird model was proposed in Big Bird: Transformers for Longer Sequences by Zaheer, Manzil and" + " Guruganesh, Guru and Dubey, Kumar Avinava and Ainslie, Joshua and Alberti, Chris and Ontanon, Santiago" + " and Pham, Philip and Ravula, Anirudh and Wang, Qifan and Yang, Li and others. BigBird, is a" + " sparse-attention based transformer which extends Transformer based models, such as BERT to much longer" + " sequences. In addition to sparse attention, BigBird also applies global attention as well as random" + " attention to the input sequence. Theoretically, it has been shown that applying sparse, global, and" + " random attention approximates full attention, while being computationally much more efficient for longer" + " sequences. As a consequence of the capability to handle longer context, BigBird has shown improved" + " performance on various long document NLP tasks, such as question answering and summarization, compared" + " to BERT or RoBERTa." 
+ )
question = [
"Which is better for longer sequences- BigBird or BERT?",
diff --git a/tests/models/big_bird/test_tokenization_big_bird.py b/tests/models/big_bird/test_tokenization_big_bird.py
index 29c28d5877..ff65451008 100644
--- a/tests/models/big_bird/test_tokenization_big_bird.py
+++ b/tests/models/big_bird/test_tokenization_big_bird.py
@@ -168,7 +168,10 @@ class BigBirdTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
@slow
def test_tokenization_base_hard_symbols(self):
- symbols = 'This is a very long text with a lot of weird characters, such as: . , ~ ? ( ) " [ ] ! : - . Also we will add words that should not exsist and be tokenized to , such as saoneuhaoesuth'
+ symbols = (
+ 'This is a very long text with a lot of weird characters, such as: . , ~ ? ( ) " [ ] ! : - . Also we will'
+ " add words that should not exsist and be tokenized to , such as saoneuhaoesuth"
+ )
# fmt: off
original_tokenizer_encodings = [65, 871, 419, 358, 946, 991, 2521, 452, 358, 1357, 387, 7751, 3536, 112, 985, 456, 126, 865, 938, 5400, 5734, 458, 1368, 467, 786, 2462, 5246, 1159, 633, 865, 4519, 457, 582, 852, 2557, 427, 916, 508, 405, 34324, 497, 391, 408, 11342, 1244, 385, 100, 938, 985, 456, 574, 362, 12597, 3200, 3129, 1172, 66] # noqa: E231
# fmt: on
diff --git a/tests/models/bigbird_pegasus/test_modeling_bigbird_pegasus.py b/tests/models/bigbird_pegasus/test_modeling_bigbird_pegasus.py
index 31f109fbcf..d4e7e8f4ae 100644
--- a/tests/models/bigbird_pegasus/test_modeling_bigbird_pegasus.py
+++ b/tests/models/bigbird_pegasus/test_modeling_bigbird_pegasus.py
@@ -538,9 +538,26 @@ class BigBirdPegasusModelIntegrationTests(unittest.TestCase):
hypotheses_batch = model.generate(**inputs)
- EXPECTED_LEP = "motivated by some recent studies on the light cp - odd higgs boson @xmath0 in non - minimal supersymmetric models, we investigate the rare @xmath1-decays @xmath2 ( @xmath3 ) in the two higgs doublet model ( 2hdm ), the nearly minimal supersymmetric standard model ( nmssm ), the next - to - minimal supersymmetric standard model ( nmssm ) and the minimal supersymmetric standard model ( mssm ). we find that the branching ratios of @xmath4 can reach @xmath5 in 2hdm, @xmath6 in nmssm and @xmath7 in mssm, which are at the level of @xmath8 in 2hdm, @xmath9 in nmssm and @xmath10 in mssm, respectively. these rates can be significantly enhanced in new physics models which lie within the expected sensitivity of the gigaz option of the international linear collider ( ilc ). = # 1,nucl. phys. b * # 1"
+ EXPECTED_LEP = (
+ "motivated by some recent studies on the light cp - odd higgs boson @xmath0 in non - minimal"
+ " supersymmetric models, we investigate the rare @xmath1-decays @xmath2 ( @xmath3 ) in the two higgs"
+ " doublet model ( 2hdm ), the nearly minimal supersymmetric standard model ( nmssm ), the next - to -"
+ " minimal supersymmetric standard model ( nmssm ) and the minimal supersymmetric standard model ( mssm"
+ " ). we find that the branching ratios of @xmath4 can reach @xmath5 in 2hdm, @xmath6 in nmssm and"
+ " @xmath7 in mssm, which are at the level of @xmath8 in 2hdm, @xmath9 in nmssm and @xmath10 in mssm,"
+ " respectively. these rates can be significantly enhanced in new physics models which lie within the"
+ " expected sensitivity of the gigaz option of the international linear collider ( ilc ). = # 1,nucl."
+ " phys. b * # 1"
+ )
- EXPECTED_MAGNET = "a positive, nonsaturating and dominantly linear magnetoresistance can appear within quite wide magnetic - field range in the surface state of a topological insulator having a positive and finite effective g - factor. this linear magnetoresistance shows up in the system of high carrier concentration and low mobility when electrons are in extended states and spread over many smeared landau levels, and persists up to room temperature, providing a possible mechanism for the recently observed linear magnetoresistance in topological insulator bi@xmath0se@xmath1 nanoribbons."
+ EXPECTED_MAGNET = (
+ "a positive, nonsaturating and dominantly linear magnetoresistance can appear within quite wide magnetic -"
+ " field range in the surface state of a topological insulator having a positive and finite effective g -"
+ " factor. this linear magnetoresistance shows up in the system of high carrier concentration and low"
+ " mobility when electrons are in extended states and spread over many smeared landau levels, and persists"
+ " up to room temperature, providing a possible mechanism for the recently observed linear"
+ " magnetoresistance in topological insulator bi@xmath0se@xmath1 nanoribbons."
+ )
generated = tokenizer.batch_decode(
hypotheses_batch.tolist(), clean_up_tokenization_spaces=True, skip_special_tokens=True
diff --git a/tests/models/blenderbot/test_modeling_blenderbot.py b/tests/models/blenderbot/test_modeling_blenderbot.py
index 6bf7138467..e4dbf474d1 100644
--- a/tests/models/blenderbot/test_modeling_blenderbot.py
+++ b/tests/models/blenderbot/test_modeling_blenderbot.py
@@ -304,7 +304,10 @@ class Blenderbot3BIntegrationTests(unittest.TestCase):
generated_txt = self.tokenizer.batch_decode(generated_utterances, **TOK_DECODE_KW)
assert generated_txt[0].strip() == tgt_text
- src_text = "Social anxiety\nWow, I am never shy. Do you have anxiety?\nYes. I end up sweating and blushing and feel like i'm going to throw up.\nand why is that?"
+ src_text = (
+ "Social anxiety\nWow, I am never shy. Do you have anxiety?\nYes. I end up sweating and blushing and feel"
+ " like i'm going to throw up.\nand why is that?"
+ )
model_inputs = self.tokenizer([src_text], return_tensors="pt").to(torch_device)
diff --git a/tests/models/blenderbot_small/test_modeling_blenderbot_small.py b/tests/models/blenderbot_small/test_modeling_blenderbot_small.py
index b046fa97d9..8bc6304e79 100644
--- a/tests/models/blenderbot_small/test_modeling_blenderbot_small.py
+++ b/tests/models/blenderbot_small/test_modeling_blenderbot_small.py
@@ -290,8 +290,8 @@ class Blenderbot90MIntegrationTests(unittest.TestCase):
def test_90_generation_from_long_input(self):
src_text = [
- "Social anxiety\nWow, I am never shy. Do you have anxiety?\nYes. I end up sweating and blushing and feel like\
- i'm going to throw up.\nand why is that?"
+ "Social anxiety\nWow, I am never shy. Do you have anxiety?\nYes. I end up sweating and blushing and feel"
+ " like i'm going to throw up.\nand why is that?"
]
model_inputs = self.tokenizer(src_text, return_tensors="pt").to(torch_device)
diff --git a/tests/models/blenderbot_small/test_modeling_tf_blenderbot_small.py b/tests/models/blenderbot_small/test_modeling_tf_blenderbot_small.py
index a830e6c0b6..f8543aad59 100644
--- a/tests/models/blenderbot_small/test_modeling_tf_blenderbot_small.py
+++ b/tests/models/blenderbot_small/test_modeling_tf_blenderbot_small.py
@@ -305,7 +305,8 @@ def _long_tensor(tok_lst):
@require_tf
class TFBlenderbot90MIntegrationTests(unittest.TestCase):
src_text = [
- "Social anxiety\nWow, I am never shy. Do you have anxiety?\nYes. I end up sweating and blushing and feel like i'm going to throw up.\nand why is that?"
+ "Social anxiety\nWow, I am never shy. Do you have anxiety?\nYes. I end up sweating and blushing and feel like "
+ " i'm going to throw up.\nand why is that?"
]
model_name = "facebook/blenderbot_small-90M"
diff --git a/tests/models/canine/test_modeling_canine.py b/tests/models/canine/test_modeling_canine.py
index 483dd095a1..a4d13f0efa 100644
--- a/tests/models/canine/test_modeling_canine.py
+++ b/tests/models/canine/test_modeling_canine.py
@@ -378,7 +378,12 @@ class CanineModelTest(ModelTesterMixin, unittest.TestCase):
torch.allclose(
set_nan_tensor_to_zero(tuple_object), set_nan_tensor_to_zero(dict_object), atol=1e-5
),
- msg=f"Tuple and dict output are not equal. Difference: {torch.max(torch.abs(tuple_object - dict_object))}. Tuple has `nan`: {torch.isnan(tuple_object).any()} and `inf`: {torch.isinf(tuple_object)}. Dict has `nan`: {torch.isnan(dict_object).any()} and `inf`: {torch.isinf(dict_object)}.",
+ msg=(
+ "Tuple and dict output are not equal. Difference:"
+ f" {torch.max(torch.abs(tuple_object - dict_object))}. Tuple has `nan`:"
+ f" {torch.isnan(tuple_object).any()} and `inf`: {torch.isinf(tuple_object)}. Dict has"
+ f" `nan`: {torch.isnan(dict_object).any()} and `inf`: {torch.isinf(dict_object)}."
+ ),
)
recursive_check(tuple_output, dict_output)
diff --git a/tests/models/convnext/test_modeling_tf_convnext.py b/tests/models/convnext/test_modeling_tf_convnext.py
index 7b86a99fd4..4129591289 100644
--- a/tests/models/convnext/test_modeling_tf_convnext.py
+++ b/tests/models/convnext/test_modeling_tf_convnext.py
@@ -219,7 +219,10 @@ class TFConvNextModelTest(TFModelTesterMixin, unittest.TestCase):
else:
self.assertTrue(
all(tf.equal(tuple_object, dict_object)),
- msg=f"Tuple and dict output are not equal. Difference: {tf.math.reduce_max(tf.abs(tuple_object - dict_object))}",
+ msg=(
+ "Tuple and dict output are not equal. Difference:"
+ f" {tf.math.reduce_max(tf.abs(tuple_object - dict_object))}"
+ ),
)
recursive_check(tuple_output, dict_output)
diff --git a/tests/models/data2vec/test_modeling_data2vec_audio.py b/tests/models/data2vec/test_modeling_data2vec_audio.py
index 87885268b2..24e2cd918d 100644
--- a/tests/models/data2vec/test_modeling_data2vec_audio.py
+++ b/tests/models/data2vec/test_modeling_data2vec_audio.py
@@ -736,7 +736,8 @@ class Data2VecAudioModelIntegrationTest(unittest.TestCase):
EXPECTED_TRANSCRIPTIONS = [
"a man said to the universe sir i exist",
"sweat covered brion's body trickling into the tight loin cloth that was the only garment he wore",
- "the cut on his chest still dripping blood the ache of his overstrained eyes even the soaring arena around him with thousands of spectators were trivialities not worth thinking about",
+ "the cut on his chest still dripping blood the ache of his overstrained eyes even the soaring arena around"
+ " him with thousands of spectators were trivialities not worth thinking about",
"his instant of panic was followed by a small sharp blow high on his chest",
]
self.assertListEqual(predicted_trans, EXPECTED_TRANSCRIPTIONS)
diff --git a/tests/models/deberta/test_tokenization_deberta.py b/tests/models/deberta/test_tokenization_deberta.py
index ca6574bc31..4aa53e13ff 100644
--- a/tests/models/deberta/test_tokenization_deberta.py
+++ b/tests/models/deberta/test_tokenization_deberta.py
@@ -126,7 +126,9 @@ class DebertaTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
sequences = [
"ALBERT: A Lite BERT for Self-supervised Learning of Language Representations",
"ALBERT incorporates two parameter reduction techniques",
- "The first one is a factorized embedding parameterization. By decomposing the large vocabulary embedding matrix into two small matrices, we separate the size of the hidden layers from the size of vocabulary embedding.",
+ "The first one is a factorized embedding parameterization. By decomposing the large vocabulary"
+ " embedding matrix into two small matrices, we separate the size of the hidden layers from the size of"
+ " vocabulary embedding.",
]
encoding = tokenizer(sequences, padding=True)
@@ -155,7 +157,9 @@ class DebertaTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
expected_decoded_sequence = [
"ALBERT: A Lite BERT for Self-supervised Learning of Language Representations",
"ALBERT incorporates two parameter reduction techniques",
- "The first one is a factorized embedding parameterization. By decomposing the large vocabulary embedding matrix into two small matrices, we separate the size of the hidden layers from the size of vocabulary embedding.",
+ "The first one is a factorized embedding parameterization. By decomposing the large vocabulary"
+ " embedding matrix into two small matrices, we separate the size of the hidden layers from the size of"
+ " vocabulary embedding.",
]
self.assertDictEqual(encoding.data, expected_encoding)
diff --git a/tests/models/fnet/test_modeling_fnet.py b/tests/models/fnet/test_modeling_fnet.py
index 0abf51fb5d..974d7c2d4e 100644
--- a/tests/models/fnet/test_modeling_fnet.py
+++ b/tests/models/fnet/test_modeling_fnet.py
@@ -333,7 +333,12 @@ class FNetModelTest(ModelTesterMixin, unittest.TestCase):
torch.allclose(
set_nan_tensor_to_zero(tuple_object), set_nan_tensor_to_zero(dict_object), atol=1e-5
),
- msg=f"Tuple and dict output are not equal. Difference: {torch.max(torch.abs(tuple_object - dict_object))}. Tuple has `nan`: {torch.isnan(tuple_object).any()} and `inf`: {torch.isinf(tuple_object)}. Dict has `nan`: {torch.isnan(dict_object).any()} and `inf`: {torch.isinf(dict_object)}.",
+ msg=(
+ "Tuple and dict output are not equal. Difference:"
+ f" {torch.max(torch.abs(tuple_object - dict_object))}. Tuple has `nan`:"
+ f" {torch.isnan(tuple_object).any()} and `inf`: {torch.isinf(tuple_object)}. Dict has"
+ f" `nan`: {torch.isnan(dict_object).any()} and `inf`: {torch.isinf(dict_object)}."
+ ),
)
recursive_check(tuple_output, dict_output)
diff --git a/tests/models/gpt2/test_modeling_tf_gpt2.py b/tests/models/gpt2/test_modeling_tf_gpt2.py
index 9790b1c766..a032e33500 100644
--- a/tests/models/gpt2/test_modeling_tf_gpt2.py
+++ b/tests/models/gpt2/test_modeling_tf_gpt2.py
@@ -576,7 +576,8 @@ class TFGPT2ModelLanguageGenerationTest(unittest.TestCase):
sentence = ["The dog"]
expected_output_string = [
- "The dog owner asked why did our vet decide there needed to be extra ventilation inside because most puppies"
+ "The dog owner asked why did our vet decide there needed to be extra ventilation inside because most"
+ " puppies"
]
expected_output_string_xla = [
"The dog has been named in connection with the murder of a 20-year-old man in!"
diff --git a/tests/models/hubert/test_modeling_tf_hubert.py b/tests/models/hubert/test_modeling_tf_hubert.py
index 156535d7a2..871d466d97 100644
--- a/tests/models/hubert/test_modeling_tf_hubert.py
+++ b/tests/models/hubert/test_modeling_tf_hubert.py
@@ -539,7 +539,8 @@ class TFHubertModelIntegrationTest(unittest.TestCase):
EXPECTED_TRANSCRIPTIONS = [
"a man said to the universe sir i exist",
"sweat covered brion's body trickling into the tight loin cloth that was the only garment he wore",
- "the cut on his chest still dripping blood the ache of his overstrained eyes even the soaring arena around him with the thousands of spectators were trivialities not worth thinking about",
+ "the cut on his chest still dripping blood the ache of his overstrained eyes even the soaring arena around"
+ " him with the thousands of spectators were trivialities not worth thinking about",
"his instant of panic was followed by a small sharp blow high on his chest",
]
self.assertListEqual(predicted_trans, EXPECTED_TRANSCRIPTIONS)
diff --git a/tests/models/layoutlmv2/test_tokenization_layoutlmv2.py b/tests/models/layoutlmv2/test_tokenization_layoutlmv2.py
index 1c3f8190c1..78f78c33e7 100644
--- a/tests/models/layoutlmv2/test_tokenization_layoutlmv2.py
+++ b/tests/models/layoutlmv2/test_tokenization_layoutlmv2.py
@@ -181,7 +181,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
vocab_tokens = ["[UNK]", "[CLS]", "[SEP]", "want", "##want", "##ed", "wa", "un", "runn", "##ing"]
vocab = {}
- for (i, token) in enumerate(vocab_tokens):
+ for i, token in enumerate(vocab_tokens):
vocab[token] = i
tokenizer = WordpieceTokenizer(vocab=vocab, unk_token="[UNK]")
@@ -1634,11 +1634,9 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
break
self.assertTrue(
find,
- (
- f"'{new_special_token_str}' doesn't appear in the list "
- f"'{new_tokenizer.all_special_tokens_extended}' as an AddedToken with the same parameters as "
- f"'{special_token}' in the list {tokenizer.all_special_tokens_extended}"
- ),
+ f"'{new_special_token_str}' doesn't appear in the list "
+ f"'{new_tokenizer.all_special_tokens_extended}' as an AddedToken with the same parameters as "
+ f"'{special_token}' in the list {tokenizer.all_special_tokens_extended}",
)
elif special_token not in special_tokens_map:
# The special token must appear identically in the list of the new tokenizer.
@@ -1923,7 +1921,8 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
self.assertEqual(len(cm.records), 1)
self.assertTrue(
cm.records[0].message.startswith(
- "Token indices sequence length is longer than the specified maximum sequence length for this model"
+ "Token indices sequence length is longer than the specified maximum sequence length"
+ " for this model"
)
)
@@ -1937,7 +1936,8 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
self.assertEqual(len(cm.records), 1)
self.assertTrue(
cm.records[0].message.startswith(
- "Token indices sequence length is longer than the specified maximum sequence length for this model"
+ "Token indices sequence length is longer than the specified maximum sequence length"
+ " for this model"
)
)
# Check the order of Sequence of input ids, overflowing tokens and bbox sequence with truncation
@@ -2232,7 +2232,8 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
self.assertEqual(len(cm.records), 1)
self.assertTrue(
cm.records[0].message.startswith(
- "Token indices sequence length is longer than the specified maximum sequence length for this model"
+ "Token indices sequence length is longer than the specified maximum sequence length"
+ " for this model"
)
)
@@ -2244,7 +2245,8 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
self.assertEqual(len(cm.records), 1)
self.assertTrue(
cm.records[0].message.startswith(
- "Token indices sequence length is longer than the specified maximum sequence length for this model"
+ "Token indices sequence length is longer than the specified maximum sequence length"
+ " for this model"
)
)
# Check the order of Sequence of input ids, overflowing tokens and bbox sequence with truncation
diff --git a/tests/models/layoutxlm/test_tokenization_layoutxlm.py b/tests/models/layoutxlm/test_tokenization_layoutxlm.py
index 561e87e772..68aba50eca 100644
--- a/tests/models/layoutxlm/test_tokenization_layoutxlm.py
+++ b/tests/models/layoutxlm/test_tokenization_layoutxlm.py
@@ -1543,11 +1543,9 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
break
self.assertTrue(
find,
- (
- f"'{new_special_token_str}' doesn't appear in the list "
- f"'{new_tokenizer.all_special_tokens_extended}' as an AddedToken with the same parameters as "
- f"'{special_token}' in the list {tokenizer.all_special_tokens_extended}"
- ),
+ f"'{new_special_token_str}' doesn't appear in the list "
+ f"'{new_tokenizer.all_special_tokens_extended}' as an AddedToken with the same parameters as "
+ f"'{special_token}' in the list {tokenizer.all_special_tokens_extended}",
)
elif special_token not in special_tokens_map:
# The special token must appear identically in the list of the new tokenizer.
diff --git a/tests/models/led/test_modeling_led.py b/tests/models/led/test_modeling_led.py
index 9d3d090ab1..e96f91ecc9 100644
--- a/tests/models/led/test_modeling_led.py
+++ b/tests/models/led/test_modeling_led.py
@@ -528,9 +528,26 @@ class LEDModelIntegrationTests(unittest.TestCase):
no_repeat_ngram_size=3,
)
- EXPECTED_LEP = " the physics of @xmath0-boson will again play the central role in the frontier of particle physics if the gigaz option of the international linear collider ( ilc ) can be realized in its first phase.
\n the expected sensitivity to the branching ratio of the rare decays, especially its exotic or rare processes, should be investigated comprehensively to evaluate their potential in probing new physics. in this work \n, we extend the previous studies of these decays to some new models and investigate the decays altogether. we are motivated by some recent studies on the singlet extension of the mssm, such as the next - to - minimal supersymmetric standard model ( nmssm ) @xcite and the nearly - minimal - supersymmetry - standard - model(nmssm)@xcite, where a light cp - odd higgs boson with singlet - dominant component may naturally arise from the spontaneous breaking of some approximate global symmetry. # 1#2#3#4#5#6#7#8#9#10#11#12 " + EXPECTED_LEP = ( + " the physics of @xmath0-boson will again play the central role in the frontier of particle physics if the" + " gigaz option of the international linear collider ( ilc ) can be realized in its first phase. \n the" + " expected sensitivity to the branching ratio of the rare decays, especially its exotic or rare processes," + " should be investigated comprehensively to evaluate their potential in probing new physics. in this work" + " \n, we extend the previous studies of these decays to some new models and investigate the decays" + " altogether. we are motivated by some recent studies on the singlet extension of the mssm, such as the" + " next - to - minimal supersymmetric standard model ( nmssm ) @xcite and the nearly - minimal -" + " supersymmetry - standard - model(nmssm)@xcite, where a light cp - odd higgs boson with singlet -" + " dominant component may naturally arise from the spontaneous breaking of some approximate global" + " symmetry. # 1#2#3#4#5#6#7#8#9#10#11#12 " + ) - EXPECTED_MAGNET = " the recent experiment in the surface states of the topological insulator bi@xmath0se @xmath1, however, reported that a large positive magnetoresistance becomes very linear in perpendicular magnetic field even in an opposite situation where the carrier sheet density is high that all electrons occupy more than one landau levels. \n it is striking that this observation is in conflict with abrikosov s model and also with the classical parish - littlewood model. " + EXPECTED_MAGNET = ( + " the recent experiment in the surface states of the topological insulator bi@xmath0se @xmath1, however," + " reported that a large positive magnetoresistance becomes very linear in perpendicular magnetic field" + " even in an opposite situation where the carrier sheet density is high that all electrons occupy more" + " than one landau levels. \n it is striking that this observation is in conflict with abrikosov s model" + " and also with the classical parish - littlewood model. " + ) generated = tok.batch_decode( hypotheses_batch.tolist(), clean_up_tokenization_spaces=True, skip_special_tokens=True diff --git a/tests/models/luke/test_modeling_luke.py b/tests/models/luke/test_modeling_luke.py index 0661748da5..b6c9ef89ff 100644 --- a/tests/models/luke/test_modeling_luke.py +++ b/tests/models/luke/test_modeling_luke.py @@ -624,7 +624,10 @@ class LukeModelIntegrationTests(unittest.TestCase): model.to(torch_device) tokenizer = LukeTokenizer.from_pretrained("studio-ousia/luke-base", task="entity_classification") - text = "Top seed Ana Ivanovic said on Thursday she could hardly believe her luck as a fortuitous netcord helped the new world number one avoid a humiliating second- round exit at Wimbledon ." 
+        text = (
+            "Top seed Ana Ivanovic said on Thursday she could hardly believe her luck as a fortuitous netcord helped"
+            " the new world number one avoid a humiliating second- round exit at Wimbledon ."
+        )
         span = (39, 42)
         encoding = tokenizer(text, entity_spans=[span], add_prefix_space=True, return_tensors="pt")
@@ -656,7 +659,10 @@ class LukeModelIntegrationTests(unittest.TestCase):
         model.to(torch_device)
         tokenizer = LukeTokenizer.from_pretrained("studio-ousia/luke-large", task="entity_classification")
-        text = "Top seed Ana Ivanovic said on Thursday she could hardly believe her luck as a fortuitous netcord helped the new world number one avoid a humiliating second- round exit at Wimbledon ."
+        text = (
+            "Top seed Ana Ivanovic said on Thursday she could hardly believe her luck as a fortuitous netcord helped"
+            " the new world number one avoid a humiliating second- round exit at Wimbledon ."
+        )
         span = (39, 42)
         encoding = tokenizer(text, entity_spans=[span], add_prefix_space=True, return_tensors="pt")
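The `(39, 42)` spans in the LUKE tests above and below are character offsets into the raw sentence. A hedged sketch of the full classification round trip; the fine-tuned Open Entity checkpoint and the label lookup follow the LUKE documentation example rather than these tests (which load a bare encoder), while the tokenizer call mirrors the tests exactly:

```python
# Sketch only: checkpoint and id2label lookup are taken from the LUKE docs,
# not from this patch; the tokenizer arguments match the integration tests.
import torch
from transformers import LukeForEntityClassification, LukeTokenizer

name = "studio-ousia/luke-large-finetuned-open-entity"
tokenizer = LukeTokenizer.from_pretrained(name, task="entity_classification")
model = LukeForEntityClassification.from_pretrained(name)

text = (
    "Top seed Ana Ivanovic said on Thursday she could hardly believe her luck as a fortuitous netcord helped"
    " the new world number one avoid a humiliating second- round exit at Wimbledon ."
)
span = (39, 42)  # character offsets of "she", as in the tests
assert text[span[0] : span[1]] == "she"

encoding = tokenizer(text, entity_spans=[span], add_prefix_space=True, return_tensors="pt")
with torch.no_grad():
    outputs = model(**encoding)
print(model.config.id2label[outputs.logits.argmax(-1).item()])
```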
diff --git a/tests/models/luke/test_tokenization_luke.py b/tests/models/luke/test_tokenization_luke.py
index 81dce277a3..aa208f950b 100644
--- a/tests/models/luke/test_tokenization_luke.py
+++ b/tests/models/luke/test_tokenization_luke.py
@@ -480,7 +480,10 @@ class LukeTokenizerIntegrationTests(unittest.TestCase):
     def test_entity_classification_no_padding_or_truncation(self):
         tokenizer = LukeTokenizer.from_pretrained("studio-ousia/luke-base", task="entity_classification")
-        sentence = "Top seed Ana Ivanovic said on Thursday she could hardly believe her luck as a fortuitous netcord helped the new world number one avoid a humiliating second- round exit at Wimbledon ."
+        sentence = (
+            "Top seed Ana Ivanovic said on Thursday she could hardly believe her luck as a fortuitous netcord helped"
+            " the new world number one avoid a humiliating second- round exit at Wimbledon ."
+        )
         span = (39, 42)
         encoding = tokenizer(sentence, entity_spans=[span], return_token_type_ids=True)
@@ -491,7 +494,8 @@ class LukeTokenizerIntegrationTests(unittest.TestCase):
         self.assertEqual(len(encoding["token_type_ids"]), 42)
         self.assertEqual(
             tokenizer.decode(encoding["input_ids"], spaces_between_special_tokens=False),
-            "Top seed Ana Ivanovic said on Thursday she could hardly believe her luck as a fortuitous netcord helped the new world number one avoid a humiliating second- round exit at Wimbledon.",
+            "Top seed Ana Ivanovic said on Thursday she could hardly believe her luck as a fortuitous"
+            " netcord helped the new world number one avoid a humiliating second- round exit at Wimbledon.",
         )
         self.assertEqual(
             tokenizer.decode(encoding["input_ids"][9:12], spaces_between_special_tokens=False), " she"
@@ -514,7 +518,10 @@ class LukeTokenizerIntegrationTests(unittest.TestCase):
         tokenizer = LukeTokenizer.from_pretrained(
             "studio-ousia/luke-base", task="entity_classification", return_token_type_ids=True
         )
-        sentence = "Top seed Ana Ivanovic said on Thursday she could hardly believe her luck as a fortuitous netcord helped the new world number one avoid a humiliating second- round exit at Wimbledon ."
+        sentence = (
+            "Top seed Ana Ivanovic said on Thursday she could hardly believe her luck as a fortuitous netcord helped"
+            " the new world number one avoid a humiliating second- round exit at Wimbledon ."
+        )
         # entity information
         span = (39, 42)
diff --git a/tests/models/m2m_100/test_modeling_m2m_100.py b/tests/models/m2m_100/test_modeling_m2m_100.py
index 003c5a57cf..7685e98886 100644
--- a/tests/models/m2m_100/test_modeling_m2m_100.py
+++ b/tests/models/m2m_100/test_modeling_m2m_100.py
@@ -354,7 +354,9 @@ class M2M100ModelIntegrationTests(unittest.TestCase):
         src_fr = [
             "L'affaire NSA souligne l'absence totale de débat sur le renseignement",
             "Selon moi, il y a deux niveaux de réponse de la part du gouvernement français.",
-            "Lorsque François Hollande téléphone à Barack Obama ou quand le ministre des affaires étrangères Laurent Fabius convoque l'ambassadeur des Etats-Unis, ils réagissent à une vraie découverte, qui est celle de l'ampleur de la surveillance américaine sur l'ensemble des communications en France.",
+            "Lorsque François Hollande téléphone à Barack Obama ou quand le ministre des affaires étrangères Laurent"
+            " Fabius convoque l'ambassadeur des Etats-Unis, ils réagissent à une vraie découverte, qui est celle de"
+            " l'ampleur de la surveillance américaine sur l'ensemble des communications en France.",
         ]
         # The below article tests that we don't add any hypotheses outside of the top n_beams
@@ -370,7 +372,9 @@ class M2M100ModelIntegrationTests(unittest.TestCase):
         expected_en = [
             "The NSA case highlights the total absence of intelligence debate",
             "I think there are two levels of response from the French government.",
-            "When François Hollande calls Barack Obama or when Foreign Minister Laurent Fabius calls the U.S. Ambassador, they respond to a real discovery, which is that of the scale of U.S. surveillance on all communications in France.",
+            "When François Hollande calls Barack Obama or when Foreign Minister Laurent Fabius calls the U.S."
+            " Ambassador, they respond to a real discovery, which is that of the scale of U.S.
surveillance on all" + " communications in France.", ] generated = tokenizer.batch_decode( diff --git a/tests/models/mbart/test_modeling_mbart.py b/tests/models/mbart/test_modeling_mbart.py index 3ac2c542da..48b9f57a56 100644 --- a/tests/models/mbart/test_modeling_mbart.py +++ b/tests/models/mbart/test_modeling_mbart.py @@ -348,7 +348,9 @@ class MBartEnroIntegrationTest(AbstractSeq2SeqIntegrationTest): ] tgt_text = [ "Şeful ONU declară că nu există o soluţie militară în Siria", - 'Secretarul General Ban Ki-moon declară că răspunsul său la intensificarea sprijinului militar al Rusiei pentru Siria este că "nu există o soluţie militară" la conflictul de aproape cinci ani şi că noi arme nu vor face decât să înrăutăţească violenţa şi mizeria pentru milioane de oameni.', + "Secretarul General Ban Ki-moon declară că răspunsul său la intensificarea sprijinului militar al Rusiei" + ' pentru Siria este că "nu există o soluţie militară" la conflictul de aproape cinci ani şi că noi arme nu vor' + " face decât să înrăutăţească violenţa şi mizeria pentru milioane de oameni.", ] expected_src_tokens = [8274, 127873, 25916, 7, 8622, 2071, 438, 67485, 53, 187895, 23, 51712, 2, 250004] diff --git a/tests/models/mbart/test_tokenization_mbart.py b/tests/models/mbart/test_tokenization_mbart.py index d24aefb01f..e80531051b 100644 --- a/tests/models/mbart/test_tokenization_mbart.py +++ b/tests/models/mbart/test_tokenization_mbart.py @@ -213,7 +213,9 @@ class MBartEnroIntegrationTest(unittest.TestCase): ] tgt_text = [ "Şeful ONU declară că nu există o soluţie militară în Siria", - 'Secretarul General Ban Ki-moon declară că răspunsul său la intensificarea sprijinului militar al Rusiei pentru Siria este că "nu există o soluţie militară" la conflictul de aproape cinci ani şi că noi arme nu vor face decât să înrăutăţească violenţele şi mizeria pentru milioane de oameni.', + "Secretarul General Ban Ki-moon declară că răspunsul său la intensificarea sprijinului militar al Rusiei" + ' pentru Siria este că "nu există o soluţie militară" la conflictul de aproape cinci ani şi că noi arme nu vor' + " face decât să înrăutăţească violenţele şi mizeria pentru milioane de oameni.", ] expected_src_tokens = [8274, 127873, 25916, 7, 8622, 2071, 438, 67485, 53, 187895, 23, 51712, 2, EN_CODE] diff --git a/tests/models/mbart50/test_tokenization_mbart50.py b/tests/models/mbart50/test_tokenization_mbart50.py index 63adfe8436..5a65d88566 100644 --- a/tests/models/mbart50/test_tokenization_mbart50.py +++ b/tests/models/mbart50/test_tokenization_mbart50.py @@ -203,7 +203,9 @@ class MBart50OneToManyIntegrationTest(unittest.TestCase): ] tgt_text = [ "Şeful ONU declară că nu există o soluţie militară în Siria", - 'Secretarul General Ban Ki-moon declară că răspunsul său la intensificarea sprijinului militar al Rusiei pentru Siria este că "nu există o soluţie militară" la conflictul de aproape cinci ani şi că noi arme nu vor face decât să înrăutăţească violenţele şi mizeria pentru milioane de oameni.', + "Secretarul General Ban Ki-moon declară că răspunsul său la intensificarea sprijinului militar al Rusiei" + ' pentru Siria este că "nu există o soluţie militară" la conflictul de aproape cinci ani şi că noi arme nu vor' + " face decât să înrăutăţească violenţele şi mizeria pentru milioane de oameni.", ] expected_src_tokens = [EN_CODE, 8274, 127873, 25916, 7, 8622, 2071, 438, 67485, 53, 187895, 23, 51712, 2] diff --git a/tests/models/mluke/test_tokenization_mluke.py b/tests/models/mluke/test_tokenization_mluke.py index 66d6699246..681825c7dc 
100644 --- a/tests/models/mluke/test_tokenization_mluke.py +++ b/tests/models/mluke/test_tokenization_mluke.py @@ -365,7 +365,8 @@ class MLukeTokenizerIntegrationTests(unittest.TestCase): self.assertEqual( tokenizer.decode(encoding["input_ids"], spaces_between_special_tokens=False), - " ISO 639-3 uses the code fas for the dialects spoken across Iran and アフガニスタン ( Afghanistan ).", + " ISO 639-3 uses the code fas for the dialects spoken across Iran and アフガニスタン ( Afghanistan" + " ).", ) self.assertEqual( tokenizer.decode(encoding["input_ids"][1:5], spaces_between_special_tokens=False), "ISO 639-3" @@ -423,7 +424,8 @@ class MLukeTokenizerIntegrationTests(unittest.TestCase): self.assertEqual( tokenizer.decode(encoding["input_ids"], spaces_between_special_tokens=False), - " ISO 639-3 uses the code fas for the dialects spoken across Iran and アフガニスタン ( Afghanistan ).", + " ISO 639-3 uses the code fas for the dialects spoken across Iran and アフガニスタン ( Afghanistan" + " ).", ) self.assertEqual( tokenizer.decode(encoding["input_ids"][1:5], spaces_between_special_tokens=False), "ISO 639-3" @@ -506,7 +508,8 @@ class MLukeTokenizerIntegrationTests(unittest.TestCase): self.assertEqual(len(encoding["token_type_ids"]), 23) self.assertEqual( tokenizer.decode(encoding["input_ids"], spaces_between_special_tokens=False), - " Japanese is anEast Asian languagespoken by about 128 million people, primarily in Japan.", + " Japanese is anEast Asian languagespoken by about 128 million people, primarily in" + " Japan.", ) self.assertEqual( tokenizer.decode(encoding["input_ids"][4:9], spaces_between_special_tokens=False), @@ -559,7 +562,8 @@ class MLukeTokenizerIntegrationTests(unittest.TestCase): self.assertEqual( tokenizer.decode(encoding["input_ids"], spaces_between_special_tokens=False), - "Japaneseis an East Asian language spoken by about 128 million people, primarily inJapan.", + "Japaneseis an East Asian language spoken by about 128 million people, primarily" + " inJapan.", ) self.assertEqual( tokenizer.decode(encoding["input_ids"][1:4], spaces_between_special_tokens=False), diff --git a/tests/models/mobilebert/test_tokenization_mobilebert.py b/tests/models/mobilebert/test_tokenization_mobilebert.py index a72047ddbc..395f4a2aab 100644 --- a/tests/models/mobilebert/test_tokenization_mobilebert.py +++ b/tests/models/mobilebert/test_tokenization_mobilebert.py @@ -194,7 +194,7 @@ class MobileBERTTokenizationTest(TokenizerTesterMixin, unittest.TestCase): vocab_tokens = ["[UNK]", "[CLS]", "[SEP]", "want", "##want", "##ed", "wa", "un", "runn", "##ing"] vocab = {} - for (i, token) in enumerate(vocab_tokens): + for i, token in enumerate(vocab_tokens): vocab[token] = i tokenizer = WordpieceTokenizer(vocab=vocab, unk_token="[UNK]") diff --git a/tests/models/pegasus/test_modeling_tf_pegasus.py b/tests/models/pegasus/test_modeling_tf_pegasus.py index 594323a7dc..14fcce39a6 100644 --- a/tests/models/pegasus/test_modeling_tf_pegasus.py +++ b/tests/models/pegasus/test_modeling_tf_pegasus.py @@ -339,7 +339,8 @@ class TFPegasusIntegrationTests(unittest.TestCase): """ The London trio are up for best UK act and best album, as well as getting two nominations in the best song category."We got told like this morning 'Oh I think you're nominated'", said Dappy."And I was like 'Oh yeah, which one?' And now we've got nominated for four awards. I mean, wow!"Bandmate Fazer added: "We thought it's best of us to come down and mingle with everyone and say hello to the cameras. 
And now we find we've got four nominations."The band have two shots at the best song prize, getting the nod for their Tynchy Stryder collaboration Number One, and single Strong Again.Their album Uncle B will also go up against records by the likes of Beyonce and Kanye West.N-Dubz picked up the best newcomer Mobo in 2007, but female member Tulisa said they wouldn't be too disappointed if they didn't win this time around."At the end of the day we're grateful to be where we are in our careers."If it don't happen then it don't happen - live to fight another day and keep on making albums and hits for the fans."Dappy also revealed they could be performing live several times on the night.The group will be doing Number One and also a possible rendition of the War Child single, I Got Soul.The charity song is a re-working of The Killers' All These Things That I've Done and is set to feature artists like Chipmunk, Ironik and Pixie Lott.This year's Mobos will be held outside of London for the first time, in Glasgow on 30 September.N-Dubz said they were looking forward to performing for their Scottish fans and boasted about their recent shows north of the border."We just done Edinburgh the other day," said Dappy."We smashed up an N-Dubz show over there. We done Aberdeen about three or four months ago - we smashed up that show over there! Everywhere we go we smash it up!" """,
     ]
     expected_text = [
-        "California's largest electricity provider has cut power to hundreds of thousands of customers in an effort to reduce the risk of wildfires.",
+        "California's largest electricity provider has cut power to hundreds of thousands of customers in an effort to"
+        " reduce the risk of wildfires.",
         'N-Dubz have revealed they\'re "grateful" to have been nominated for four Mobo Awards.',
     ]  # differs slightly from pytorch, likely due to numerical differences in linear layers
     model_name = "google/pegasus-xsum"
diff --git a/tests/models/pegasus/test_tokenization_pegasus.py b/tests/models/pegasus/test_tokenization_pegasus.py
index 3f83e84178..d473725f9a 100644
--- a/tests/models/pegasus/test_tokenization_pegasus.py
+++ b/tests/models/pegasus/test_tokenization_pegasus.py
@@ -72,7 +72,10 @@ class PegasusTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
     def test_mask_tokens_rust_pegasus(self):
        rust_tokenizer = self.rust_tokenizer_class.from_pretrained(self.tmpdirname)
        py_tokenizer = self.tokenizer_class.from_pretrained(self.tmpdirname)
-        raw_input_str = "Let's see which <mask_1> is the better <mask_2> one <mask_2> It seems like this <mask_2> was important </s> <pad> <pad> <pad>"
+        raw_input_str = (
+            "Let's see which <mask_1> is the better <mask_2> one <mask_2> It seems like this <mask_2> was important"
+            " </s> <pad> <pad> <pad>"
+        )
        rust_ids = rust_tokenizer([raw_input_str], return_tensors=None, add_special_tokens=False).input_ids[0]
        py_ids = py_tokenizer([raw_input_str], return_tensors=None, add_special_tokens=False).input_ids[0]
        self.assertListEqual(py_ids, rust_ids)
@@ -158,7 +161,10 @@ class BigBirdPegasusTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
     def test_mask_tokens_rust_pegasus(self):
        rust_tokenizer = self.rust_tokenizer_class.from_pretrained(self.tmpdirname)
        py_tokenizer = self.tokenizer_class.from_pretrained(self.tmpdirname)
-        raw_input_str = "Let's see which <mask_1> is the better <mask_2> one [MASK] It seems like this [MASK] was important </s> <pad> <pad> <pad>"
+        raw_input_str = (
+            "Let's see which <mask_1> is the better <mask_2> one [MASK] It seems like this [MASK] was important </s>"
+            " <pad> <pad> <pad>"
+        )
        rust_ids = rust_tokenizer([raw_input_str], return_tensors=None, add_special_tokens=False).input_ids[0]
        py_ids = py_tokenizer([raw_input_str], return_tensors=None, add_special_tokens=False).input_ids[0]
        self.assertListEqual(py_ids, rust_ids)
@@ -198,7 +204,10 @@ class BigBirdPegasusTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
        tokenizer.tokenize(test_str)
        """
-        test_str = "This is an example string that is used to test the original TF implementation against the HF implementation"
+        test_str = (
+            "This is an example string that is used to test the original TF implementation against the HF"
+            " implementation"
+        )
        token_ids = self._large_tokenizer(test_str).input_ids
diff --git a/tests/models/perceiver/test_modeling_perceiver.py b/tests/models/perceiver/test_modeling_perceiver.py
index 1fc102bc40..5947a73a0e 100644
--- a/tests/models/perceiver/test_modeling_perceiver.py
+++ b/tests/models/perceiver/test_modeling_perceiver.py
@@ -542,9 +542,12 @@ class PerceiverModelTest(ModelTesterMixin, unittest.TestCase):
                    torch.allclose(
                        set_nan_tensor_to_zero(tuple_object), set_nan_tensor_to_zero(dict_object), atol=1e-5
                    ),
-                    msg=f"Tuple and dict output are not equal. Difference: {torch.max(torch.abs(tuple_object - dict_object))}. "
-                    f"Tuple has `nan`: {torch.isnan(tuple_object).any()} and `inf`: {torch.isinf(tuple_object)}. "
-                    f"Dict has `nan`: {torch.isnan(dict_object).any()} and `inf`: {torch.isinf(dict_object)}.",
+                    msg=(
+                        "Tuple and dict output are not equal. Difference:"
+                        f" {torch.max(torch.abs(tuple_object - dict_object))}. Tuple has `nan`:"
+                        f" {torch.isnan(tuple_object).any()} and `inf`: {torch.isinf(tuple_object)}. Dict has"
+                        f" `nan`: {torch.isnan(dict_object).any()} and `inf`: {torch.isinf(dict_object)}."
+                    ),
                )
                recursive_check(tuple_output, dict_output)
@@ -767,7 +770,10 @@ class PerceiverModelTest(ModelTesterMixin, unittest.TestCase):
    @require_torch_multi_gpu
    @unittest.skip(
-        reason="Perceiver does not work with data parallel (DP) because of a bug in PyTorch: https://github.com/pytorch/pytorch/issues/36035"
+        reason=(
+            "Perceiver does not work with data parallel (DP) because of a bug in PyTorch:"
+            " https://github.com/pytorch/pytorch/issues/36035"
+        )
    )
    def test_multi_gpu_data_parallel_forward(self):
        pass
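The ProphetNet hunk that follows exercises beam-search summarization. A condensed, hedged sketch of the same `generate()` call, reusing the test's checkpoint and generation arguments but with a shortened, illustrative input article:

```python
# Sketch: mirrors the generate() arguments used by the integration test below;
# the one-sentence article is illustrative, not the test's full input.
from transformers import ProphetNetForConditionalGeneration, ProphetNetTokenizer

checkpoint = "microsoft/prophetnet-large-uncased-cnndm"
tokenizer = ProphetNetTokenizer.from_pretrained(checkpoint)
model = ProphetNetForConditionalGeneration.from_pretrained(checkpoint)

article = "ustc was founded in beijing by the chinese academy of sciences in september 1958 ."
input_ids = tokenizer([article], max_length=511, return_tensors="pt").input_ids
summary_ids = model.generate(
    input_ids, num_beams=4, length_penalty=1.0, no_repeat_ngram_size=3, early_stopping=True
)
print(tokenizer.batch_decode(summary_ids, skip_special_tokens=True))
```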
diff --git a/tests/models/prophetnet/test_modeling_prophetnet.py b/tests/models/prophetnet/test_modeling_prophetnet.py
index e17e14072a..9ac8ea81e2 100644
--- a/tests/models/prophetnet/test_modeling_prophetnet.py
+++ b/tests/models/prophetnet/test_modeling_prophetnet.py
@@ -1226,7 +1226,15 @@ class ProphetNetModelIntegrationTest(unittest.TestCase):
        tokenizer = ProphetNetTokenizer.from_pretrained("microsoft/prophetnet-large-uncased-cnndm")
-        ARTICLE_TO_SUMMARIZE = "USTC was founded in Beijing by the Chinese Academy of Sciences (CAS) in September 1958. The Director of CAS, Mr. Guo Moruo was appointed the first president of USTC. USTC's founding mission was to develop a high-level science and technology workforce, as deemed critical for development of China's economy, defense, and science and technology education. The establishment was hailed as \"A Major Event in the History of Chinese Education and Science.\" CAS has supported USTC by combining most of its institutes with the departments of the university. USTC is listed in the top 16 national key universities, becoming the youngest national key university.".lower()
+        ARTICLE_TO_SUMMARIZE = (
+            "USTC was founded in Beijing by the Chinese Academy of Sciences (CAS) in September 1958. The Director of"
+            " CAS, Mr. Guo Moruo was appointed the first president of USTC. USTC's founding mission was to develop a"
+            " high-level science and technology workforce, as deemed critical for development of China's economy,"
+            ' defense, and science and technology education. The establishment was hailed as "A Major Event in the'
+            ' History of Chinese Education and Science." CAS has supported USTC by combining most of its institutes'
+            " with the departments of the university. USTC is listed in the top 16 national key universities, becoming"
+            " the youngest national key university.".lower()
+        )
        input_ids = tokenizer([ARTICLE_TO_SUMMARIZE], max_length=511, return_tensors="pt").input_ids
        input_ids = input_ids.to(torch_device)
@@ -1234,7 +1242,10 @@ class ProphetNetModelIntegrationTest(unittest.TestCase):
        summary_ids = model.generate(
            input_ids, num_beams=4, length_penalty=1.0, no_repeat_ngram_size=3, early_stopping=True
        )
-        EXPECTED_SUMMARIZE_512 = "us ##tc was founded by the chinese academy of sciences ( cas ) in 1958 . [X_SEP] us ##tc is listed in the top 16 national key universities ."
+        EXPECTED_SUMMARIZE_512 = (
+            "us ##tc was founded by the chinese academy of sciences ( cas ) in 1958 . [X_SEP] us ##tc is listed in the"
+            " top 16 national key universities ."
+        )
        generated_titles = [
            " ".join(tokenizer.convert_ids_to_tokens(g, skip_special_tokens=True)) for g in summary_ids
        ]
@@ -1251,7 +1262,8 @@ class ProphetNetModelIntegrationTest(unittest.TestCase):
        EXPECTED_SUMMARIZE_100 = (
            r"us ##tc was founded in beijing by the chinese academy of sciences ( cas ) in 1958 . [X_SEP] us ##tc "
            "'"
-            ' s founding mission was to develop a high - level science and technology workforce . [X_SEP] establishment hailed as " a major event in the history of chinese education and science "'
+            " s founding mission was to develop a high - level science and technology workforce .
[X_SEP]" + ' establishment hailed as " a major event in the history of chinese education and science "' ) generated_titles = [ " ".join(tokenizer.convert_ids_to_tokens(g, skip_special_tokens=True)) for g in summary_ids diff --git a/tests/models/prophetnet/test_tokenization_prophetnet.py b/tests/models/prophetnet/test_tokenization_prophetnet.py index 5b44879d04..8d95eb3100 100644 --- a/tests/models/prophetnet/test_tokenization_prophetnet.py +++ b/tests/models/prophetnet/test_tokenization_prophetnet.py @@ -141,7 +141,7 @@ class ProphetNetTokenizationTest(TokenizerTesterMixin, unittest.TestCase): vocab_tokens = ["[UNK]", "[CLS]", "[SEP]", "want", "##want", "##ed", "wa", "un", "runn", "##ing"] vocab = {} - for (i, token) in enumerate(vocab_tokens): + for i, token in enumerate(vocab_tokens): vocab[token] = i tokenizer = WordpieceTokenizer(vocab=vocab, unk_token="[UNK]") diff --git a/tests/models/realm/test_tokenization_realm.py b/tests/models/realm/test_tokenization_realm.py index a54da28980..2a065ceee6 100644 --- a/tests/models/realm/test_tokenization_realm.py +++ b/tests/models/realm/test_tokenization_realm.py @@ -186,7 +186,7 @@ class RealmTokenizationTest(TokenizerTesterMixin, unittest.TestCase): vocab_tokens = ["[UNK]", "[CLS]", "[SEP]", "want", "##want", "##ed", "wa", "un", "runn", "##ing"] vocab = {} - for (i, token) in enumerate(vocab_tokens): + for i, token in enumerate(vocab_tokens): vocab[token] = i tokenizer = WordpieceTokenizer(vocab=vocab, unk_token="[UNK]") diff --git a/tests/models/reformer/test_modeling_reformer.py b/tests/models/reformer/test_modeling_reformer.py index 1929867521..0e5a801e7e 100644 --- a/tests/models/reformer/test_modeling_reformer.py +++ b/tests/models/reformer/test_modeling_reformer.py @@ -574,7 +574,10 @@ class ReformerTesterMixin: @require_torch_multi_gpu @unittest.skip( - reason="Reformer does not work with data parallel (DP) because of a bug in PyTorch: https://github.com/pytorch/pytorch/issues/36035" + reason=( + "Reformer does not work with data parallel (DP) because of a bug in PyTorch:" + " https://github.com/pytorch/pytorch/issues/36035" + ) ) def test_multi_gpu_data_parallel_forward(self): pass diff --git a/tests/models/reformer/test_tokenization_reformer.py b/tests/models/reformer/test_tokenization_reformer.py index 32f946c497..37ea66847f 100644 --- a/tests/models/reformer/test_tokenization_reformer.py +++ b/tests/models/reformer/test_tokenization_reformer.py @@ -214,7 +214,10 @@ class ReformerTokenizationTest(TokenizerTesterMixin, unittest.TestCase): @slow def test_tokenization_base_hard_symbols(self): - symbols = 'This is a very long text with a lot of weird characters, such as: . , ~ ? ( ) " [ ] ! : - . Also we will add words that should not exsist and be tokenized to , such as saoneuhaoesuth' + symbols = ( + 'This is a very long text with a lot of weird characters, such as: . , ~ ? ( ) " [ ] ! : - . 
Also we will' + " add words that should not exsist and be tokenized to , such as saoneuhaoesuth" + ) original_tokenizer_encodings = [ 108, 265, diff --git a/tests/models/retribert/test_tokenization_retribert.py b/tests/models/retribert/test_tokenization_retribert.py index cf1d6195c5..e6511bdbb7 100644 --- a/tests/models/retribert/test_tokenization_retribert.py +++ b/tests/models/retribert/test_tokenization_retribert.py @@ -189,7 +189,7 @@ class RetriBertTokenizationTest(TokenizerTesterMixin, unittest.TestCase): vocab_tokens = ["[UNK]", "[CLS]", "[SEP]", "want", "##want", "##ed", "wa", "un", "runn", "##ing"] vocab = {} - for (i, token) in enumerate(vocab_tokens): + for i, token in enumerate(vocab_tokens): vocab[token] = i tokenizer = WordpieceTokenizer(vocab=vocab, unk_token="[UNK]") diff --git a/tests/models/speech_to_text/test_modeling_speech_to_text.py b/tests/models/speech_to_text/test_modeling_speech_to_text.py index 08b94b6465..35fa96f1c7 100644 --- a/tests/models/speech_to_text/test_modeling_speech_to_text.py +++ b/tests/models/speech_to_text/test_modeling_speech_to_text.py @@ -770,8 +770,10 @@ class Speech2TextModelIntegrationTests(unittest.TestCase): EXPECTED_TRANSCRIPTIONS = [ "mister quilter is the apostle of the middle classes and we are glad to welcome his gospel", "nor is mister cultar's manner less interesting than his matter", - "he tells us that at this festive season of the year with christmas and roast beef looming before us similes drawn from eating and its results occur most readily to the mind", - "he has grave doubts whether sir frederick leyton's work is really greek after all and can discover in it but little of rocky ithaca", + "he tells us that at this festive season of the year with christmas and roast beef looming before us" + " similes drawn from eating and its results occur most readily to the mind", + "he has grave doubts whether sir frederick leyton's work is really greek after all and can discover in it" + " but little of rocky ithaca", ] self.assertListEqual(generated_transcripts, EXPECTED_TRANSCRIPTIONS) diff --git a/tests/models/speech_to_text/test_modeling_tf_speech_to_text.py b/tests/models/speech_to_text/test_modeling_tf_speech_to_text.py index 6485690645..613af6be0c 100644 --- a/tests/models/speech_to_text/test_modeling_tf_speech_to_text.py +++ b/tests/models/speech_to_text/test_modeling_tf_speech_to_text.py @@ -602,7 +602,9 @@ class TFSpeech2TextModelIntegrationTests(unittest.TestCase): EXPECTED_TRANSCRIPTIONS = [ "mister quilter is the apostle of the middle classes and we are glad to welcome his gospel", "nor is mister cultar's manner less interesting than his matter", - "he tells us that at this festive season of the year with christmas and roast beef looming before us similes drawn from eating and its results occur most readily to the mind", - "he has grave doubts whether sir frederick leyton's work is really greek after all and can discover in it but little of rocky ithaca", + "he tells us that at this festive season of the year with christmas and roast beef looming before us" + " similes drawn from eating and its results occur most readily to the mind", + "he has grave doubts whether sir frederick leyton's work is really greek after all and can discover in it" + " but little of rocky ithaca", ] self.assertListEqual(generated_transcripts, EXPECTED_TRANSCRIPTIONS) diff --git a/tests/models/t5/test_modeling_flax_t5.py b/tests/models/t5/test_modeling_flax_t5.py index 7971bb4116..f3b2c166ed 100644 --- a/tests/models/t5/test_modeling_flax_t5.py +++ 
b/tests/models/t5/test_modeling_flax_t5.py @@ -573,16 +573,208 @@ class FlaxT5ModelIntegrationTests(unittest.TestCase): model = FlaxT5ForConditionalGeneration.from_pretrained("t5-base") tok = T5Tokenizer.from_pretrained("t5-base") - FRANCE_ARTICLE = 'Marseille, France (CNN)The French prosecutor leading an investigation into the crash of Germanwings Flight 9525 insisted Wednesday that he was not aware of any video footage from on board the plane. Marseille prosecutor Brice Robin told CNN that "so far no videos were used in the crash investigation." He added, "A person who has such a video needs to immediately give it to the investigators." Robin\'s comments follow claims by two magazines, German daily Bild and French Paris Match, of a cell phone video showing the harrowing final seconds from on board Germanwings Flight 9525 as it crashed into the French Alps. All 150 on board were killed. Paris Match and Bild reported that the video was recovered from a phone at the wreckage site. The two publications described the supposed video, but did not post it on their websites. The publications said that they watched the video, which was found by a source close to the investigation. "One can hear cries of \'My God\' in several languages," Paris Match reported. "Metallic banging can also be heard more than three times, perhaps of the pilot trying to open the cockpit door with a heavy object. Towards the end, after a heavy shake, stronger than the others, the screaming intensifies. Then nothing." "It is a very disturbing scene," said Julian Reichelt, editor-in-chief of Bild online. An official with France\'s accident investigation agency, the BEA, said the agency is not aware of any such video. Lt. Col. Jean-Marc Menichini, a French Gendarmerie spokesman in charge of communications on rescue efforts around the Germanwings crash site, told CNN that the reports were "completely wrong" and "unwarranted." Cell phones have been collected at the site, he said, but that they "hadn\'t been exploited yet." Menichini said he believed the cell phones would need to be sent to the Criminal Research Institute in Rosny sous-Bois, near Paris, in order to be analyzed by specialized technicians working hand-in-hand with investigators. But none of the cell phones found so far have been sent to the institute, Menichini said. Asked whether staff involved in the search could have leaked a memory card to the media, Menichini answered with a categorical "no." Reichelt told "Erin Burnett: Outfront" that he had watched the video and stood by the report, saying Bild and Paris Match are "very confident" that the clip is real. He noted that investigators only revealed they\'d recovered cell phones from the crash site after Bild and Paris Match published their reports. "That is something we did not know before. ... Overall we can say many things of the investigation weren\'t revealed by the investigation at the beginning," he said. What was mental state of Germanwings co-pilot? German airline Lufthansa confirmed Tuesday that co-pilot Andreas Lubitz had battled depression years before he took the controls of Germanwings Flight 9525, which he\'s accused of deliberately crashing last week in the French Alps. Lubitz told his Lufthansa flight training school in 2009 that he had a "previous episode of severe depression," the airline said Tuesday. 
Email correspondence between Lubitz and the school discovered in an internal investigation, Lufthansa said, included medical documents he submitted in connection with resuming his flight training. The announcement indicates that Lufthansa, the parent company of Germanwings, knew of Lubitz\'s battle with depression, allowed him to continue training and ultimately put him in the cockpit. Lufthansa, whose CEO Carsten Spohr previously said Lubitz was 100% fit to fly, described its statement Tuesday as a "swift and seamless clarification" and said it was sharing the information and documents -- including training and medical records -- with public prosecutors. Spohr traveled to the crash site Wednesday, where recovery teams have been working for the past week to recover human remains and plane debris scattered across a steep mountainside. He saw the crisis center set up in Seyne-les-Alpes, laid a wreath in the village of Le Vernet, closer to the crash site, where grieving families have left flowers at a simple stone memorial. Menichini told CNN late Tuesday that no visible human remains were left at the site but recovery teams would keep searching. French President Francois Hollande, speaking Tuesday, said that it should be possible to identify all the victims using DNA analysis by the end of the week, sooner than authorities had previously suggested. In the meantime, the recovery of the victims\' personal belongings will start Wednesday, Menichini said. Among those personal belongings could be more cell phones belonging to the 144 passengers and six crew on board. Check out the latest from our correspondents . The details about Lubitz\'s correspondence with the flight school during his training were among several developments as investigators continued to delve into what caused the crash and Lubitz\'s possible motive for downing the jet. A Lufthansa spokesperson told CNN on Tuesday that Lubitz had a valid medical certificate, had passed all his examinations and "held all the licenses required." Earlier, a spokesman for the prosecutor\'s office in Dusseldorf, Christoph Kumpa, said medical records reveal Lubitz suffered from suicidal tendencies at some point before his aviation career and underwent psychotherapy before he got his pilot\'s license. Kumpa emphasized there\'s no evidence suggesting Lubitz was suicidal or acting aggressively before the crash. Investigators are looking into whether Lubitz feared his medical condition would cause him to lose his pilot\'s license, a European government official briefed on the investigation told CNN on Tuesday. While flying was "a big part of his life," the source said, it\'s only one theory being considered. Another source, a law enforcement official briefed on the investigation, also told CNN that authorities believe the primary motive for Lubitz to bring down the plane was that he feared he would not be allowed to fly because of his medical problems. Lubitz\'s girlfriend told investigators he had seen an eye doctor and a neuropsychologist, both of whom deemed him unfit to work recently and concluded he had psychological issues, the European government official said. But no matter what details emerge about his previous mental health struggles, there\'s more to the story, said Brian Russell, a forensic psychologist. "Psychology can explain why somebody would turn rage inward on themselves about the fact that maybe they weren\'t going to keep doing their job and they\'re upset about that and so they\'re suicidal," he said. 
"But there is no mental illness that explains why somebody then feels entitled to also take that rage and turn it outward on 149 other people who had nothing to do with the person\'s problems." Germanwings crash compensation: What we know . Who was the captain of Germanwings Flight 9525? CNN\'s Margot Haddad reported from Marseille and Pamela Brown from Dusseldorf, while Laura Smith-Spark wrote from London. CNN\'s Frederik Pleitgen, Pamela Boykoff, Antonia Mortensen, Sandrine Amiel and Anna-Maja Rappard contributed to this report.' # @noqa - SHORTER_ARTICLE = '(CNN)The Palestinian Authority officially became the 123rd member of the International Criminal Court on Wednesday, a step that gives the court jurisdiction over alleged crimes in Palestinian territories. The formal accession was marked with a ceremony at The Hague, in the Netherlands, where the court is based. The Palestinians signed the ICC\'s founding Rome Statute in January, when they also accepted its jurisdiction over alleged crimes committed "in the occupied Palestinian territory, including East Jerusalem, since June 13, 2014." Later that month, the ICC opened a preliminary examination into the situation in Palestinian territories, paving the way for possible war crimes investigations against Israelis. As members of the court, Palestinians may be subject to counter-charges as well. Israel and the United States, neither of which is an ICC member, opposed the Palestinians\' efforts to join the body. But Palestinian Foreign Minister Riad al-Malki, speaking at Wednesday\'s ceremony, said it was a move toward greater justice. "As Palestine formally becomes a State Party to the Rome Statute today, the world is also a step closer to ending a long era of impunity and injustice," he said, according to an ICC news release. "Indeed, today brings us closer to our shared goals of justice and peace." Judge Kuniko Ozaki, a vice president of the ICC, said acceding to the treaty was just the first step for the Palestinians. "As the Rome Statute today enters into force for the State of Palestine, Palestine acquires all the rights as well as responsibilities that come with being a State Party to the Statute. These are substantive commitments, which cannot be taken lightly," she said. Rights group Human Rights Watch welcomed the development. "Governments seeking to penalize Palestine for joining the ICC should immediately end their pressure, and countries that support universal acceptance of the court\'s treaty should speak out to welcome its membership," said Balkees Jarrah, international justice counsel for the group. "What\'s objectionable is the attempts to undermine international justice, not Palestine\'s decision to join a treaty to which over 100 countries around the world are members." In January, when the preliminary ICC examination was opened, Israeli Prime Minister Benjamin Netanyahu described it as an outrage, saying the court was overstepping its boundaries. The United States also said it "strongly" disagreed with the court\'s decision. "As we have said repeatedly, we do not believe that Palestine is a state and therefore we do not believe that it is eligible to join the ICC," the State Department said in a statement. It urged the warring sides to resolve their differences through direct negotiations. "We will continue to oppose actions against Israel at the ICC as counterproductive to the cause of peace," it said. But the ICC begs to differ with the definition of a state for its purposes and refers to the territories as "Palestine." 
While a preliminary examination is not a formal investigation, it allows the court to review evidence and determine whether to investigate suspects on both sides. Prosecutor Fatou Bensouda said her office would "conduct its analysis in full independence and impartiality." The war between Israel and Hamas militants in Gaza last summer left more than 2,000 people dead. The inquiry will include alleged war crimes committed since June. The International Criminal Court was set up in 2002 to prosecute genocide, crimes against humanity and war crimes. CNN\'s Vasco Cotovio, Kareem Khadder and Faith Karimi contributed to this report.' - IRAN_ARTICLE = "(CNN)The United States and its negotiating partners reached a very strong framework agreement with Iran in Lausanne, Switzerland, on Thursday that limits Iran's nuclear program in such a way as to effectively block it from building a nuclear weapon. Expect pushback anyway, if the recent past is any harbinger. Just last month, in an attempt to head off such an agreement, House Speaker John Boehner invited Israeli Prime Minister Benjamin Netanyahu to preemptively blast it before Congress, and 47 senators sent a letter to the Iranian leadership warning them away from a deal. The debate that has already begun since the announcement of the new framework will likely result in more heat than light. It will not be helped by the gathering swirl of dubious assumptions and doubtful assertions. Let us address some of these: . The most misleading assertion, despite universal rejection by experts, is that the negotiations' objective at the outset was the total elimination of any nuclear program in Iran. That is the position of Netanyahu and his acolytes in the U.S. Congress. But that is not and never was the objective. If it had been, there would have been no Iranian team at the negotiating table. Rather, the objective has always been to structure an agreement or series of agreements so that Iran could not covertly develop a nuclear arsenal before the United States and its allies could respond. The new framework has exceeded expectations in achieving that goal. It would reduce Iran's low-enriched uranium stockpile, cut by two-thirds its number of installed centrifuges and implement a rigorous inspection regime. Another dubious assumption of opponents is that the Iranian nuclear program is a covert weapons program. Despite sharp accusations by some in the United States and its allies, Iran denies having such a program, and U.S. intelligence contends that Iran has not yet made the decision to build a nuclear weapon. Iran's continued cooperation with International Atomic Energy Agency inspections is further evidence on this point, and we'll know even more about Iran's program in the coming months and years because of the deal. In fact, the inspections provisions that are part of this agreement are designed to protect against any covert action by the Iranians. What's more, the rhetoric of some members of Congress has implied that the negotiations have been between only the United States and Iran (i.e., the 47 senators' letter warning that a deal might be killed by Congress or a future president). This of course is not the case. The talks were between Iran and the five permanent members of the U.N. Security Council (United States, United Kingdom, France, China and Russia) plus Germany, dubbed the P5+1. While the United States has played a leading role in the effort, it negotiated the terms alongside its partners. 
If the agreement reached by the P5+1 is rejected by Congress, it could result in an unraveling of the sanctions on Iran and threaten NATO cohesion in other areas. Another questionable assertion is that this agreement contains a sunset clause, after which Iran will be free to do as it pleases. Again, this is not the case. Some of the restrictions on Iran's nuclear activities, such as uranium enrichment, will be eased or eliminated over time, as long as 15 years. But most importantly, the framework agreement includes Iran's ratification of the Additional Protocol, which allows IAEA inspectors expanded access to nuclear sites both declared and nondeclared. This provision will be permanent. It does not sunset. Thus, going forward, if Iran decides to enrich uranium to weapons-grade levels, monitors will be able to detect such a move in a matter of days and alert the U.N. Security Council. Many in Congress have said that the agreement should be a formal treaty requiring the Senate to \"advise and consent.\" But the issue is not suited for a treaty. Treaties impose equivalent obligations on all signatories. For example, the New START treaty limits Russia and the United States to 1,550 deployed strategic warheads. But any agreement with Iran will not be so balanced. The restrictions and obligations in the final framework agreement will be imposed almost exclusively on Iran. The P5+1 are obligated only to ease and eventually remove most but not all economic sanctions, which were imposed as leverage to gain this final deal. Finally some insist that any agreement must address Iranian missile programs, human rights violations or support for Hamas or Hezbollah. As important as these issues are, and they must indeed be addressed, they are unrelated to the most important aim of a nuclear deal: preventing a nuclear Iran. To include them in the negotiations would be a poison pill. This agreement should be judged on its merits and on how it affects the security of our negotiating partners and allies, including Israel. Those judgments should be fact-based, not based on questionable assertions or dubious assumptions."
- ARTICLE_SUBWAY = 'New York (CNN)When Liana Barrientos was 23 years old, she got married in Westchester County, New York. A year later, she got married again in Westchester County, but to a different man and without divorcing her first husband. Only 18 days after that marriage, she got hitched yet again. Then, Barrientos declared "I do" five more times, sometimes only within two weeks of each other. In 2010, she married once more, this time in the Bronx. In an application for a marriage license, she stated it was her "first and only" marriage. Barrientos, now 39, is facing two criminal counts of "offering a false instrument for filing in the first degree," referring to her false statements on the 2010 marriage license application, according to court documents. Prosecutors said the marriages were part of an immigration scam. On Friday, she pleaded not guilty at State Supreme Court in the Bronx, according to her attorney, Christopher Wright, who declined to comment further. After leaving court, Barrientos was arrested and charged with theft of service and criminal trespass for allegedly sneaking into the New York subway through an emergency exit, said Detective Annette Markowski, a police spokeswoman. In total, Barrientos has been married 10 times, with nine of her marriages occurring between 1999 and 2002. All occurred either in Westchester County, Long Island, New Jersey or the Bronx. She is believed to still be married to four men, and at one time, she was married to eight men at once, prosecutors say. Prosecutors said the immigration scam involved some of her husbands, who filed for permanent residence status shortly after the marriages. Any divorces happened only after such filings were approved. It was unclear whether any of the men will be prosecuted. The case was referred to the Bronx District Attorney\'s Office by Immigration and Customs Enforcement and the Department of Homeland Security\'s Investigation Division. Seven of the men are from so-called "red-flagged" countries, including Egypt, Turkey, Georgia, Pakistan and Mali. Her eighth husband, Rashid Rajput, was deported in 2006 to his native Pakistan after an investigation by the Joint Terrorism Task Force. If convicted, Barrientos faces up to four years in prison. Her next court appearance is scheduled for May 18.'
+ FRANCE_ARTICLE = ( # @noqa
+ "Marseille, France (CNN)The French prosecutor leading an investigation into the crash of Germanwings"
+ " Flight 9525 insisted Wednesday that he was not aware of any video footage from on board the plane."
+ ' Marseille prosecutor Brice Robin told CNN that "so far no videos were used in the crash investigation."'
+ ' He added, "A person who has such a video needs to immediately give it to the investigators." Robin\'s'
+ " comments follow claims by two magazines, German daily Bild and French Paris Match, of a cell phone video"
+ " showing the harrowing final seconds from on board Germanwings Flight 9525 as it crashed into the French"
+ " Alps. All 150 on board were killed. Paris Match and Bild reported that the video was recovered from a"
+ " phone at the wreckage site. The two publications described the supposed video, but did not post it on"
+ " their websites. The publications said that they watched the video, which was found by a source close to"
+ " the investigation. \"One can hear cries of 'My God' in several languages,\" Paris Match reported."
+ ' "Metallic banging can also be heard more than three times, perhaps of the pilot trying to open the'
+ " cockpit door with a heavy object. Towards the end, after a heavy shake, stronger than the others, the"
+ ' screaming intensifies. Then nothing." "It is a very disturbing scene," said Julian Reichelt,'
+ " editor-in-chief of Bild online. An official with France's accident investigation agency, the BEA, said"
+ " the agency is not aware of any such video. Lt. Col. Jean-Marc Menichini, a French Gendarmerie spokesman"
+ " in charge of communications on rescue efforts around the Germanwings crash site, told CNN that the"
+ ' reports were "completely wrong" and "unwarranted." Cell phones have been collected at the site, he said,'
+ ' but that they "hadn\'t been exploited yet." Menichini said he believed the cell phones would need to be'
+ " sent to the Criminal Research Institute in Rosny sous-Bois, near Paris, in order to be analyzed by"
+ " specialized technicians working hand-in-hand with investigators. But none of the cell phones found so"
+ " far have been sent to the institute, Menichini said. Asked whether staff involved in the search could"
+ ' have leaked a memory card to the media, Menichini answered with a categorical "no." Reichelt told "Erin'
+ ' Burnett: Outfront" that he had watched the video and stood by the report, saying Bild and Paris Match'
+ ' are "very confident" that the clip is real. He noted that investigators only revealed they\'d recovered'
+ ' cell phones from the crash site after Bild and Paris Match published their reports. "That is something'
+ " we did not know before. ... Overall we can say many things of the investigation weren't revealed by the"
+ ' investigation at the beginning," he said. What was mental state of Germanwings co-pilot? German airline'
+ " Lufthansa confirmed Tuesday that co-pilot Andreas Lubitz had battled depression years before he took the"
+ " controls of Germanwings Flight 9525, which he's accused of deliberately crashing last week in the"
+ ' French Alps. Lubitz told his Lufthansa flight training school in 2009 that he had a "previous episode of'
+ ' severe depression," the airline said Tuesday. Email correspondence between Lubitz and the school'
+ " discovered in an internal investigation, Lufthansa said, included medical documents he submitted in"
+ " connection with resuming his flight training. The announcement indicates that Lufthansa, the parent"
+ " company of Germanwings, knew of Lubitz's battle with depression, allowed him to continue training and"
+ " ultimately put him in the cockpit. Lufthansa, whose CEO Carsten Spohr previously said Lubitz was 100%"
+ ' fit to fly, described its statement Tuesday as a "swift and seamless clarification" and said it was'
+ " sharing the information and documents -- including training and medical records -- with public"
+ " prosecutors. Spohr traveled to the crash site Wednesday, where recovery teams have been working for the"
+ " past week to recover human remains and plane debris scattered across a steep mountainside. He saw the"
+ " crisis center set up in Seyne-les-Alpes, laid a wreath in the village of Le Vernet, closer to the crash"
+ " site, where grieving families have left flowers at a simple stone memorial. Menichini told CNN late"
+ " Tuesday that no visible human remains were left at the site but recovery teams would keep searching."
+ " French President Francois Hollande, speaking Tuesday, said that it should be possible to identify all"
+ " the victims using DNA analysis by the end of the week, sooner than authorities had previously suggested."
+ " In the meantime, the recovery of the victims' personal belongings will start Wednesday, Menichini said."
+ " Among those personal belongings could be more cell phones belonging to the 144 passengers and six crew"
+ " on board. Check out the latest from our correspondents . The details about Lubitz's correspondence with"
+ " the flight school during his training were among several developments as investigators continued to"
+ " delve into what caused the crash and Lubitz's possible motive for downing the jet. A Lufthansa"
+ " spokesperson told CNN on Tuesday that Lubitz had a valid medical certificate, had passed all his"
+ ' examinations and "held all the licenses required." Earlier, a spokesman for the prosecutor\'s office in'
+ " Dusseldorf, Christoph Kumpa, said medical records reveal Lubitz suffered from suicidal tendencies at"
+ " some point before his aviation career and underwent psychotherapy before he got his pilot's license."
+ " Kumpa emphasized there's no evidence suggesting Lubitz was suicidal or acting aggressively before the"
+ " crash. Investigators are looking into whether Lubitz feared his medical condition would cause him to"
+ " lose his pilot's license, a European government official briefed on the investigation told CNN on"
+ ' Tuesday. While flying was "a big part of his life," the source said, it\'s only one theory being'
+ " considered. Another source, a law enforcement official briefed on the investigation, also told CNN that"
+ " authorities believe the primary motive for Lubitz to bring down the plane was that he feared he would"
+ " not be allowed to fly because of his medical problems. Lubitz's girlfriend told investigators he had"
+ " seen an eye doctor and a neuropsychologist, both of whom deemed him unfit to work recently and concluded"
+ " he had psychological issues, the European government official said. But no matter what details emerge"
+ " about his previous mental health struggles, there's more to the story, said Brian Russell, a forensic"
+ ' psychologist. "Psychology can explain why somebody would turn rage inward on themselves about the fact'
+ " that maybe they weren't going to keep doing their job and they're upset about that and so they're"
+ ' suicidal," he said. "But there is no mental illness that explains why somebody then feels entitled to'
+ " also take that rage and turn it outward on 149 other people who had nothing to do with the person's"
+ ' problems." Germanwings crash compensation: What we know . Who was the captain of Germanwings Flight'
+ " 9525? CNN's Margot Haddad reported from Marseille and Pamela Brown from Dusseldorf, while Laura"
+ " Smith-Spark wrote from London. CNN's Frederik Pleitgen, Pamela Boykoff, Antonia Mortensen, Sandrine"
+ " Amiel and Anna-Maja Rappard contributed to this report."
+ )
+ SHORTER_ARTICLE = (
+ "(CNN)The Palestinian Authority officially became the 123rd member of the International Criminal Court on"
+ " Wednesday, a step that gives the court jurisdiction over alleged crimes in Palestinian territories. The"
+ " formal accession was marked with a ceremony at The Hague, in the Netherlands, where the court is based."
+ " The Palestinians signed the ICC's founding Rome Statute in January, when they also accepted its"
+ ' jurisdiction over alleged crimes committed "in the occupied Palestinian territory, including East'
+ ' Jerusalem, since June 13, 2014." Later that month, the ICC opened a preliminary examination into the'
+ " situation in Palestinian territories, paving the way for possible war crimes investigations against"
+ " Israelis. As members of the court, Palestinians may be subject to counter-charges as well. Israel and"
+ " the United States, neither of which is an ICC member, opposed the Palestinians' efforts to join the"
+ " body. But Palestinian Foreign Minister Riad al-Malki, speaking at Wednesday's ceremony, said it was a"
+ ' move toward greater justice. "As Palestine formally becomes a State Party to the Rome Statute today, the'
+ ' world is also a step closer to ending a long era of impunity and injustice," he said, according to an'
+ ' ICC news release. "Indeed, today brings us closer to our shared goals of justice and peace." Judge'
+ " Kuniko Ozaki, a vice president of the ICC, said acceding to the treaty was just the first step for the"
+ ' Palestinians. "As the Rome Statute today enters into force for the State of Palestine, Palestine'
+ " acquires all the rights as well as responsibilities that come with being a State Party to the Statute."
+ ' These are substantive commitments, which cannot be taken lightly," she said. Rights group Human Rights'
+ ' Watch welcomed the development. "Governments seeking to penalize Palestine for joining the ICC should'
+ " immediately end their pressure, and countries that support universal acceptance of the court's treaty"
+ ' should speak out to welcome its membership," said Balkees Jarrah, international justice counsel for the'
+ " group. \"What's objectionable is the attempts to undermine international justice, not Palestine's"
+ ' decision to join a treaty to which over 100 countries around the world are members." In January, when'
+ " the preliminary ICC examination was opened, Israeli Prime Minister Benjamin Netanyahu described it as an"
+ ' outrage, saying the court was overstepping its boundaries. The United States also said it "strongly"'
+ " disagreed with the court's decision. \"As we have said repeatedly, we do not believe that Palestine is a"
+ ' state and therefore we do not believe that it is eligible to join the ICC," the State Department said in'
+ ' a statement. It urged the warring sides to resolve their differences through direct negotiations. "We'
+ ' will continue to oppose actions against Israel at the ICC as counterproductive to the cause of peace,"'
+ " it said. But the ICC begs to differ with the definition of a state for its purposes and refers to the"
+ ' territories as "Palestine." While a preliminary examination is not a formal investigation, it allows the'
+ " court to review evidence and determine whether to investigate suspects on both sides. Prosecutor Fatou"
+ ' Bensouda said her office would "conduct its analysis in full independence and impartiality." The war'
+ " between Israel and Hamas militants in Gaza last summer left more than 2,000 people dead. The inquiry"
+ " will include alleged war crimes committed since June. The International Criminal Court was set up in"
+ " 2002 to prosecute genocide, crimes against humanity and war crimes. CNN's Vasco Cotovio, Kareem Khadder"
+ " and Faith Karimi contributed to this report."
+ )
+ IRAN_ARTICLE = (
+ "(CNN)The United States and its negotiating partners reached a very strong framework agreement with Iran"
+ " in Lausanne, Switzerland, on Thursday that limits Iran's nuclear program in such a way as to effectively"
+ " block it from building a nuclear weapon. Expect pushback anyway, if the recent past is any harbinger."
+ " Just last month, in an attempt to head off such an agreement, House Speaker John Boehner invited Israeli"
+ " Prime Minister Benjamin Netanyahu to preemptively blast it before Congress, and 47 senators sent a"
+ " letter to the Iranian leadership warning them away from a deal. The debate that has already begun since"
+ " the announcement of the new framework will likely result in more heat than light. It will not be helped"
+ " by the gathering swirl of dubious assumptions and doubtful assertions. Let us address some of these: ."
+ " The most misleading assertion, despite universal rejection by experts, is that the negotiations'"
+ " objective at the outset was the total elimination of any nuclear program in Iran. That is the position"
+ " of Netanyahu and his acolytes in the U.S. Congress. But that is not and never was the objective. If it"
+ " had been, there would have been no Iranian team at the negotiating table. Rather, the objective has"
+ " always been to structure an agreement or series of agreements so that Iran could not covertly develop a"
+ " nuclear arsenal before the United States and its allies could respond. The new framework has exceeded"
+ " expectations in achieving that goal. It would reduce Iran's low-enriched uranium stockpile, cut by"
+ " two-thirds its number of installed centrifuges and implement a rigorous inspection regime. Another"
+ " dubious assumption of opponents is that the Iranian nuclear program is a covert weapons program. Despite"
+ " sharp accusations by some in the United States and its allies, Iran denies having such a program, and"
+ " U.S. intelligence contends that Iran has not yet made the decision to build a nuclear weapon. Iran's"
+ " continued cooperation with International Atomic Energy Agency inspections is further evidence on this"
+ " point, and we'll know even more about Iran's program in the coming months and years because of the deal."
+ " In fact, the inspections provisions that are part of this agreement are designed to protect against any"
+ " covert action by the Iranians. What's more, the rhetoric of some members of Congress has implied that"
+ " the negotiations have been between only the United States and Iran (i.e., the 47 senators' letter"
+ " warning that a deal might be killed by Congress or a future president). This of course is not the case."
+ " The talks were between Iran and the five permanent members of the U.N. Security Council (United States,"
+ " United Kingdom, France, China and Russia) plus Germany, dubbed the P5+1. While the United States has"
+ " played a leading role in the effort, it negotiated the terms alongside its partners. If the agreement"
+ " reached by the P5+1 is rejected by Congress, it could result in an unraveling of the sanctions on Iran"
+ " and threaten NATO cohesion in other areas. Another questionable assertion is that this agreement"
+ " contains a sunset clause, after which Iran will be free to do as it pleases. Again, this is not the"
+ " case. Some of the restrictions on Iran's nuclear activities, such as uranium enrichment, will be eased"
+ " or eliminated over time, as long as 15 years. But most importantly, the framework agreement includes"
+ " Iran's ratification of the Additional Protocol, which allows IAEA inspectors expanded access to nuclear"
+ " sites both declared and nondeclared. This provision will be permanent. It does not sunset. Thus, going"
+ " forward, if Iran decides to enrich uranium to weapons-grade levels, monitors will be able to detect such"
+ " a move in a matter of days and alert the U.N. Security Council. Many in Congress have said that the"
+ ' agreement should be a formal treaty requiring the Senate to "advise and consent." But the issue is not'
+ " suited for a treaty. Treaties impose equivalent obligations on all signatories. For example, the New"
+ " START treaty limits Russia and the United States to 1,550 deployed strategic warheads. But any agreement"
+ " with Iran will not be so balanced. The restrictions and obligations in the final framework agreement"
+ " will be imposed almost exclusively on Iran. The P5+1 are obligated only to ease and eventually remove"
+ " most but not all economic sanctions, which were imposed as leverage to gain this final deal. Finally"
+ " some insist that any agreement must address Iranian missile programs, human rights violations or support"
+ " for Hamas or Hezbollah. As important as these issues are, and they must indeed be addressed, they are"
+ " unrelated to the most important aim of a nuclear deal: preventing a nuclear Iran. To include them in"
+ " the negotiations would be a poison pill. This agreement should be judged on its merits and on how it"
+ " affects the security of our negotiating partners and allies, including Israel. Those judgments should be"
+ " fact-based, not based on questionable assertions or dubious assumptions."
+ )
+ ARTICLE_SUBWAY = (
+ "New York (CNN)When Liana Barrientos was 23 years old, she got married in Westchester County, New York. A"
+ " year later, she got married again in Westchester County, but to a different man and without divorcing"
+ " her first husband. Only 18 days after that marriage, she got hitched yet again. Then, Barrientos"
+ ' declared "I do" five more times, sometimes only within two weeks of each other. In 2010, she married'
+ " once more, this time in the Bronx. In an application for a marriage license, she stated it was her"
+ ' "first and only" marriage. Barrientos, now 39, is facing two criminal counts of "offering a false'
+ ' instrument for filing in the first degree," referring to her false statements on the 2010 marriage'
+ " license application, according to court documents. Prosecutors said the marriages were part of an"
+ " immigration scam. On Friday, she pleaded not guilty at State Supreme Court in the Bronx, according to"
+ " her attorney, Christopher Wright, who declined to comment further. After leaving court, Barrientos was"
+ " arrested and charged with theft of service and criminal trespass for allegedly sneaking into the New"
+ " York subway through an emergency exit, said Detective Annette Markowski, a police spokeswoman. In total,"
+ " Barrientos has been married 10 times, with nine of her marriages occurring between 1999 and 2002. All"
+ " occurred either in Westchester County, Long Island, New Jersey or the Bronx. She is believed to still be"
+ " married to four men, and at one time, she was married to eight men at once, prosecutors say. Prosecutors"
+ " said the immigration scam involved some of her husbands, who filed for permanent residence status"
+ " shortly after the marriages. Any divorces happened only after such filings were approved. It was"
+ " unclear whether any of the men will be prosecuted. The case was referred to the Bronx District"
+ " Attorney's Office by Immigration and Customs Enforcement and the Department of Homeland Security's"
+ ' Investigation Division. Seven of the men are from so-called "red-flagged" countries, including Egypt,'
+ " Turkey, Georgia, Pakistan and Mali. Her eighth husband, Rashid Rajput, was deported in 2006 to his"
+ " native Pakistan after an investigation by the Joint Terrorism Task Force. If convicted, Barrientos faces"
+ " up to four years in prison. Her next court appearance is scheduled for May 18."
+ )
 expected_summaries = [
- 'prosecutor: "so far no videos were used in the crash investigation" two magazines claim to have found a cell phone video of the final seconds . "one can hear cries of \'My God\' in several languages," one magazine says . all 150 on board were killed when germanwings flight 9525 crashed .',
- "the formal accession was marked by a ceremony at The Hague, in the Netherlands . the ICC opened a preliminary examination into the situation in the occupied Palestinian territory . as members of the court, Palestinians may be subject to counter-charges as well .",
- "the u.s. and its negotiating partners reached a very strong framework agreement with Iran . aaron miller: the debate that has already begun since the announcement of the new framework will likely result in more heat than light . he says the new framework would reduce Iran's low-enriched uranium stockpile and cut centrifuges . miller: if it had been, there would have been no Iranian team at the table .",
- 'prosecutors say the marriages were part of an immigration scam . if convicted, barrientos faces two criminal counts of "offering a false instrument for filing in the first degree" she has been married 10 times, with nine of her marriages occurring between 1999 and 2002 .',
+ 'prosecutor: "so far no videos were used in the crash investigation" two magazines claim to have found a'
+ " cell phone video of the final seconds . \"one can hear cries of 'My God' in several languages,\" one"
+ " magazine says . all 150 on board were killed when germanwings flight 9525 crashed .",
+ "the formal accession was marked by a ceremony at The Hague, in the Netherlands . the ICC opened a"
+ " preliminary examination into the situation in the occupied Palestinian territory . as members of the"
+ " court, Palestinians may be subject to counter-charges as well .",
+ "the u.s. and its negotiating partners reached a very strong framework agreement with Iran . aaron miller:"
+ " the debate that has already begun since the announcement of the new framework will likely result in more"
+ " heat than light . he says the new framework would reduce Iran's low-enriched uranium stockpile and cut"
+ " centrifuges . miller: if it had been, there would have been no Iranian team at the table .",
+ "prosecutors say the marriages were part of an immigration scam . if convicted, barrientos faces two"
+ ' criminal counts of "offering a false instrument for filing in the first degree" she has been married 10'
+ " times, with nine of her marriages occurring between 1999 and 2002 .",
 ]
 dct = tok(
diff --git a/tests/models/t5/test_modeling_t5.py b/tests/models/t5/test_modeling_t5.py
index d573004184..05a962e354 100644
--- a/tests/models/t5/test_modeling_t5.py
+++ b/tests/models/t5/test_modeling_t5.py
@@ -909,16 +909,208 @@ class T5ModelIntegrationTests(unittest.TestCase):
 model = self.model
 tok = self.tokenizer
- FRANCE_ARTICLE = 'Marseille, France (CNN)The French prosecutor leading an investigation into the crash of Germanwings Flight 9525 insisted Wednesday that he was not aware of any video footage from on board the plane. Marseille prosecutor Brice Robin told CNN that "so far no videos were used in the crash investigation." He added, "A person who has such a video needs to immediately give it to the investigators." Robin\'s comments follow claims by two magazines, German daily Bild and French Paris Match, of a cell phone video showing the harrowing final seconds from on board Germanwings Flight 9525 as it crashed into the French Alps. All 150 on board were killed. Paris Match and Bild reported that the video was recovered from a phone at the wreckage site. The two publications described the supposed video, but did not post it on their websites. The publications said that they watched the video, which was found by a source close to the investigation. "One can hear cries of \'My God\' in several languages," Paris Match reported. "Metallic banging can also be heard more than three times, perhaps of the pilot trying to open the cockpit door with a heavy object. Towards the end, after a heavy shake, stronger than the others, the screaming intensifies. Then nothing." "It is a very disturbing scene," said Julian Reichelt, editor-in-chief of Bild online. An official with France\'s accident investigation agency, the BEA, said the agency is not aware of any such video. Lt. Col. Jean-Marc Menichini, a French Gendarmerie spokesman in charge of communications on rescue efforts around the Germanwings crash site, told CNN that the reports were "completely wrong" and "unwarranted." Cell phones have been collected at the site, he said, but that they "hadn\'t been exploited yet." Menichini said he believed the cell phones would need to be sent to the Criminal Research Institute in Rosny sous-Bois, near Paris, in order to be analyzed by specialized technicians working hand-in-hand with investigators. But none of the cell phones found so far have been sent to the institute, Menichini said. Asked whether staff involved in the search could have leaked a memory card to the media, Menichini answered with a categorical "no." Reichelt told "Erin Burnett: Outfront" that he had watched the video and stood by the report, saying Bild and Paris Match are "very confident" that the clip is real. He noted that investigators only revealed they\'d recovered cell phones from the crash site after Bild and Paris Match published their reports. "That is something we did not know before. ... Overall we can say many things of the investigation weren\'t revealed by the investigation at the beginning," he said. What was mental state of Germanwings co-pilot? German airline Lufthansa confirmed Tuesday that co-pilot Andreas Lubitz had battled depression years before he took the controls of Germanwings Flight 9525, which he\'s accused of deliberately crashing last week in the French Alps. Lubitz told his Lufthansa flight training school in 2009 that he had a "previous episode of severe depression," the airline said Tuesday. Email correspondence between Lubitz and the school discovered in an internal investigation, Lufthansa said, included medical documents he submitted in connection with resuming his flight training. The announcement indicates that Lufthansa, the parent company of Germanwings, knew of Lubitz\'s battle with depression, allowed him to continue training and ultimately put him in the cockpit. Lufthansa, whose CEO Carsten Spohr previously said Lubitz was 100% fit to fly, described its statement Tuesday as a "swift and seamless clarification" and said it was sharing the information and documents -- including training and medical records -- with public prosecutors. Spohr traveled to the crash site Wednesday, where recovery teams have been working for the past week to recover human remains and plane debris scattered across a steep mountainside. He saw the crisis center set up in Seyne-les-Alpes, laid a wreath in the village of Le Vernet, closer to the crash site, where grieving families have left flowers at a simple stone memorial. Menichini told CNN late Tuesday that no visible human remains were left at the site but recovery teams would keep searching. French President Francois Hollande, speaking Tuesday, said that it should be possible to identify all the victims using DNA analysis by the end of the week, sooner than authorities had previously suggested. In the meantime, the recovery of the victims\' personal belongings will start Wednesday, Menichini said. Among those personal belongings could be more cell phones belonging to the 144 passengers and six crew on board. Check out the latest from our correspondents . The details about Lubitz\'s correspondence with the flight school during his training were among several developments as investigators continued to delve into what caused the crash and Lubitz\'s possible motive for downing the jet. A Lufthansa spokesperson told CNN on Tuesday that Lubitz had a valid medical certificate, had passed all his examinations and "held all the licenses required." Earlier, a spokesman for the prosecutor\'s office in Dusseldorf, Christoph Kumpa, said medical records reveal Lubitz suffered from suicidal tendencies at some point before his aviation career and underwent psychotherapy before he got his pilot\'s license. Kumpa emphasized there\'s no evidence suggesting Lubitz was suicidal or acting aggressively before the crash. Investigators are looking into whether Lubitz feared his medical condition would cause him to lose his pilot\'s license, a European government official briefed on the investigation told CNN on Tuesday. While flying was "a big part of his life," the source said, it\'s only one theory being considered. Another source, a law enforcement official briefed on the investigation, also told CNN that authorities believe the primary motive for Lubitz to bring down the plane was that he feared he would not be allowed to fly because of his medical problems. Lubitz\'s girlfriend told investigators he had seen an eye doctor and a neuropsychologist, both of whom deemed him unfit to work recently and concluded he had psychological issues, the European government official said. But no matter what details emerge about his previous mental health struggles, there\'s more to the story, said Brian Russell, a forensic psychologist. "Psychology can explain why somebody would turn rage inward on themselves about the fact that maybe they weren\'t going to keep doing their job and they\'re upset about that and so they\'re suicidal," he said. "But there is no mental illness that explains why somebody then feels entitled to also take that rage and turn it outward on 149 other people who had nothing to do with the person\'s problems." Germanwings crash compensation: What we know . Who was the captain of Germanwings Flight 9525? CNN\'s Margot Haddad reported from Marseille and Pamela Brown from Dusseldorf, while Laura Smith-Spark wrote from London. CNN\'s Frederik Pleitgen, Pamela Boykoff, Antonia Mortensen, Sandrine Amiel and Anna-Maja Rappard contributed to this report.' # @noqa
- SHORTER_ARTICLE = '(CNN)The Palestinian Authority officially became the 123rd member of the International Criminal Court on Wednesday, a step that gives the court jurisdiction over alleged crimes in Palestinian territories. The formal accession was marked with a ceremony at The Hague, in the Netherlands, where the court is based. The Palestinians signed the ICC\'s founding Rome Statute in January, when they also accepted its jurisdiction over alleged crimes committed "in the occupied Palestinian territory, including East Jerusalem, since June 13, 2014." Later that month, the ICC opened a preliminary examination into the situation in Palestinian territories, paving the way for possible war crimes investigations against Israelis. As members of the court, Palestinians may be subject to counter-charges as well. Israel and the United States, neither of which is an ICC member, opposed the Palestinians\' efforts to join the body. But Palestinian Foreign Minister Riad al-Malki, speaking at Wednesday\'s ceremony, said it was a move toward greater justice. "As Palestine formally becomes a State Party to the Rome Statute today, the world is also a step closer to ending a long era of impunity and injustice," he said, according to an ICC news release. "Indeed, today brings us closer to our shared goals of justice and peace." Judge Kuniko Ozaki, a vice president of the ICC, said acceding to the treaty was just the first step for the Palestinians. "As the Rome Statute today enters into force for the State of Palestine, Palestine acquires all the rights as well as responsibilities that come with being a State Party to the Statute. These are substantive commitments, which cannot be taken lightly," she said. Rights group Human Rights Watch welcomed the development. "Governments seeking to penalize Palestine for joining the ICC should immediately end their pressure, and countries that support universal acceptance of the court\'s treaty should speak out to welcome its membership," said Balkees Jarrah, international justice counsel for the group. "What\'s objectionable is the attempts to undermine international justice, not Palestine\'s decision to join a treaty to which over 100 countries around the world are members." In January, when the preliminary ICC examination was opened, Israeli Prime Minister Benjamin Netanyahu described it as an outrage, saying the court was overstepping its boundaries. The United States also said it "strongly" disagreed with the court\'s decision. "As we have said repeatedly, we do not believe that Palestine is a state and therefore we do not believe that it is eligible to join the ICC," the State Department said in a statement. It urged the warring sides to resolve their differences through direct negotiations. "We will continue to oppose actions against Israel at the ICC as counterproductive to the cause of peace," it said. But the ICC begs to differ with the definition of a state for its purposes and refers to the territories as "Palestine." While a preliminary examination is not a formal investigation, it allows the court to review evidence and determine whether to investigate suspects on both sides. Prosecutor Fatou Bensouda said her office would "conduct its analysis in full independence and impartiality." The war between Israel and Hamas militants in Gaza last summer left more than 2,000 people dead. The inquiry will include alleged war crimes committed since June. The International Criminal Court was set up in 2002 to prosecute genocide, crimes against humanity and war crimes. CNN\'s Vasco Cotovio, Kareem Khadder and Faith Karimi contributed to this report.'
- IRAN_ARTICLE = "(CNN)The United States and its negotiating partners reached a very strong framework agreement with Iran in Lausanne, Switzerland, on Thursday that limits Iran's nuclear program in such a way as to effectively block it from building a nuclear weapon. Expect pushback anyway, if the recent past is any harbinger. Just last month, in an attempt to head off such an agreement, House Speaker John Boehner invited Israeli Prime Minister Benjamin Netanyahu to preemptively blast it before Congress, and 47 senators sent a letter to the Iranian leadership warning them away from a deal. The debate that has already begun since the announcement of the new framework will likely result in more heat than light. It will not be helped by the gathering swirl of dubious assumptions and doubtful assertions. Let us address some of these: . The most misleading assertion, despite universal rejection by experts, is that the negotiations' objective at the outset was the total elimination of any nuclear program in Iran. That is the position of Netanyahu and his acolytes in the U.S. Congress. But that is not and never was the objective. If it had been, there would have been no Iranian team at the negotiating table. Rather, the objective has always been to structure an agreement or series of agreements so that Iran could not covertly develop a nuclear arsenal before the United States and its allies could respond. The new framework has exceeded expectations in achieving that goal. It would reduce Iran's low-enriched uranium stockpile, cut by two-thirds its number of installed centrifuges and implement a rigorous inspection regime. Another dubious assumption of opponents is that the Iranian nuclear program is a covert weapons program. Despite sharp accusations by some in the United States and its allies, Iran denies having such a program, and U.S. intelligence contends that Iran has not yet made the decision to build a nuclear weapon. Iran's continued cooperation with International Atomic Energy Agency inspections is further evidence on this point, and we'll know even more about Iran's program in the coming months and years because of the deal. In fact, the inspections provisions that are part of this agreement are designed to protect against any covert action by the Iranians. What's more, the rhetoric of some members of Congress has implied that the negotiations have been between only the United States and Iran (i.e., the 47 senators' letter warning that a deal might be killed by Congress or a future president). This of course is not the case. The talks were between Iran and the five permanent members of the U.N. Security Council (United States, United Kingdom, France, China and Russia) plus Germany, dubbed the P5+1. While the United States has played a leading role in the effort, it negotiated the terms alongside its partners. If the agreement reached by the P5+1 is rejected by Congress, it could result in an unraveling of the sanctions on Iran and threaten NATO cohesion in other areas. Another questionable assertion is that this agreement contains a sunset clause, after which Iran will be free to do as it pleases. Again, this is not the case. Some of the restrictions on Iran's nuclear activities, such as uranium enrichment, will be eased or eliminated over time, as long as 15 years. But most importantly, the framework agreement includes Iran's ratification of the Additional Protocol, which allows IAEA inspectors expanded access to nuclear sites both declared and nondeclared. This provision will be permanent. It does not sunset. Thus, going forward, if Iran decides to enrich uranium to weapons-grade levels, monitors will be able to detect such a move in a matter of days and alert the U.N. Security Council. Many in Congress have said that the agreement should be a formal treaty requiring the Senate to \"advise and consent.\" But the issue is not suited for a treaty. Treaties impose equivalent obligations on all signatories. For example, the New START treaty limits Russia and the United States to 1,550 deployed strategic warheads. But any agreement with Iran will not be so balanced. The restrictions and obligations in the final framework agreement will be imposed almost exclusively on Iran. The P5+1 are obligated only to ease and eventually remove most but not all economic sanctions, which were imposed as leverage to gain this final deal. Finally some insist that any agreement must address Iranian missile programs, human rights violations or support for Hamas or Hezbollah. As important as these issues are, and they must indeed be addressed, they are unrelated to the most important aim of a nuclear deal: preventing a nuclear Iran. To include them in the negotiations would be a poison pill. This agreement should be judged on its merits and on how it affects the security of our negotiating partners and allies, including Israel. Those judgments should be fact-based, not based on questionable assertions or dubious assumptions."
- ARTICLE_SUBWAY = 'New York (CNN)When Liana Barrientos was 23 years old, she got married in Westchester County, New York. A year later, she got married again in Westchester County, but to a different man and without divorcing her first husband. Only 18 days after that marriage, she got hitched yet again. Then, Barrientos declared "I do" five more times, sometimes only within two weeks of each other. In 2010, she married once more, this time in the Bronx. In an application for a marriage license, she stated it was her "first and only" marriage. Barrientos, now 39, is facing two criminal counts of "offering a false instrument for filing in the first degree," referring to her false statements on the 2010 marriage license application, according to court documents. Prosecutors said the marriages were part of an immigration scam. On Friday, she pleaded not guilty at State Supreme Court in the Bronx, according to her attorney, Christopher Wright, who declined to comment further. After leaving court, Barrientos was arrested and charged with theft of service and criminal trespass for allegedly sneaking into the New York subway through an emergency exit, said Detective Annette Markowski, a police spokeswoman. In total, Barrientos has been married 10 times, with nine of her marriages occurring between 1999 and 2002. All occurred either in Westchester County, Long Island, New Jersey or the Bronx. She is believed to still be married to four men, and at one time, she was married to eight men at once, prosecutors say. Prosecutors said the immigration scam involved some of her husbands, who filed for permanent residence status shortly after the marriages. Any divorces happened only after such filings were approved. It was unclear whether any of the men will be prosecuted. The case was referred to the Bronx District Attorney\'s Office by Immigration and Customs Enforcement and the Department of Homeland Security\'s Investigation Division. Seven of the men are from so-called "red-flagged" countries, including Egypt, Turkey, Georgia, Pakistan and Mali. Her eighth husband, Rashid Rajput, was deported in 2006 to his native Pakistan after an investigation by the Joint Terrorism Task Force. If convicted, Barrientos faces up to four years in prison. Her next court appearance is scheduled for May 18.'
+ FRANCE_ARTICLE = ( # @noqa
+ "Marseille, France (CNN)The French prosecutor leading an investigation into the crash of Germanwings"
+ " Flight 9525 insisted Wednesday that he was not aware of any video footage from on board the plane."
+ ' Marseille prosecutor Brice Robin told CNN that "so far no videos were used in the crash investigation."'
+ ' He added, "A person who has such a video needs to immediately give it to the investigators." Robin\'s'
+ " comments follow claims by two magazines, German daily Bild and French Paris Match, of a cell phone video"
+ " showing the harrowing final seconds from on board Germanwings Flight 9525 as it crashed into the French"
+ " Alps. All 150 on board were killed. Paris Match and Bild reported that the video was recovered from a"
+ " phone at the wreckage site. The two publications described the supposed video, but did not post it on"
+ " their websites. The publications said that they watched the video, which was found by a source close to"
+ " the investigation. \"One can hear cries of 'My God' in several languages,\" Paris Match reported."
+ ' "Metallic banging can also be heard more than three times, perhaps of the pilot trying to open the'
+ " cockpit door with a heavy object. Towards the end, after a heavy shake, stronger than the others, the"
+ ' screaming intensifies. Then nothing." "It is a very disturbing scene," said Julian Reichelt,'
+ " editor-in-chief of Bild online. An official with France's accident investigation agency, the BEA, said"
+ " the agency is not aware of any such video. Lt. Col. Jean-Marc Menichini, a French Gendarmerie spokesman"
+ " in charge of communications on rescue efforts around the Germanwings crash site, told CNN that the"
+ ' reports were "completely wrong" and "unwarranted." Cell phones have been collected at the site, he said,'
+ ' but that they "hadn\'t been exploited yet." Menichini said he believed the cell phones would need to be'
+ " sent to the Criminal Research Institute in Rosny sous-Bois, near Paris, in order to be analyzed by"
+ " specialized technicians working hand-in-hand with investigators. But none of the cell phones found so"
+ " far have been sent to the institute, Menichini said. Asked whether staff involved in the search could"
+ ' have leaked a memory card to the media, Menichini answered with a categorical "no." Reichelt told "Erin'
+ ' Burnett: Outfront" that he had watched the video and stood by the report, saying Bild and Paris Match'
+ ' are "very confident" that the clip is real. He noted that investigators only revealed they\'d recovered'
+ ' cell phones from the crash site after Bild and Paris Match published their reports. "That is something'
+ " we did not know before. ... Overall we can say many things of the investigation weren't revealed by the"
+ ' investigation at the beginning," he said. What was mental state of Germanwings co-pilot? German airline'
+ " Lufthansa confirmed Tuesday that co-pilot Andreas Lubitz had battled depression years before he took the"
+ " controls of Germanwings Flight 9525, which he's accused of deliberately crashing last week in the"
+ ' French Alps. Lubitz told his Lufthansa flight training school in 2009 that he had a "previous episode of'
+ ' severe depression," the airline said Tuesday. Email correspondence between Lubitz and the school'
+ " discovered in an internal investigation, Lufthansa said, included medical documents he submitted in"
+ " connection with resuming his flight training. The announcement indicates that Lufthansa, the parent"
+ " company of Germanwings, knew of Lubitz's battle with depression, allowed him to continue training and"
+ " ultimately put him in the cockpit. Lufthansa, whose CEO Carsten Spohr previously said Lubitz was 100%"
+ ' fit to fly, described its statement Tuesday as a "swift and seamless clarification" and said it was'
+ " sharing the information and documents -- including training and medical records -- with public"
+ " prosecutors. Spohr traveled to the crash site Wednesday, where recovery teams have been working for the"
+ " past week to recover human remains and plane debris scattered across a steep mountainside. He saw the"
+ " crisis center set up in Seyne-les-Alpes, laid a wreath in the village of Le Vernet, closer to the crash"
+ " site, where grieving families have left flowers at a simple stone memorial. Menichini told CNN late"
+ " Tuesday that no visible human remains were left at the site but recovery teams would keep searching."
+ " French President Francois Hollande, speaking Tuesday, said that it should be possible to identify all"
+ " the victims using DNA analysis by the end of the week, sooner than authorities had previously suggested."
+ " In the meantime, the recovery of the victims' personal belongings will start Wednesday, Menichini said."
+ " Among those personal belongings could be more cell phones belonging to the 144 passengers and six crew"
+ " on board. Check out the latest from our correspondents . The details about Lubitz's correspondence with"
+ " the flight school during his training were among several developments as investigators continued to"
+ " delve into what caused the crash and Lubitz's possible motive for downing the jet. A Lufthansa"
+ " spokesperson told CNN on Tuesday that Lubitz had a valid medical certificate, had passed all his"
+ ' examinations and "held all the licenses required." Earlier, a spokesman for the prosecutor\'s office in'
+ " Dusseldorf, Christoph Kumpa, said medical records reveal Lubitz suffered from suicidal tendencies at"
+ " some point before his aviation career and underwent psychotherapy before he got his pilot's license."
+ " Kumpa emphasized there's no evidence suggesting Lubitz was suicidal or acting aggressively before the"
+ " crash. Investigators are looking into whether Lubitz feared his medical condition would cause him to"
+ " lose his pilot's license, a European government official briefed on the investigation told CNN on"
+ ' Tuesday. While flying was "a big part of his life," the source said, it\'s only one theory being'
+ " considered. Another source, a law enforcement official briefed on the investigation, also told CNN that"
+ " authorities believe the primary motive for Lubitz to bring down the plane was that he feared he would"
+ " not be allowed to fly because of his medical problems. Lubitz's girlfriend told investigators he had"
+ " seen an eye doctor and a neuropsychologist, both of whom deemed him unfit to work recently and concluded"
+ " he had psychological issues, the European government official said. But no matter what details emerge"
+ " about his previous mental health struggles, there's more to the story, said Brian Russell, a forensic"
+ ' psychologist. "Psychology can explain why somebody would turn rage inward on themselves about the fact'
+ " that maybe they weren't going to keep doing their job and they're upset about that and so they're"
+ ' suicidal," he said. "But there is no mental illness that explains why somebody then feels entitled to'
+ " also take that rage and turn it outward on 149 other people who had nothing to do with the person's"
+ ' problems." Germanwings crash compensation: What we know . Who was the captain of Germanwings Flight'
+ " 9525? CNN's Margot Haddad reported from Marseille and Pamela Brown from Dusseldorf, while Laura"
+ " Smith-Spark wrote from London. CNN's Frederik Pleitgen, Pamela Boykoff, Antonia Mortensen, Sandrine"
+ " Amiel and Anna-Maja Rappard contributed to this report."
+ )
+ SHORTER_ARTICLE = (
+ "(CNN)The Palestinian Authority officially became the 123rd member of the International Criminal Court on"
+ " Wednesday, a step that gives the court jurisdiction over alleged crimes in Palestinian territories. The"
+ " formal accession was marked with a ceremony at The Hague, in the Netherlands, where the court is based."
+ " The Palestinians signed the ICC's founding Rome Statute in January, when they also accepted its"
+ ' jurisdiction over alleged crimes committed "in the occupied Palestinian territory, including East'
+ ' Jerusalem, since June 13, 2014." Later that month, the ICC opened a preliminary examination into the'
+ " situation in Palestinian territories, paving the way for possible war crimes investigations against"
+ " Israelis. As members of the court, Palestinians may be subject to counter-charges as well. Israel and"
+ " the United States, neither of which is an ICC member, opposed the Palestinians' efforts to join the"
+ " body. But Palestinian Foreign Minister Riad al-Malki, speaking at Wednesday's ceremony, said it was a"
+ ' move toward greater justice. "As Palestine formally becomes a State Party to the Rome Statute today, the'
+ ' world is also a step closer to ending a long era of impunity and injustice," he said, according to an'
+ ' ICC news release. "Indeed, today brings us closer to our shared goals of justice and peace." Judge'
+ " Kuniko Ozaki, a vice president of the ICC, said acceding to the treaty was just the first step for the"
+ ' Palestinians. "As the Rome Statute today enters into force for the State of Palestine, Palestine'
+ " acquires all the rights as well as responsibilities that come with being a State Party to the Statute."
+ ' These are substantive commitments, which cannot be taken lightly," she said. Rights group Human Rights'
+ ' Watch welcomed the development. "Governments seeking to penalize Palestine for joining the ICC should'
+ " immediately end their pressure, and countries that support universal acceptance of the court's treaty"
+ ' should speak out to welcome its membership," said Balkees Jarrah, international justice counsel for the'
+ " group. \"What's objectionable is the attempts to undermine international justice, not Palestine's"
+ ' decision to join a treaty to which over 100 countries around the world are members." In January, when'
+ " the preliminary ICC examination was opened, Israeli Prime Minister Benjamin Netanyahu described it as an"
+ ' outrage, saying the court was overstepping its boundaries. The United States also said it "strongly"'
+ " disagreed with the court's decision. \"As we have said repeatedly, we do not believe that Palestine is a"
+ ' state and therefore we do not believe that it is eligible to join the ICC," the State Department said in'
+ ' a statement. It urged the warring sides to resolve their differences through direct negotiations. "We'
+ ' will continue to oppose actions against Israel at the ICC as counterproductive to the cause of peace,"'
+ " it said. But the ICC begs to differ with the definition of a state for its purposes and refers to the"
+ ' territories as "Palestine." While a preliminary examination is not a formal investigation, it allows the'
+ " court to review evidence and determine whether to investigate suspects on both sides. Prosecutor Fatou"
+ ' Bensouda said her office would "conduct its analysis in full independence and impartiality." The war'
+ " between Israel and Hamas militants in Gaza last summer left more than 2,000 people dead. The inquiry"
+ " will include alleged war crimes committed since June. The International Criminal Court was set up in"
+ " 2002 to prosecute genocide, crimes against humanity and war crimes. CNN's Vasco Cotovio, Kareem Khadder"
+ " and Faith Karimi contributed to this report."
+ )
+ IRAN_ARTICLE = (
+ "(CNN)The United States and its negotiating partners reached a very strong framework agreement with Iran"
+ " in Lausanne, Switzerland, on Thursday that limits Iran's nuclear program in such a way as to effectively"
+ " block it from building a nuclear weapon. Expect pushback anyway, if the recent past is any harbinger."
+ " Just last month, in an attempt to head off such an agreement, House Speaker John Boehner invited Israeli"
+ " Prime Minister Benjamin Netanyahu to preemptively blast it before Congress, and 47 senators sent a"
+ " letter to the Iranian leadership warning them away from a deal. The debate that has already begun since"
+ " the announcement of the new framework will likely result in more heat than light. It will not be helped"
+ " by the gathering swirl of dubious assumptions and doubtful assertions. Let us address some of these: ."
+ " The most misleading assertion, despite universal rejection by experts, is that the negotiations'"
+ " objective at the outset was the total elimination of any nuclear program in Iran. That is the position"
+ " of Netanyahu and his acolytes in the U.S. Congress. But that is not and never was the objective. If it"
+ " had been, there would have been no Iranian team at the negotiating table. Rather, the objective has"
+ " always been to structure an agreement or series of agreements so that Iran could not covertly develop a"
+ " nuclear arsenal before the United States and its allies could respond. The new framework has exceeded"
+ " expectations in achieving that goal. It would reduce Iran's low-enriched uranium stockpile, cut by"
+ " two-thirds its number of installed centrifuges and implement a rigorous inspection regime. Another"
+ " dubious assumption of opponents is that the Iranian nuclear program is a covert weapons program. Despite"
+ " sharp accusations by some in the United States and its allies, Iran denies having such a program, and"
+ " U.S. intelligence contends that Iran has not yet made the decision to build a nuclear weapon. Iran's"
+ " continued cooperation with International Atomic Energy Agency inspections is further evidence on this"
+ " point, and we'll know even more about Iran's program in the coming months and years because of the deal."
+ " In fact, the inspections provisions that are part of this agreement are designed to protect against any"
+ " covert action by the Iranians. What's more, the rhetoric of some members of Congress has implied that"
+ " the negotiations have been between only the United States and Iran (i.e., the 47 senators' letter"
+ " warning that a deal might be killed by Congress or a future president). This of course is not the case."
+ " The talks were between Iran and the five permanent members of the U.N. Security Council (United States,"
+ " United Kingdom, France, China and Russia) plus Germany, dubbed the P5+1. While the United States has"
+ " played a leading role in the effort, it negotiated the terms alongside its partners. If the agreement"
+ " reached by the P5+1 is rejected by Congress, it could result in an unraveling of the sanctions on Iran"
+ " and threaten NATO cohesion in other areas. Another questionable assertion is that this agreement"
+ " contains a sunset clause, after which Iran will be free to do as it pleases. Again, this is not the"
+ " case. Some of the restrictions on Iran's nuclear activities, such as uranium enrichment, will be eased"
+ " or eliminated over time, as long as 15 years. But most importantly, the framework agreement includes"
+ " Iran's ratification of the Additional Protocol, which allows IAEA inspectors expanded access to nuclear"
+ " sites both declared and nondeclared. This provision will be permanent. It does not sunset. Thus, going"
+ " forward, if Iran decides to enrich uranium to weapons-grade levels, monitors will be able to detect such"
+ " a move in a matter of days and alert the U.N. Security Council. Many in Congress have said that the"
+ ' agreement should be a formal treaty requiring the Senate to "advise and consent." But the issue is not'
+ " suited for a treaty. Treaties impose equivalent obligations on all signatories. For example, the New"
+ " START treaty limits Russia and the United States to 1,550 deployed strategic warheads. But any agreement"
+ " with Iran will not be so balanced. The restrictions and obligations in the final framework agreement"
+ " will be imposed almost exclusively on Iran. The P5+1 are obligated only to ease and eventually remove"
+ " most but not all economic sanctions, which were imposed as leverage to gain this final deal. Finally"
+ " some insist that any agreement must address Iranian missile programs, human rights violations or support"
+ " for Hamas or Hezbollah. As important as these issues are, and they must indeed be addressed, they are"
+ " unrelated to the most important aim of a nuclear deal: preventing a nuclear Iran. To include them in"
+ " the negotiations would be a poison pill. This agreement should be judged on its merits and on how it"
+ " affects the security of our negotiating partners and allies, including Israel. Those judgments should be"
+ " fact-based, not based on questionable assertions or dubious assumptions."
+ )
+ ARTICLE_SUBWAY = (
+ "New York (CNN)When Liana Barrientos was 23 years old, she got married in Westchester County, New York. A"
+ " year later, she got married again in Westchester County, but to a different man and without divorcing"
+ " her first husband. Only 18 days after that marriage, she got hitched yet again. Then, Barrientos"
+ ' declared "I do" five more times, sometimes only within two weeks of each other. In 2010, she married'
+ " once more, this time in the Bronx. In an application for a marriage license, she stated it was her"
+ ' "first and only" marriage. Barrientos, now 39, is facing two criminal counts of "offering a false'
+ ' instrument for filing in the first degree," referring to her false statements on the 2010 marriage'
+ " license application, according to court documents. Prosecutors said the marriages were part of an"
+ " immigration scam. On Friday, she pleaded not guilty at State Supreme Court in the Bronx, according to"
+ " her attorney, Christopher Wright, who declined to comment further. After leaving court, Barrientos was"
+ " arrested and charged with theft of service and criminal trespass for allegedly sneaking into the New"
+ " York subway through an emergency exit, said Detective Annette Markowski, a police spokeswoman. In total,"
+ " Barrientos has been married 10 times, with nine of her marriages occurring between 1999 and 2002. All"
+ " occurred either in Westchester County, Long Island, New Jersey or the Bronx. She is believed to still be"
+ " married to four men, and at one time, she was married to eight men at once, prosecutors say. Prosecutors"
+ " said the immigration scam involved some of her husbands, who filed for permanent residence status"
+ " shortly after the marriages. Any divorces happened only after such filings were approved. It was"
+ " unclear whether any of the men will be prosecuted. The case was referred to the Bronx District"
+ " Attorney's Office by Immigration and Customs Enforcement and the Department of Homeland Security's"
+ ' Investigation Division. Seven of the men are from so-called "red-flagged" countries, including Egypt,'
+ " Turkey, Georgia, Pakistan and Mali. Her eighth husband, Rashid Rajput, was deported in 2006 to his"
+ " native Pakistan after an investigation by the Joint Terrorism Task Force. If convicted, Barrientos faces"
+ " up to four years in prison. Her next court appearance is scheduled for May 18."
+ )
 expected_summaries = [
- 'prosecutor: "so far no videos were used in the crash investigation" two magazines claim to have found a cell phone video of the final seconds . "one can hear cries of \'My God\' in several languages," one magazine says .',
- "the formal accession was marked by a ceremony at The Hague, in the Netherlands . the ICC opened a preliminary examination into the situation in the occupied Palestinian territory . as members of the court, Palestinians may be subject to counter-charges as well .",
- "the u.s. and its negotiating partners reached a very strong framework agreement with Iran . aaron miller: the debate that has already begun since the announcement of the new framework will likely result in more heat than light . the deal would reduce Iran's low-enriched uranium stockpile, cut centrifuges and implement a rigorous inspection regime .",
- 'prosecutors say the marriages were part of an immigration scam . if convicted, barrientos faces two criminal counts of "offering a false instrument for filing in the first degree" she has been married 10 times, with nine of her marriages occurring between 1999 and 2002 .',
+ 'prosecutor: "so far no videos were used in the crash investigation" two magazines claim to have found a'
+ " cell phone video of the final seconds . \"one can hear cries of 'My God' in several languages,\" one"
+ " magazine says .",
+ "the formal accession was marked by a ceremony at The Hague, in the Netherlands . the ICC opened a"
+ " preliminary examination into the situation in the occupied Palestinian territory . as members of the"
+ " court, Palestinians may be subject to counter-charges as well .",
+ "the u.s. and its negotiating partners reached a very strong framework agreement with Iran . aaron miller:"
+ " the debate that has already begun since the announcement of the new framework will likely result in more"
+ " heat than light . the deal would reduce Iran's low-enriched uranium stockpile, cut centrifuges and"
+ " implement a rigorous inspection regime .",
+ "prosecutors say the marriages were part of an immigration scam . if convicted, barrientos faces two"
+ ' criminal counts of "offering a false instrument for filing in the first degree" she has been married 10'
+ " times, with nine of her marriages occurring between 1999 and 2002 .",
 ]
 use_task_specific_params(model, "summarization")
@@ -971,7 +1163,10 @@ class T5ModelIntegrationTests(unittest.TestCase):
 tok = self.tokenizer
 use_task_specific_params(model, "translation_en_to_fr")
- en_text = ' This image section from an infrared recording by the Spitzer telescope shows a "family portrait" of countless generations of stars: the oldest stars are seen as blue dots. '
+ en_text = (
+ ' This image section from an infrared recording by the Spitzer telescope shows a "family portrait" of'
+ " countless generations of stars: the oldest stars are seen as blue dots. "
+ )
 input_ids = tok.encode(model.config.prefix + en_text, return_tensors="pt")
 input_ids = input_ids.to(torch_device)
diff --git a/tests/models/t5/test_modeling_tf_t5.py b/tests/models/t5/test_modeling_tf_t5.py
index 1450a8c771..91bc63feda 100644
--- a/tests/models/t5/test_modeling_tf_t5.py
+++ b/tests/models/t5/test_modeling_tf_t5.py
@@ -700,19 +700,211 @@ class TFT5ModelIntegrationTests(unittest.TestCase):
 model = self.model
 tok = T5Tokenizer.from_pretrained("t5-base")
- FRANCE_ARTICLE = 'Marseille, France (CNN)The French prosecutor leading an investigation into the crash of Germanwings Flight 9525 insisted Wednesday that he was not aware of any video footage from on board the plane. Marseille prosecutor Brice Robin told CNN that "so far no videos were used in the crash investigation." He added, "A person who has such a video needs to immediately give it to the investigators." Robin\'s comments follow claims by two magazines, German daily Bild and French Paris Match, of a cell phone video showing the harrowing final seconds from on board Germanwings Flight 9525 as it crashed into the French Alps. All 150 on board were killed. Paris Match and Bild reported that the video was recovered from a phone at the wreckage site. The two publications described the supposed video, but did not post it on their websites. The publications said that they watched the video, which was found by a source close to the investigation. "One can hear cries of \'My God\' in several languages," Paris Match reported. "Metallic banging can also be heard more than three times, perhaps of the pilot trying to open the cockpit door with a heavy object. Towards the end, after a heavy shake, stronger than the others, the screaming intensifies. Then nothing." "It is a very disturbing scene," said Julian Reichelt, editor-in-chief of Bild online. An official with France\'s accident investigation agency, the BEA, said the agency is not aware of any such video. Lt. Col. Jean-Marc Menichini, a French Gendarmerie spokesman in charge of communications on rescue efforts around the Germanwings crash site, told CNN that the reports were "completely wrong" and "unwarranted." Cell phones have been collected at the site, he said, but that they "hadn\'t been exploited yet." Menichini said he believed the cell phones would need to be sent to the Criminal Research Institute in Rosny sous-Bois, near Paris, in order to be analyzed by specialized technicians working hand-in-hand with investigators. But none of the cell phones found so far have been sent to the institute, Menichini said. Asked whether staff involved in the search could have leaked a memory card to the media, Menichini answered with a categorical "no." Reichelt told "Erin Burnett: Outfront" that he had watched the video and stood by the report, saying Bild and Paris Match are "very confident" that the clip is real. He noted that investigators only revealed they\'d recovered cell phones from the crash site after Bild and Paris Match published their reports. "That is something we did not know before. ... Overall we can say many things of the investigation weren\'t revealed by the investigation at the beginning," he said. What was mental state of Germanwings co-pilot? German airline Lufthansa confirmed Tuesday that co-pilot Andreas Lubitz had battled depression years before he took the controls of Germanwings Flight 9525, which he\'s accused of deliberately crashing last week in the French Alps. Lubitz told his Lufthansa flight training school in 2009 that he had a "previous episode of severe depression," the airline said Tuesday. Email correspondence between Lubitz and the school discovered in an internal investigation, Lufthansa said, included medical documents he submitted in connection with resuming his flight training. The announcement indicates that Lufthansa, the parent company of Germanwings, knew of Lubitz\'s battle with depression, allowed him to continue training and ultimately put him in the cockpit. Lufthansa, whose CEO Carsten Spohr previously said Lubitz was 100% fit to fly, described its statement Tuesday as a "swift and seamless clarification" and said it was sharing the information and documents -- including training and medical records -- with public prosecutors. Spohr traveled to the crash site Wednesday, where recovery teams have been working for the past week to recover human remains and plane debris scattered across a steep mountainside. He saw the crisis center set up in Seyne-les-Alpes, laid a wreath in the village of Le Vernet, closer to the crash site, where grieving families have left flowers at a simple stone memorial. Menichini told CNN late Tuesday that no visible human remains were left at the site but recovery teams would keep searching. French President Francois Hollande, speaking Tuesday, said that it should be possible to identify all the victims using DNA analysis by the end of the week, sooner than authorities had previously suggested. In the meantime, the recovery of the victims\' personal belongings will start Wednesday, Menichini said. Among those personal belongings could be more cell phones belonging to the 144 passengers and six crew on board. Check out the latest from our correspondents . The details about Lubitz\'s correspondence with the flight school during his training were among several developments as investigators continued to delve into what caused the crash and Lubitz\'s possible motive for downing the jet. A Lufthansa spokesperson told CNN on Tuesday that Lubitz had a valid medical certificate, had passed all his examinations and "held all the licenses required." Earlier, a spokesman for the prosecutor\'s office in Dusseldorf, Christoph Kumpa, said medical records reveal Lubitz suffered from suicidal tendencies at some point before his aviation career and underwent psychotherapy before he got his pilot\'s license. Kumpa emphasized there\'s no evidence suggesting Lubitz was suicidal or acting aggressively before the crash. Investigators are looking into whether Lubitz feared his medical condition would cause him to lose his pilot\'s license, a European government official briefed on the investigation told CNN on Tuesday.
While flying was "a big part of his life," the source said, it\'s only one theory being considered. Another source, a law enforcement official briefed on the investigation, also told CNN that authorities believe the primary motive for Lubitz to bring down the plane was that he feared he would not be allowed to fly because of his medical problems. Lubitz\'s girlfriend told investigators he had seen an eye doctor and a neuropsychologist, both of whom deemed him unfit to work recently and concluded he had psychological issues, the European government official said. But no matter what details emerge about his previous mental health struggles, there\'s more to the story, said Brian Russell, a forensic psychologist. "Psychology can explain why somebody would turn rage inward on themselves about the fact that maybe they weren\'t going to keep doing their job and they\'re upset about that and so they\'re suicidal," he said. "But there is no mental illness that explains why somebody then feels entitled to also take that rage and turn it outward on 149 other people who had nothing to do with the person\'s problems." Germanwings crash compensation: What we know . Who was the captain of Germanwings Flight 9525? CNN\'s Margot Haddad reported from Marseille and Pamela Brown from Dusseldorf, while Laura Smith-Spark wrote from London. CNN\'s Frederik Pleitgen, Pamela Boykoff, Antonia Mortensen, Sandrine Amiel and Anna-Maja Rappard contributed to this report.' # @noqa + FRANCE_ARTICLE = ( # @noqa + "Marseille, France (CNN)The French prosecutor leading an investigation into the crash of Germanwings" + " Flight 9525 insisted Wednesday that he was not aware of any video footage from on board the plane." + ' Marseille prosecutor Brice Robin told CNN that "so far no videos were used in the crash investigation."' + ' He added, "A person who has such a video needs to immediately give it to the investigators." Robin\'s' + " comments follow claims by two magazines, German daily Bild and French Paris Match, of a cell phone video" + " showing the harrowing final seconds from on board Germanwings Flight 9525 as it crashed into the French" + " Alps. All 150 on board were killed. Paris Match and Bild reported that the video was recovered from a" + " phone at the wreckage site. The two publications described the supposed video, but did not post it on" + " their websites. The publications said that they watched the video, which was found by a source close to" + " the investigation. \"One can hear cries of 'My God' in several languages,\" Paris Match reported." + ' "Metallic banging can also be heard more than three times, perhaps of the pilot trying to open the' + " cockpit door with a heavy object. Towards the end, after a heavy shake, stronger than the others, the" + ' screaming intensifies. Then nothing." "It is a very disturbing scene," said Julian Reichelt,' + " editor-in-chief of Bild online. An official with France's accident investigation agency, the BEA, said" + " the agency is not aware of any such video. Lt. Col. Jean-Marc Menichini, a French Gendarmerie spokesman" + " in charge of communications on rescue efforts around the Germanwings crash site, told CNN that the" + ' reports were "completely wrong" and "unwarranted." Cell phones have been collected at the site, he said,' + ' but that they "hadn\'t been exploited yet." 
Menichini said he believed the cell phones would need to be' + " sent to the Criminal Research Institute in Rosny sous-Bois, near Paris, in order to be analyzed by" + " specialized technicians working hand-in-hand with investigators. But none of the cell phones found so" + " far have been sent to the institute, Menichini said. Asked whether staff involved in the search could" + ' have leaked a memory card to the media, Menichini answered with a categorical "no." Reichelt told "Erin' + ' Burnett: Outfront" that he had watched the video and stood by the report, saying Bild and Paris Match' + ' are "very confident" that the clip is real. He noted that investigators only revealed they\'d recovered' + ' cell phones from the crash site after Bild and Paris Match published their reports. "That is something' + " we did not know before. ... Overall we can say many things of the investigation weren't revealed by the" + ' investigation at the beginning," he said. What was mental state of Germanwings co-pilot? German airline' + " Lufthansa confirmed Tuesday that co-pilot Andreas Lubitz had battled depression years before he took the" + " controls of Germanwings Flight 9525, which he's accused of deliberately crashing last week in the" + ' French Alps. Lubitz told his Lufthansa flight training school in 2009 that he had a "previous episode of' + ' severe depression," the airline said Tuesday. Email correspondence between Lubitz and the school' + " discovered in an internal investigation, Lufthansa said, included medical documents he submitted in" + " connection with resuming his flight training. The announcement indicates that Lufthansa, the parent" + " company of Germanwings, knew of Lubitz's battle with depression, allowed him to continue training and" + " ultimately put him in the cockpit. Lufthansa, whose CEO Carsten Spohr previously said Lubitz was 100%" + ' fit to fly, described its statement Tuesday as a "swift and seamless clarification" and said it was' + " sharing the information and documents -- including training and medical records -- with public" + " prosecutors. Spohr traveled to the crash site Wednesday, where recovery teams have been working for the" + " past week to recover human remains and plane debris scattered across a steep mountainside. He saw the" + " crisis center set up in Seyne-les-Alpes, laid a wreath in the village of Le Vernet, closer to the crash" + " site, where grieving families have left flowers at a simple stone memorial. Menichini told CNN late" + " Tuesday that no visible human remains were left at the site but recovery teams would keep searching." + " French President Francois Hollande, speaking Tuesday, said that it should be possible to identify all" + " the victims using DNA analysis by the end of the week, sooner than authorities had previously suggested." + " In the meantime, the recovery of the victims' personal belongings will start Wednesday, Menichini said." + " Among those personal belongings could be more cell phones belonging to the 144 passengers and six crew" + " on board. Check out the latest from our correspondents . The details about Lubitz's correspondence with" + " the flight school during his training were among several developments as investigators continued to" + " delve into what caused the crash and Lubitz's possible motive for downing the jet. A Lufthansa" + " spokesperson told CNN on Tuesday that Lubitz had a valid medical certificate, had passed all his" + ' examinations and "held all the licenses required." 
Earlier, a spokesman for the prosecutor\'s office in' + " Dusseldorf, Christoph Kumpa, said medical records reveal Lubitz suffered from suicidal tendencies at" + " some point before his aviation career and underwent psychotherapy before he got his pilot's license." + " Kumpa emphasized there's no evidence suggesting Lubitz was suicidal or acting aggressively before the" + " crash. Investigators are looking into whether Lubitz feared his medical condition would cause him to" + " lose his pilot's license, a European government official briefed on the investigation told CNN on" + ' Tuesday. While flying was "a big part of his life," the source said, it\'s only one theory being' + " considered. Another source, a law enforcement official briefed on the investigation, also told CNN that" + " authorities believe the primary motive for Lubitz to bring down the plane was that he feared he would" + " not be allowed to fly because of his medical problems. Lubitz's girlfriend told investigators he had" + " seen an eye doctor and a neuropsychologist, both of whom deemed him unfit to work recently and concluded" + " he had psychological issues, the European government official said. But no matter what details emerge" + " about his previous mental health struggles, there's more to the story, said Brian Russell, a forensic" + ' psychologist. "Psychology can explain why somebody would turn rage inward on themselves about the fact' + " that maybe they weren't going to keep doing their job and they're upset about that and so they're" + ' suicidal," he said. "But there is no mental illness that explains why somebody then feels entitled to' + " also take that rage and turn it outward on 149 other people who had nothing to do with the person's" + ' problems." Germanwings crash compensation: What we know . Who was the captain of Germanwings Flight' + " 9525? CNN's Margot Haddad reported from Marseille and Pamela Brown from Dusseldorf, while Laura" + " Smith-Spark wrote from London. CNN's Frederik Pleitgen, Pamela Boykoff, Antonia Mortensen, Sandrine" + " Amiel and Anna-Maja Rappard contributed to this report." + ) - SHORTER_ARTICLE = '(CNN)The Palestinian Authority officially became the 123rd member of the International Criminal Court on Wednesday, a step that gives the court jurisdiction over alleged crimes in Palestinian territories. The formal accession was marked with a ceremony at The Hague, in the Netherlands, where the court is based. The Palestinians signed the ICC\'s founding Rome Statute in January, when they also accepted its jurisdiction over alleged crimes committed "in the occupied Palestinian territory, including East Jerusalem, since June 13, 2014." Later that month, the ICC opened a preliminary examination into the situation in Palestinian territories, paving the way for possible war crimes investigations against Israelis. As members of the court, Palestinians may be subject to counter-charges as well. Israel and the United States, neither of which is an ICC member, opposed the Palestinians\' efforts to join the body. But Palestinian Foreign Minister Riad al-Malki, speaking at Wednesday\'s ceremony, said it was a move toward greater justice. "As Palestine formally becomes a State Party to the Rome Statute today, the world is also a step closer to ending a long era of impunity and injustice," he said, according to an ICC news release. "Indeed, today brings us closer to our shared goals of justice and peace." 
Judge Kuniko Ozaki, a vice president of the ICC, said acceding to the treaty was just the first step for the Palestinians. "As the Rome Statute today enters into force for the State of Palestine, Palestine acquires all the rights as well as responsibilities that come with being a State Party to the Statute. These are substantive commitments, which cannot be taken lightly," she said. Rights group Human Rights Watch welcomed the development. "Governments seeking to penalize Palestine for joining the ICC should immediately end their pressure, and countries that support universal acceptance of the court\'s treaty should speak out to welcome its membership," said Balkees Jarrah, international justice counsel for the group. "What\'s objectionable is the attempts to undermine international justice, not Palestine\'s decision to join a treaty to which over 100 countries around the world are members." In January, when the preliminary ICC examination was opened, Israeli Prime Minister Benjamin Netanyahu described it as an outrage, saying the court was overstepping its boundaries. The United States also said it "strongly" disagreed with the court\'s decision. "As we have said repeatedly, we do not believe that Palestine is a state and therefore we do not believe that it is eligible to join the ICC," the State Department said in a statement. It urged the warring sides to resolve their differences through direct negotiations. "We will continue to oppose actions against Israel at the ICC as counterproductive to the cause of peace," it said. But the ICC begs to differ with the definition of a state for its purposes and refers to the territories as "Palestine." While a preliminary examination is not a formal investigation, it allows the court to review evidence and determine whether to investigate suspects on both sides. Prosecutor Fatou Bensouda said her office would "conduct its analysis in full independence and impartiality." The war between Israel and Hamas militants in Gaza last summer left more than 2,000 people dead. The inquiry will include alleged war crimes committed since June. The International Criminal Court was set up in 2002 to prosecute genocide, crimes against humanity and war crimes. CNN\'s Vasco Cotovio, Kareem Khadder and Faith Karimi contributed to this report.' + SHORTER_ARTICLE = ( + "(CNN)The Palestinian Authority officially became the 123rd member of the International Criminal Court on" + " Wednesday, a step that gives the court jurisdiction over alleged crimes in Palestinian territories. The" + " formal accession was marked with a ceremony at The Hague, in the Netherlands, where the court is based." + " The Palestinians signed the ICC's founding Rome Statute in January, when they also accepted its" + ' jurisdiction over alleged crimes committed "in the occupied Palestinian territory, including East' + ' Jerusalem, since June 13, 2014." Later that month, the ICC opened a preliminary examination into the' + " situation in Palestinian territories, paving the way for possible war crimes investigations against" + " Israelis. As members of the court, Palestinians may be subject to counter-charges as well. Israel and" + " the United States, neither of which is an ICC member, opposed the Palestinians' efforts to join the" + " body. But Palestinian Foreign Minister Riad al-Malki, speaking at Wednesday's ceremony, said it was a" + ' move toward greater justice. 
"As Palestine formally becomes a State Party to the Rome Statute today, the' + ' world is also a step closer to ending a long era of impunity and injustice," he said, according to an' + ' ICC news release. "Indeed, today brings us closer to our shared goals of justice and peace." Judge' + " Kuniko Ozaki, a vice president of the ICC, said acceding to the treaty was just the first step for the" + ' Palestinians. "As the Rome Statute today enters into force for the State of Palestine, Palestine' + " acquires all the rights as well as responsibilities that come with being a State Party to the Statute." + ' These are substantive commitments, which cannot be taken lightly," she said. Rights group Human Rights' + ' Watch welcomed the development. "Governments seeking to penalize Palestine for joining the ICC should' + " immediately end their pressure, and countries that support universal acceptance of the court's treaty" + ' should speak out to welcome its membership," said Balkees Jarrah, international justice counsel for the' + " group. \"What's objectionable is the attempts to undermine international justice, not Palestine's" + ' decision to join a treaty to which over 100 countries around the world are members." In January, when' + " the preliminary ICC examination was opened, Israeli Prime Minister Benjamin Netanyahu described it as an" + ' outrage, saying the court was overstepping its boundaries. The United States also said it "strongly"' + " disagreed with the court's decision. \"As we have said repeatedly, we do not believe that Palestine is a" + ' state and therefore we do not believe that it is eligible to join the ICC," the State Department said in' + ' a statement. It urged the warring sides to resolve their differences through direct negotiations. "We' + ' will continue to oppose actions against Israel at the ICC as counterproductive to the cause of peace,"' + " it said. But the ICC begs to differ with the definition of a state for its purposes and refers to the" + ' territories as "Palestine." While a preliminary examination is not a formal investigation, it allows the' + " court to review evidence and determine whether to investigate suspects on both sides. Prosecutor Fatou" + ' Bensouda said her office would "conduct its analysis in full independence and impartiality." The war' + " between Israel and Hamas militants in Gaza last summer left more than 2,000 people dead. The inquiry" + " will include alleged war crimes committed since June. The International Criminal Court was set up in" + " 2002 to prosecute genocide, crimes against humanity and war crimes. CNN's Vasco Cotovio, Kareem Khadder" + " and Faith Karimi contributed to this report." + ) - IRAN_ARTICLE = "(CNN)The United States and its negotiating partners reached a very strong framework agreement with Iran in Lausanne, Switzerland, on Thursday that limits Iran's nuclear program in such a way as to effectively block it from building a nuclear weapon. Expect pushback anyway, if the recent past is any harbinger. Just last month, in an attempt to head off such an agreement, House Speaker John Boehner invited Israeli Prime Minister Benjamin Netanyahu to preemptively blast it before Congress, and 47 senators sent a letter to the Iranian leadership warning them away from a deal. The debate that has already begun since the announcement of the new framework will likely result in more heat than light. It will not be helped by the gathering swirl of dubious assumptions and doubtful assertions. Let us address some of these: . 
The most misleading assertion, despite universal rejection by experts, is that the negotiations' objective at the outset was the total elimination of any nuclear program in Iran. That is the position of Netanyahu and his acolytes in the U.S. Congress. But that is not and never was the objective. If it had been, there would have been no Iranian team at the negotiating table. Rather, the objective has always been to structure an agreement or series of agreements so that Iran could not covertly develop a nuclear arsenal before the United States and its allies could respond. The new framework has exceeded expectations in achieving that goal. It would reduce Iran's low-enriched uranium stockpile, cut by two-thirds its number of installed centrifuges and implement a rigorous inspection regime. Another dubious assumption of opponents is that the Iranian nuclear program is a covert weapons program. Despite sharp accusations by some in the United States and its allies, Iran denies having such a program, and U.S. intelligence contends that Iran has not yet made the decision to build a nuclear weapon. Iran's continued cooperation with International Atomic Energy Agency inspections is further evidence on this point, and we'll know even more about Iran's program in the coming months and years because of the deal. In fact, the inspections provisions that are part of this agreement are designed to protect against any covert action by the Iranians. What's more, the rhetoric of some members of Congress has implied that the negotiations have been between only the United States and Iran (i.e., the 47 senators' letter warning that a deal might be killed by Congress or a future president). This of course is not the case. The talks were between Iran and the five permanent members of the U.N. Security Council (United States, United Kingdom, France, China and Russia) plus Germany, dubbed the P5+1. While the United States has played a leading role in the effort, it negotiated the terms alongside its partners. If the agreement reached by the P5+1 is rejected by Congress, it could result in an unraveling of the sanctions on Iran and threaten NATO cohesion in other areas. Another questionable assertion is that this agreement contains a sunset clause, after which Iran will be free to do as it pleases. Again, this is not the case. Some of the restrictions on Iran's nuclear activities, such as uranium enrichment, will be eased or eliminated over time, as long as 15 years. But most importantly, the framework agreement includes Iran's ratification of the Additional Protocol, which allows IAEA inspectors expanded access to nuclear sites both declared and nondeclared. This provision will be permanent. It does not sunset. Thus, going forward, if Iran decides to enrich uranium to weapons-grade levels, monitors will be able to detect such a move in a matter of days and alert the U.N. Security Council. Many in Congress have said that the agreement should be a formal treaty requiring the Senate to \"advise and consent.\" But the issue is not suited for a treaty. Treaties impose equivalent obligations on all signatories. For example, the New START treaty limits Russia and the United States to 1,550 deployed strategic warheads. But any agreement with Iran will not be so balanced. The restrictions and obligations in the final framework agreement will be imposed almost exclusively on Iran. 
The P5+1 are obligated only to ease and eventually remove most but not all economic sanctions, which were imposed as leverage to gain this final deal. Finally some insist that any agreement must address Iranian missile programs, human rights violations or support for Hamas or Hezbollah. As important as these issues are, and they must indeed be addressed, they are unrelated to the most important aim of a nuclear deal: preventing a nuclear Iran. To include them in the negotiations would be a poison pill. This agreement should be judged on its merits and on how it affects the security of our negotiating partners and allies, including Israel. Those judgments should be fact-based, not based on questionable assertions or dubious assumptions." + IRAN_ARTICLE = ( + "(CNN)The United States and its negotiating partners reached a very strong framework agreement with Iran" + " in Lausanne, Switzerland, on Thursday that limits Iran's nuclear program in such a way as to effectively" + " block it from building a nuclear weapon. Expect pushback anyway, if the recent past is any harbinger." + " Just last month, in an attempt to head off such an agreement, House Speaker John Boehner invited Israeli" + " Prime Minister Benjamin Netanyahu to preemptively blast it before Congress, and 47 senators sent a" + " letter to the Iranian leadership warning them away from a deal. The debate that has already begun since" + " the announcement of the new framework will likely result in more heat than light. It will not be helped" + " by the gathering swirl of dubious assumptions and doubtful assertions. Let us address some of these: ." + " The most misleading assertion, despite universal rejection by experts, is that the negotiations'" + " objective at the outset was the total elimination of any nuclear program in Iran. That is the position" + " of Netanyahu and his acolytes in the U.S. Congress. But that is not and never was the objective. If it" + " had been, there would have been no Iranian team at the negotiating table. Rather, the objective has" + " always been to structure an agreement or series of agreements so that Iran could not covertly develop a" + " nuclear arsenal before the United States and its allies could respond. The new framework has exceeded" + " expectations in achieving that goal. It would reduce Iran's low-enriched uranium stockpile, cut by" + " two-thirds its number of installed centrifuges and implement a rigorous inspection regime. Another" + " dubious assumption of opponents is that the Iranian nuclear program is a covert weapons program. Despite" + " sharp accusations by some in the United States and its allies, Iran denies having such a program, and" + " U.S. intelligence contends that Iran has not yet made the decision to build a nuclear weapon. Iran's" + " continued cooperation with International Atomic Energy Agency inspections is further evidence on this" + " point, and we'll know even more about Iran's program in the coming months and years because of the deal." + " In fact, the inspections provisions that are part of this agreement are designed to protect against any" + " covert action by the Iranians. What's more, the rhetoric of some members of Congress has implied that" + " the negotiations have been between only the United States and Iran (i.e., the 47 senators' letter" + " warning that a deal might be killed by Congress or a future president). This of course is not the case." + " The talks were between Iran and the five permanent members of the U.N. 
Security Council (United States," + " United Kingdom, France, China and Russia) plus Germany, dubbed the P5+1. While the United States has" + " played a leading role in the effort, it negotiated the terms alongside its partners. If the agreement" + " reached by the P5+1 is rejected by Congress, it could result in an unraveling of the sanctions on Iran" + " and threaten NATO cohesion in other areas. Another questionable assertion is that this agreement" + " contains a sunset clause, after which Iran will be free to do as it pleases. Again, this is not the" + " case. Some of the restrictions on Iran's nuclear activities, such as uranium enrichment, will be eased" + " or eliminated over time, as long as 15 years. But most importantly, the framework agreement includes" + " Iran's ratification of the Additional Protocol, which allows IAEA inspectors expanded access to nuclear" + " sites both declared and nondeclared. This provision will be permanent. It does not sunset. Thus, going" + " forward, if Iran decides to enrich uranium to weapons-grade levels, monitors will be able to detect such" + " a move in a matter of days and alert the U.N. Security Council. Many in Congress have said that the" + ' agreement should be a formal treaty requiring the Senate to "advise and consent." But the issue is not' + " suited for a treaty. Treaties impose equivalent obligations on all signatories. For example, the New" + " START treaty limits Russia and the United States to 1,550 deployed strategic warheads. But any agreement" + " with Iran will not be so balanced. The restrictions and obligations in the final framework agreement" + " will be imposed almost exclusively on Iran. The P5+1 are obligated only to ease and eventually remove" + " most but not all economic sanctions, which were imposed as leverage to gain this final deal. Finally" + " some insist that any agreement must address Iranian missile programs, human rights violations or support" + " for Hamas or Hezbollah. As important as these issues are, and they must indeed be addressed, they are" + " unrelated to the most important aim of a nuclear deal: preventing a nuclear Iran. To include them in" + " the negotiations would be a poison pill. This agreement should be judged on its merits and on how it" + " affects the security of our negotiating partners and allies, including Israel. Those judgments should be" + " fact-based, not based on questionable assertions or dubious assumptions." + ) - ARTICLE_SUBWAY = 'New York (CNN)When Liana Barrientos was 23 years old, she got married in Westchester County, New York. A year later, she got married again in Westchester County, but to a different man and without divorcing her first husband. Only 18 days after that marriage, she got hitched yet again. Then, Barrientos declared "I do" five more times, sometimes only within two weeks of each other. In 2010, she married once more, this time in the Bronx. In an application for a marriage license, she stated it was her "first and only" marriage. Barrientos, now 39, is facing two criminal counts of "offering a false instrument for filing in the first degree," referring to her false statements on the 2010 marriage license application, according to court documents. Prosecutors said the marriages were part of an immigration scam. On Friday, she pleaded not guilty at State Supreme Court in the Bronx, according to her attorney, Christopher Wright, who declined to comment further. 
After leaving court, Barrientos was arrested and charged with theft of service and criminal trespass for allegedly sneaking into the New York subway through an emergency exit, said Detective Annette Markowski, a police spokeswoman. In total, Barrientos has been married 10 times, with nine of her marriages occurring between 1999 and 2002. All occurred either in Westchester County, Long Island, New Jersey or the Bronx. She is believed to still be married to four men, and at one time, she was married to eight men at once, prosecutors say. Prosecutors said the immigration scam involved some of her husbands, who filed for permanent residence status shortly after the marriages. Any divorces happened only after such filings were approved. It was unclear whether any of the men will be prosecuted. The case was referred to the Bronx District Attorney\'s Office by Immigration and Customs Enforcement and the Department of Homeland Security\'s Investigation Division. Seven of the men are from so-called "red-flagged" countries, including Egypt, Turkey, Georgia, Pakistan and Mali. Her eighth husband, Rashid Rajput, was deported in 2006 to his native Pakistan after an investigation by the Joint Terrorism Task Force. If convicted, Barrientos faces up to four years in prison. Her next court appearance is scheduled for May 18.' + ARTICLE_SUBWAY = ( + "New York (CNN)When Liana Barrientos was 23 years old, she got married in Westchester County, New York. A" + " year later, she got married again in Westchester County, but to a different man and without divorcing" + " her first husband. Only 18 days after that marriage, she got hitched yet again. Then, Barrientos" + ' declared "I do" five more times, sometimes only within two weeks of each other. In 2010, she married' + " once more, this time in the Bronx. In an application for a marriage license, she stated it was her" + ' "first and only" marriage. Barrientos, now 39, is facing two criminal counts of "offering a false' + ' instrument for filing in the first degree," referring to her false statements on the 2010 marriage' + " license application, according to court documents. Prosecutors said the marriages were part of an" + " immigration scam. On Friday, she pleaded not guilty at State Supreme Court in the Bronx, according to" + " her attorney, Christopher Wright, who declined to comment further. After leaving court, Barrientos was" + " arrested and charged with theft of service and criminal trespass for allegedly sneaking into the New" + " York subway through an emergency exit, said Detective Annette Markowski, a police spokeswoman. In total," + " Barrientos has been married 10 times, with nine of her marriages occurring between 1999 and 2002. All" + " occurred either in Westchester County, Long Island, New Jersey or the Bronx. She is believed to still be" + " married to four men, and at one time, she was married to eight men at once, prosecutors say. Prosecutors" + " said the immigration scam involved some of her husbands, who filed for permanent residence status" + " shortly after the marriages. Any divorces happened only after such filings were approved. It was" + " unclear whether any of the men will be prosecuted. The case was referred to the Bronx District" + " Attorney's Office by Immigration and Customs Enforcement and the Department of Homeland Security's" + ' Investigation Division. Seven of the men are from so-called "red-flagged" countries, including Egypt,' + " Turkey, Georgia, Pakistan and Mali. 
Her eighth husband, Rashid Rajput, was deported in 2006 to his" + " native Pakistan after an investigation by the Joint Terrorism Task Force. If convicted, Barrientos faces" + " up to four years in prison. Her next court appearance is scheduled for May 18." + ) expected_summaries = [ - 'prosecutor: "so far no videos were used in the crash investigation" two magazines claim to have found a cell phone video of the final seconds . "one can hear cries of \'My God\' in several languages," one magazine says .', - "the formal accession was marked by a ceremony at The Hague, in the Netherlands . the ICC opened a preliminary examination into the situation in the occupied Palestinian territory . as members of the court, Palestinians may be subject to counter-charges as well .", - "the u.s. and its negotiating partners reached a very strong framework agreement with Iran . aaron miller: the debate that has already begun since the announcement of the new framework will likely result in more heat than light . the deal would reduce Iran's low-enriched uranium stockpile, cut centrifuges and implement a rigorous inspection regime .", - 'prosecutors say the marriages were part of an immigration scam . if convicted, barrientos faces two criminal counts of "offering a false instrument for filing in the first degree" she has been married 10 times, with nine of her marriages occurring between 1999 and 2002 .', + 'prosecutor: "so far no videos were used in the crash investigation" two magazines claim to have found a' + " cell phone video of the final seconds . \"one can hear cries of 'My God' in several languages,\" one" + " magazine says .", + "the formal accession was marked by a ceremony at The Hague, in the Netherlands . the ICC opened a" + " preliminary examination into the situation in the occupied Palestinian territory . as members of the" + " court, Palestinians may be subject to counter-charges as well .", + "the u.s. and its negotiating partners reached a very strong framework agreement with Iran . aaron miller:" + " the debate that has already begun since the announcement of the new framework will likely result in more" + " heat than light . the deal would reduce Iran's low-enriched uranium stockpile, cut centrifuges and" + " implement a rigorous inspection regime .", + "prosecutors say the marriages were part of an immigration scam . if convicted, barrientos faces two" + ' criminal counts of "offering a false instrument for filing in the first degree" she has been married 10' + " times, with nine of her marriages occurring between 1999 and 2002 .", ] task_specific_config = getattr(model.config, "task_specific_params", {}) @@ -787,7 +979,10 @@ class TFT5ModelIntegrationTests(unittest.TestCase): translation_config = task_specific_config.get("translation_en_to_fr", {}) model.config.update(translation_config) - en_text = ' This image section from an infrared recording by the Spitzer telescope shows a "family portrait" of countless generations of stars: the oldest stars are seen as blue dots. ' + en_text = ( + ' This image section from an infrared recording by the Spitzer telescope shows a "family portrait" of' + " countless generations of stars: the oldest stars are seen as blue dots. 
" + ) new_truncated_translation = ( "Cette section d'images provenant de l'enregistrement infrarouge effectué par le télescope Spitzer montre " diff --git a/tests/models/tapas/test_tokenization_tapas.py b/tests/models/tapas/test_tokenization_tapas.py index 002f8c7e75..8aed45a5b6 100644 --- a/tests/models/tapas/test_tokenization_tapas.py +++ b/tests/models/tapas/test_tokenization_tapas.py @@ -251,7 +251,7 @@ class TapasTokenizationTest(TokenizerTesterMixin, unittest.TestCase): vocab_tokens = ["[UNK]", "[CLS]", "[SEP]", "want", "##want", "##ed", "wa", "un", "runn", "##ing"] vocab = {} - for (i, token) in enumerate(vocab_tokens): + for i, token in enumerate(vocab_tokens): vocab[token] = i tokenizer = WordpieceTokenizer(vocab=vocab, unk_token="[UNK]") diff --git a/tests/models/vilt/test_modeling_vilt.py b/tests/models/vilt/test_modeling_vilt.py index 0c6783c439..1a2f95d0e6 100644 --- a/tests/models/vilt/test_modeling_vilt.py +++ b/tests/models/vilt/test_modeling_vilt.py @@ -589,7 +589,10 @@ class ViltModelIntegrationTest(unittest.TestCase): image1 = Image.open(dataset[0]["file"]).convert("RGB") image2 = Image.open(dataset[1]["file"]).convert("RGB") - text = "The left image contains twice the number of dogs as the right image, and at least two dogs in total are standing." + text = ( + "The left image contains twice the number of dogs as the right image, and at least two dogs in total are" + " standing." + ) encoding_1 = processor(image1, text, return_tensors="pt") encoding_2 = processor(image2, text, return_tensors="pt") diff --git a/tests/models/wav2vec2/test_modeling_flax_wav2vec2.py b/tests/models/wav2vec2/test_modeling_flax_wav2vec2.py index a228ebfa19..b74e271c02 100644 --- a/tests/models/wav2vec2/test_modeling_flax_wav2vec2.py +++ b/tests/models/wav2vec2/test_modeling_flax_wav2vec2.py @@ -463,7 +463,8 @@ class FlaxWav2Vec2ModelIntegrationTest(unittest.TestCase): EXPECTED_TRANSCRIPTIONS = [ "a man said to the universe sir i exist", "sweat covered brion's body trickling into the tight loin cloth that was the only garment he wore", - "the cut on his chest still dripping blood the ache of his overstrained eyes even the soaring arena around him with the thousands of spectators were trivialities not worth thinking about", + "the cut on his chest still dripping blood the ache of his overstrained eyes even the soaring arena around" + " him with the thousands of spectators were trivialities not worth thinking about", "his instant panic was followed by a small sharp blow high on his chest", ] self.assertListEqual(predicted_trans, EXPECTED_TRANSCRIPTIONS) diff --git a/tests/models/wav2vec2/test_modeling_tf_wav2vec2.py b/tests/models/wav2vec2/test_modeling_tf_wav2vec2.py index 3187303982..323f44ba99 100644 --- a/tests/models/wav2vec2/test_modeling_tf_wav2vec2.py +++ b/tests/models/wav2vec2/test_modeling_tf_wav2vec2.py @@ -548,7 +548,8 @@ class TFWav2Vec2ModelIntegrationTest(unittest.TestCase): EXPECTED_TRANSCRIPTIONS = [ "a man said to the universe sir i exist", "sweat covered brion's body trickling into the tight loin cloth that was the only garment he wore", - "the cut on his chest still dripping blood the ache of his overstrained eyes even the soaring arena around him with the thousands of spectators were trivialities not worth thinking about", + "the cut on his chest still dripping blood the ache of his overstrained eyes even the soaring arena around" + " him with the thousands of spectators were trivialities not worth thinking about", "his instant panic was followed by a small sharp blow high on 
his chest", ] self.assertListEqual(predicted_trans, EXPECTED_TRANSCRIPTIONS) diff --git a/tests/models/wav2vec2/test_modeling_wav2vec2.py b/tests/models/wav2vec2/test_modeling_wav2vec2.py index 98aebdd728..21f77b19a5 100644 --- a/tests/models/wav2vec2/test_modeling_wav2vec2.py +++ b/tests/models/wav2vec2/test_modeling_wav2vec2.py @@ -1179,7 +1179,8 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase): EXPECTED_TRANSCRIPTIONS = [ "a man said to the universe sir i exist", "sweat covered brion's body trickling into the tight loin cloth that was the only garment he wore", - "the cut on his chest still dripping blood the ache of his overstrained eyes even the soaring arena around him with the thousands of spectators were trivialities not worth thinking about", + "the cut on his chest still dripping blood the ache of his overstrained eyes even the soaring arena around" + " him with the thousands of spectators were trivialities not worth thinking about", "his instant panic was followed by a small sharp blow high on his chest", ] self.assertListEqual(predicted_trans, EXPECTED_TRANSCRIPTIONS) @@ -1461,8 +1462,11 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase): EXPECTED_TRANSCRIPTIONS = [ "ɐ m æ n s ɛ d t ə ð ə j uː n ɪ v ɚ s s ɚ aɪ ɛ ɡ z ɪ s t", - "s w ɛ t k ʌ v ɚ d b ɹ iː ɔ n z b ɑː d i t ɹ ɪ k l ɪ ŋ ɪ n t ə ð ə t aɪ t l oɪ n k l ɑː θ ð æ w ʌ z ð ɪ oʊ n l i ɡ ɑːɹ m ə n t h iː w ɔːɹ", - "ð ə k aɪ t ɔ n h ɪ z tʃ ɛ s t s t ɪ l d ɹ ɪ p ɪ ŋ b l ʌ d ð ɪ eɪ k ʌ v h ɪ z oʊ v ɚ s t ɹ eɪ n d aɪ z iː v ə n ð ə s ɔːɹ ɹ ɪ ŋ ɐ ɹ iː n ɐ ɚ ɹ aʊ n d h ɪ m w ɪ ð ə θ aʊ z ə n d z ʌ v s p ɛ k t eɪ ɾ ɚ z w ɜː t ɹ ɪ v ɪ æ l ᵻ ɾ i z n ɑː t w ɜː θ θ ɪ ŋ k ɪ ŋ ɐ b aʊ t", + "s w ɛ t k ʌ v ɚ d b ɹ iː ɔ n z b ɑː d i t ɹ ɪ k l ɪ ŋ ɪ n t ə ð ə t aɪ t l oɪ n k l ɑː θ ð æ w ʌ z ð ɪ oʊ" + " n l i ɡ ɑːɹ m ə n t h iː w ɔːɹ", + "ð ə k aɪ t ɔ n h ɪ z tʃ ɛ s t s t ɪ l d ɹ ɪ p ɪ ŋ b l ʌ d ð ɪ eɪ k ʌ v h ɪ z oʊ v ɚ s t ɹ eɪ n d aɪ z iː" + " v ə n ð ə s ɔːɹ ɹ ɪ ŋ ɐ ɹ iː n ɐ ɚ ɹ aʊ n d h ɪ m w ɪ ð ə θ aʊ z ə n d z ʌ v s p ɛ k t eɪ ɾ ɚ z w ɜː t ɹ" + " ɪ v ɪ æ l ᵻ ɾ i z n ɑː t w ɜː θ θ ɪ ŋ k ɪ ŋ ɐ b aʊ t", "h ɪ z ɪ n s t ə n t v p æ n ɪ k w ʌ z f ɑː l oʊ d b aɪ ɐ s m ɔː l ʃ ɑːɹ p b l oʊ h aɪ ɔ n h ɪ z tʃ ɛ s t", ] # should correspond to =>: diff --git a/tests/models/xglm/test_tokenization_xglm.py b/tests/models/xglm/test_tokenization_xglm.py index dd5c9f5e6a..05259ffaf9 100644 --- a/tests/models/xglm/test_tokenization_xglm.py +++ b/tests/models/xglm/test_tokenization_xglm.py @@ -179,7 +179,10 @@ class XGLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase): @slow def test_tokenization_base_hard_symbols(self): - symbols = 'This is a very long text with a lot of weird characters, such as: . , ~ ? ( ) " [ ] ! : - . Also we will add words that should not exsist and be tokenized to unk, such as saoneuhaoesuth' + symbols = ( + 'This is a very long text with a lot of weird characters, such as: . , ~ ? ( ) " [ ] ! : - . 
Also we will' + " add words that should not exsist and be tokenized to unk, such as saoneuhaoesuth" + ) # fmt: off original_tokenizer_encodings = [2, 1018, 67, 11, 1988, 2617, 5631, 278, 11, 3407, 48, 71630, 28085, 4, 3234, 157, 13, 6, 5, 6, 4, 3526, 768, 15, 659, 57, 298, 3983, 864, 129, 21, 6, 5, 13675, 377, 652, 7580, 10341, 155, 2817, 422, 1666, 7, 1674, 53, 113, 202277, 17892, 33, 60, 87, 4, 3234, 157, 61, 2667, 52376, 19, 88, 23, 735] # fmt: on diff --git a/tests/models/xlm_prophetnet/test_modeling_xlm_prophetnet.py b/tests/models/xlm_prophetnet/test_modeling_xlm_prophetnet.py index 51e8502b9b..5dec186bc7 100644 --- a/tests/models/xlm_prophetnet/test_modeling_xlm_prophetnet.py +++ b/tests/models/xlm_prophetnet/test_modeling_xlm_prophetnet.py @@ -102,8 +102,18 @@ class XLMProphetNetModelIntegrationTest(unittest.TestCase): tokenizer = XLMProphetNetTokenizer.from_pretrained("microsoft/xprophetnet-large-wiki100-cased-xglue-ntg") - EN_SENTENCE = "Microsoft Corporation intends to officially end free support for the Windows 7 operating system after January 14, 2020, according to the official portal of the organization. From that day, users of this system will not be able to receive security updates, which could make their computers vulnerable to cyber attacks." - RU_SENTENCE = "орпорация Microsoft намерена официально прекратить бесплатную поддержку операционной системы Windows 7 после 14 января 2020 года, сообщается на официальном портале организации . С указанного дня пользователи этой системы не смогут получать обновления безопасности, из-за чего их компьютеры могут стать уязвимыми к кибератакам." + EN_SENTENCE = ( + "Microsoft Corporation intends to officially end free support for the Windows 7 operating system after" + " January 14, 2020, according to the official portal of the organization. From that day, users of this" + " system will not be able to receive security updates, which could make their computers vulnerable to" + " cyber attacks." + ) + RU_SENTENCE = ( + "орпорация Microsoft намерена официально прекратить бесплатную поддержку операционной системы Windows 7" + " после 14 января 2020 года, сообщается на официальном портале организации . С указанного дня пользователи" + " этой системы не смогут получать обновления безопасности, из-за чего их компьютеры могут стать уязвимыми" + " к кибератакам." 
+ ) ZH_SENTENCE = ( "根据该组织的官方门户网站,微软公司打算在2020年1月14日之后正式终止对Windows 7操作系统的免费支持。从那时起,该系统的用户将无法接收安全更新,这可能会使他们的计算机容易受到网络攻击。" ) @@ -132,8 +142,9 @@ class XLMProphetNetModelIntegrationTest(unittest.TestCase): tokenizer.convert_ids_to_tokens(g, skip_special_tokens=True) for g in summary_ids_beam1 ] EXPECTED_TITLE_EN_BEAM1_TOK = "▁Microsoft ▁to ▁end ▁free ▁support ▁for ▁Windows ▁7".split(" ") - EXPECTED_TITLE_RU_BEAM1_TOK = "▁Microsoft ▁намерен а ▁прекрати ть ▁бес плат ную ▁поддержку ▁Windows ▁7 ▁после ▁14 ▁января ▁2020 ▁года".split( - " " + EXPECTED_TITLE_RU_BEAM1_TOK = ( + "▁Microsoft ▁намерен а ▁прекрати ть ▁бес плат ную ▁поддержку ▁Windows ▁7 ▁после ▁14 ▁января ▁2020 ▁года" + .split(" ") ) EXPECTED_TITLE_ZH_BEAM1_TOK = "微软 公司 打算 终止 对 Windows ▁7 操作 系统的 免费 支持".split(" ") self.assertListEqual( diff --git a/tests/models/xlm_roberta/test_tokenization_xlm_roberta.py b/tests/models/xlm_roberta/test_tokenization_xlm_roberta.py index 53c5987fb2..c8f934b258 100644 --- a/tests/models/xlm_roberta/test_tokenization_xlm_roberta.py +++ b/tests/models/xlm_roberta/test_tokenization_xlm_roberta.py @@ -256,7 +256,10 @@ class XLMRobertaTokenizationTest(TokenizerTesterMixin, unittest.TestCase): @slow def test_tokenization_base_hard_symbols(self): - symbols = 'This is a very long text with a lot of weird characters, such as: . , ~ ? ( ) " [ ] ! : - . Also we will add words that should not exsist and be tokenized to , such as saoneuhaoesuth' + symbols = ( + 'This is a very long text with a lot of weird characters, such as: . , ~ ? ( ) " [ ] ! : - . Also we will' + " add words that should not exsist and be tokenized to , such as saoneuhaoesuth" + ) original_tokenizer_encodings = [ 0, 3293, diff --git a/tests/onnx/test_onnx_v2.py b/tests/onnx/test_onnx_v2.py index 43a3ad45e1..eb234e9896 100644 --- a/tests/onnx/test_onnx_v2.py +++ b/tests/onnx/test_onnx_v2.py @@ -224,7 +224,7 @@ TENSORFLOW_EXPORT_SEQ2SEQ_WITH_PAST_MODELS = {} def _get_models_to_test(export_models_list): models_to_test = [] if is_torch_available() or is_tf_available(): - for (name, model) in export_models_list: + for name, model in export_models_list: for feature, onnx_config_class_constructor in FeaturesManager.get_supported_features_for_model_type( name ).items(): @@ -255,7 +255,8 @@ class OnnxExportTestCaseV2(TestCase): if torch_version < onnx_config.torch_onnx_minimum_version: pytest.skip( - f"Skipping due to incompatible PyTorch version. Minimum required is {onnx_config.torch_onnx_minimum_version}, got: {torch_version}" + "Skipping due to incompatible PyTorch version. 
Minimum required is" + f" {onnx_config.torch_onnx_minimum_version}, got: {torch_version}" ) # Check the modality of the inputs and instantiate the appropriate preprocessor diff --git a/tests/pipelines/test_pipelines_automatic_speech_recognition.py b/tests/pipelines/test_pipelines_automatic_speech_recognition.py index ec54055d7d..25bf520eaf 100644 --- a/tests/pipelines/test_pipelines_automatic_speech_recognition.py +++ b/tests/pipelines/test_pipelines_automatic_speech_recognition.py @@ -184,7 +184,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase, metaclass=Pipel self.assertEqual( output, { - "text": "y en las ramas medio sumergidas revoloteaban algunos pájaros de quimérico y legendario plumajre" + "text": ( + "y en las ramas medio sumergidas revoloteaban algunos pájaros de quimérico y legendario plumajre" + ) }, ) @@ -194,7 +196,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase, metaclass=Pipel self.assertEqual( output, { - "text": "y en las ramas medio sumergidas revoloteaban algunos pájaros de quimérico y legendario plumajcri", + "text": ( + "y en las ramas medio sumergidas revoloteaban algunos pájaros de quimérico y legendario plumajcri" + ), "chunks": [ {"text": "y", "timestamp": (0.52, 0.54)}, {"text": "en", "timestamp": (0.6, 0.68)}, diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py index 818191b725..6a6c8b73e5 100644 --- a/tests/pipelines/test_pipelines_common.py +++ b/tests/pipelines/test_pipelines_common.py @@ -184,7 +184,8 @@ class PipelineTestCaseMeta(type): if tokenizer is None and feature_extractor is None: self.skipTest( - f"Ignoring {ModelClass}, cannot create a tokenizer or feature_extractor (PerceiverConfig with no FastTokenizer ?)" + f"Ignoring {ModelClass}, cannot create a tokenizer or feature_extractor (PerceiverConfig with" + " no FastTokenizer ?)" ) pipeline, examples = self.get_test_pipeline(model, tokenizer, feature_extractor) if pipeline is None: diff --git a/tests/pipelines/test_pipelines_question_answering.py b/tests/pipelines/test_pipelines_question_answering.py index 844ed0b683..b775f7b7d3 100644 --- a/tests/pipelines/test_pipelines_question_answering.py +++ b/tests/pipelines/test_pipelines_question_answering.py @@ -199,7 +199,42 @@ class QAPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta): ) outputs = qa_pipeline( { - "context": "Yes Bank founder Rana Kapoor has approached the Bombay High Court, challenging a special court's order from August this year that had remanded him in police custody for a week in a multi-crore loan fraud case. Kapoor, who is currently lodged in Taloja Jail, is an accused in the loan fraud case and some related matters being probed by the CBI and Enforcement Directorate. A single bench presided over by Justice S K Shinde on Tuesday posted the plea for further hearing on October 14. In his plea filed through advocate Vijay Agarwal, Kapoor claimed that the special court's order permitting the CBI's request for police custody on August 14 was illegal and in breach of the due process of law. Therefore, his police custody and subsequent judicial custody in the case were all illegal. Kapoor has urged the High Court to quash and set aside the special court's order dated August 14. As per his plea, in August this year, the CBI had moved two applications before the special court, one seeking permission to arrest Kapoor, who was already in judicial custody at the time in another case, and the other, seeking his police custody. 
While the special court refused to grant permission to the CBI to arrest Kapoor, it granted the central agency's plea for his custody. Kapoor, however, said in his plea that before filing an application for his arrest, the CBI had not followed the process of issuing him a notice under Section 41 of the CrPC for appearance before it. He further said that the CBI had not taken prior sanction as mandated under section 17 A of the Prevention of Corruption Act for prosecuting him. The special court, however, had said in its order at the time that as Kapoor was already in judicial custody in another case and was not a free man the procedure mandated under Section 41 of the CrPC need not have been adhered to as far as issuing a prior notice of appearance was concerned. ADVERTISING It had also said that case records showed that the investigating officer had taken an approval from a managing director of Yes Bank before beginning the proceedings against Kapoor and such a permission was a valid sanction. However, Kapoor in his plea said that the above order was bad in law and sought that it be quashed and set aside. The law mandated that if initial action was not in consonance with legal procedures, then all subsequent actions must be held as illegal, he said, urging the High Court to declare the CBI remand and custody and all subsequent proceedings including the further custody as illegal and void ab-initio. In a separate plea before the High Court, Kapoor's daughter Rakhee Kapoor-Tandon has sought exemption from in-person appearance before a special PMLA court. Rakhee has stated that she is a resident of the United Kingdom and is unable to travel to India owing to restrictions imposed due to the COVID-19 pandemic. According to the CBI, in the present case, Kapoor had obtained a gratification or pecuniary advantage of ₹ 307 crore, and thereby caused Yes Bank a loss of ₹ 1,800 crore by extending credit facilities to Avantha Group, when it was not eligible for the same", + "context": ( + "Yes Bank founder Rana Kapoor has approached the Bombay High Court, challenging a special court's" + " order from August this year that had remanded him in police custody for a week in a multi-crore" + " loan fraud case. Kapoor, who is currently lodged in Taloja Jail, is an accused in the loan fraud" + " case and some related matters being probed by the CBI and Enforcement Directorate. A single" + " bench presided over by Justice S K Shinde on Tuesday posted the plea for further hearing on" + " October 14. In his plea filed through advocate Vijay Agarwal, Kapoor claimed that the special" + " court's order permitting the CBI's request for police custody on August 14 was illegal and in" + " breach of the due process of law. Therefore, his police custody and subsequent judicial custody" + " in the case were all illegal. Kapoor has urged the High Court to quash and set aside the special" + " court's order dated August 14. As per his plea, in August this year, the CBI had moved two" + " applications before the special court, one seeking permission to arrest Kapoor, who was already" + " in judicial custody at the time in another case, and the other, seeking his police custody." + " While the special court refused to grant permission to the CBI to arrest Kapoor, it granted the" + " central agency's plea for his custody. 
Kapoor, however, said in his plea that before filing an" + " application for his arrest, the CBI had not followed the process of issuing him a notice under" + " Section 41 of the CrPC for appearance before it. He further said that the CBI had not taken" + " prior sanction as mandated under section 17 A of the Prevention of Corruption Act for" + " prosecuting him. The special court, however, had said in its order at the time that as Kapoor" + " was already in judicial custody in another case and was not a free man the procedure mandated" + " under Section 41 of the CrPC need not have been adhered to as far as issuing a prior notice of" + " appearance was concerned. ADVERTISING It had also said that case records showed that the" + " investigating officer had taken an approval from a managing director of Yes Bank before" + " beginning the proceedings against Kapoor and such a permission was a valid sanction. However," + " Kapoor in his plea said that the above order was bad in law and sought that it be quashed and" + " set aside. The law mandated that if initial action was not in consonance with legal procedures," + " then all subsequent actions must be held as illegal, he said, urging the High Court to declare" + " the CBI remand and custody and all subsequent proceedings including the further custody as" + " illegal and void ab-initio. In a separate plea before the High Court, Kapoor's daughter Rakhee" + " Kapoor-Tandon has sought exemption from in-person appearance before a special PMLA court. Rakhee" + " has stated that she is a resident of the United Kingdom and is unable to travel to India owing" + " to restrictions imposed due to the COVID-19 pandemic. According to the CBI, in the present case," + " Kapoor had obtained a gratification or pecuniary advantage of ₹ 307 crore, and thereby caused" + " Yes Bank a loss of ₹ 1,800 crore by extending credit facilities to Avantha Group, when it was" + " not eligible for the same" + ), "question": "Is this person invovled in fraud?", } ) diff --git a/tests/pipelines/test_pipelines_summarization.py b/tests/pipelines/test_pipelines_summarization.py index e434ed742d..f802e5b63d 100644 --- a/tests/pipelines/test_pipelines_summarization.py +++ b/tests/pipelines/test_pipelines_summarization.py @@ -91,7 +91,49 @@ class SummarizationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMe @slow def test_integration_torch_summarization(self): summarizer = pipeline(task="summarization", device=DEFAULT_DEVICE_NUM) - cnn_article = ' (CNN)The Palestinian Authority officially became the 123rd member of the International Criminal Court on Wednesday, a step that gives the court jurisdiction over alleged crimes in Palestinian territories. The formal accession was marked with a ceremony at The Hague, in the Netherlands, where the court is based. The Palestinians signed the ICC\'s founding Rome Statute in January, when they also accepted its jurisdiction over alleged crimes committed "in the occupied Palestinian territory, including East Jerusalem, since June 13, 2014." Later that month, the ICC opened a preliminary examination into the situation in Palestinian territories, paving the way for possible war crimes investigations against Israelis. As members of the court, Palestinians may be subject to counter-charges as well. Israel and the United States, neither of which is an ICC member, opposed the Palestinians\' efforts to join the body. 
But Palestinian Foreign Minister Riad al-Malki, speaking at Wednesday\'s ceremony, said it was a move toward greater justice. "As Palestine formally becomes a State Party to the Rome Statute today, the world is also a step closer to ending a long era of impunity and injustice," he said, according to an ICC news release. "Indeed, today brings us closer to our shared goals of justice and peace." Judge Kuniko Ozaki, a vice president of the ICC, said acceding to the treaty was just the first step for the Palestinians. "As the Rome Statute today enters into force for the State of Palestine, Palestine acquires all the rights as well as responsibilities that come with being a State Party to the Statute. These are substantive commitments, which cannot be taken lightly," she said. Rights group Human Rights Watch welcomed the development. "Governments seeking to penalize Palestine for joining the ICC should immediately end their pressure, and countries that support universal acceptance of the court\'s treaty should speak out to welcome its membership," said Balkees Jarrah, international justice counsel for the group. "What\'s objectionable is the attempts to undermine international justice, not Palestine\'s decision to join a treaty to which over 100 countries around the world are members." In January, when the preliminary ICC examination was opened, Israeli Prime Minister Benjamin Netanyahu described it as an outrage, saying the court was overstepping its boundaries. The United States also said it "strongly" disagreed with the court\'s decision. "As we have said repeatedly, we do not believe that Palestine is a state and therefore we do not believe that it is eligible to join the ICC," the State Department said in a statement. It urged the warring sides to resolve their differences through direct negotiations. "We will continue to oppose actions against Israel at the ICC as counterproductive to the cause of peace," it said. But the ICC begs to differ with the definition of a state for its purposes and refers to the territories as "Palestine." While a preliminary examination is not a formal investigation, it allows the court to review evidence and determine whether to investigate suspects on both sides. Prosecutor Fatou Bensouda said her office would "conduct its analysis in full independence and impartiality." The war between Israel and Hamas militants in Gaza last summer left more than 2,000 people dead. The inquiry will include alleged war crimes committed since June. The International Criminal Court was set up in 2002 to prosecute genocide, crimes against humanity and war crimes. CNN\'s Vasco Cotovio, Kareem Khadder and Faith Karimi contributed to this report.' - expected_cnn_summary = " The Palestinian Authority becomes the 123rd member of the International Criminal Court . The move gives the court jurisdiction over alleged crimes in Palestinian territories . Israel and the United States opposed the Palestinians' efforts to join the court . Rights group Human Rights Watch welcomes the move, says governments seeking to penalize Palestine should end pressure ." + cnn_article = ( + " (CNN)The Palestinian Authority officially became the 123rd member of the International Criminal Court on" + " Wednesday, a step that gives the court jurisdiction over alleged crimes in Palestinian territories. The" + " formal accession was marked with a ceremony at The Hague, in the Netherlands, where the court is based." 
+ " The Palestinians signed the ICC's founding Rome Statute in January, when they also accepted its" + ' jurisdiction over alleged crimes committed "in the occupied Palestinian territory, including East' + ' Jerusalem, since June 13, 2014." Later that month, the ICC opened a preliminary examination into the' + " situation in Palestinian territories, paving the way for possible war crimes investigations against" + " Israelis. As members of the court, Palestinians may be subject to counter-charges as well. Israel and" + " the United States, neither of which is an ICC member, opposed the Palestinians' efforts to join the" + " body. But Palestinian Foreign Minister Riad al-Malki, speaking at Wednesday's ceremony, said it was a" + ' move toward greater justice. "As Palestine formally becomes a State Party to the Rome Statute today, the' + ' world is also a step closer to ending a long era of impunity and injustice," he said, according to an' + ' ICC news release. "Indeed, today brings us closer to our shared goals of justice and peace." Judge' + " Kuniko Ozaki, a vice president of the ICC, said acceding to the treaty was just the first step for the" + ' Palestinians. "As the Rome Statute today enters into force for the State of Palestine, Palestine' + " acquires all the rights as well as responsibilities that come with being a State Party to the Statute." + ' These are substantive commitments, which cannot be taken lightly," she said. Rights group Human Rights' + ' Watch welcomed the development. "Governments seeking to penalize Palestine for joining the ICC should' + " immediately end their pressure, and countries that support universal acceptance of the court's treaty" + ' should speak out to welcome its membership," said Balkees Jarrah, international justice counsel for the' + " group. \"What's objectionable is the attempts to undermine international justice, not Palestine's" + ' decision to join a treaty to which over 100 countries around the world are members." In January, when' + " the preliminary ICC examination was opened, Israeli Prime Minister Benjamin Netanyahu described it as an" + ' outrage, saying the court was overstepping its boundaries. The United States also said it "strongly"' + " disagreed with the court's decision. \"As we have said repeatedly, we do not believe that Palestine is a" + ' state and therefore we do not believe that it is eligible to join the ICC," the State Department said in' + ' a statement. It urged the warring sides to resolve their differences through direct negotiations. "We' + ' will continue to oppose actions against Israel at the ICC as counterproductive to the cause of peace,"' + " it said. But the ICC begs to differ with the definition of a state for its purposes and refers to the" + ' territories as "Palestine." While a preliminary examination is not a formal investigation, it allows the' + " court to review evidence and determine whether to investigate suspects on both sides. Prosecutor Fatou" + ' Bensouda said her office would "conduct its analysis in full independence and impartiality." The war' + " between Israel and Hamas militants in Gaza last summer left more than 2,000 people dead. The inquiry" + " will include alleged war crimes committed since June. The International Criminal Court was set up in" + " 2002 to prosecute genocide, crimes against humanity and war crimes. CNN's Vasco Cotovio, Kareem Khadder" + " and Faith Karimi contributed to this report." 
+        )
+        expected_cnn_summary = (
+            " The Palestinian Authority becomes the 123rd member of the International Criminal Court . The move gives"
+            " the court jurisdiction over alleged crimes in Palestinian territories . Israel and the United States"
+            " opposed the Palestinians' efforts to join the court . Rights group Human Rights Watch welcomes the move,"
+            " says governments seeking to penalize Palestine should end pressure ."
+        )
         result = summarizer(cnn_article)
         self.assertEqual(result[0]["summary_text"], expected_cnn_summary)
diff --git a/tests/pipelines/test_pipelines_table_question_answering.py b/tests/pipelines/test_pipelines_table_question_answering.py
index 86bbf991b0..ba7fdaa75c 100644
--- a/tests/pipelines/test_pipelines_table_question_answering.py
+++ b/tests/pipelines/test_pipelines_table_question_answering.py
@@ -92,7 +92,8 @@ class TQAPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
             },
             query=[
                 "What repository has the largest number of stars?",
-                "Given that the numbers of stars defines if a repository is active, what repository is the most active?",
+                "Given that the numbers of stars defines if a repository is active, what repository is the most"
+                " active?",
                 "What is the number of repositories?",
                 "What is the average number of stars?",
                 "What is the total amount of stars?",
@@ -194,7 +195,8 @@ class TQAPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
             },
             query=[
                 "What repository has the largest number of stars?",
-                "Given that the numbers of stars defines if a repository is active, what repository is the most active?",
+                "Given that the numbers of stars defines if a repository is active, what repository is the most"
+                " active?",
                 "What is the number of repositories?",
                 "What is the average number of stars?",
                 "What is the total amount of stars?",
@@ -313,7 +315,8 @@ class TQAPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
             },
             query=[
                 "What repository has the largest number of stars?",
-                "Given that the numbers of stars defines if a repository is active, what repository is the most active?",
+                "Given that the numbers of stars defines if a repository is active, what repository is the most"
+                " active?",
                 "What is the number of repositories?",
                 "What is the average number of stars?",
                 "What is the total amount of stars?",
@@ -434,7 +437,8 @@ class TQAPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
             },
             query=[
                 "What repository has the largest number of stars?",
-                "Given that the numbers of stars defines if a repository is active, what repository is the most active?",
+                "Given that the numbers of stars defines if a repository is active, what repository is the most"
+                " active?",
                 "What is the number of repositories?",
                 "What is the average number of stars?",
                 "What is the total amount of stars?",
diff --git a/tests/pipelines/test_pipelines_text_generation.py b/tests/pipelines/test_pipelines_text_generation.py
index ca67c3bea1..929e2732f0 100644
--- a/tests/pipelines/test_pipelines_text_generation.py
+++ b/tests/pipelines/test_pipelines_text_generation.py
@@ -34,7 +34,10 @@ class TextGenerationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseM
             outputs,
             [
                 {
-                    "generated_text": "This is a test ☃ ☃ segmental segmental segmental 议议eski eski flutter flutter Lacy oscope. oscope. FiliFili@@"
+                    "generated_text": (
+                        "This is a test ☃ ☃ segmental segmental segmental 议议eski eski flutter flutter Lacy oscope."
+                        " oscope. FiliFili@@"
+                    )
                 }
             ],
         )
@@ -45,12 +48,18 @@ class TextGenerationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseM
             [
                 [
                     {
-                        "generated_text": "This is a test ☃ ☃ segmental segmental segmental 议议eski eski flutter flutter Lacy oscope. oscope. FiliFili@@"
+                        "generated_text": (
+                            "This is a test ☃ ☃ segmental segmental segmental 议议eski eski flutter flutter Lacy oscope."
+                            " oscope. FiliFili@@"
+                        )
                     }
                 ],
                 [
                     {
-                        "generated_text": "This is a second test ☃ segmental segmental segmental 议议eski eski flutter flutter Lacy oscope. oscope. FiliFili@@"
+                        "generated_text": (
+                            "This is a second test ☃ segmental segmental segmental 议议eski eski flutter flutter Lacy"
+                            " oscope. oscope. FiliFili@@"
+                        )
                     }
                 ],
             ],
@@ -97,7 +106,10 @@ class TextGenerationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseM
             outputs,
             [
                 {
-                    "generated_text": "This is a test FeyFeyFey(Croatis.), s.), Cannes Cannes Cannes 閲閲Cannes Cannes Cannes 攵 please,"
+                    "generated_text": (
+                        "This is a test FeyFeyFey(Croatis.), s.), Cannes Cannes Cannes 閲閲Cannes Cannes Cannes 攵"
+                        " please,"
+                    )
                 }
             ],
         )
@@ -108,12 +120,18 @@ class TextGenerationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseM
             [
                 [
                     {
-                        "generated_text": "This is a test FeyFeyFey(Croatis.), s.), Cannes Cannes Cannes 閲閲Cannes Cannes Cannes 攵 please,"
+                        "generated_text": (
+                            "This is a test FeyFeyFey(Croatis.), s.), Cannes Cannes Cannes 閲閲Cannes Cannes Cannes 攵"
+                            " please,"
+                        )
                     }
                 ],
                 [
                     {
-                        "generated_text": "This is a second test Chieftain Chieftain prefecture prefecture prefecture Cannes Cannes Cannes 閲閲Cannes Cannes Cannes 攵 please,"
+                        "generated_text": (
+                            "This is a second test Chieftain Chieftain prefecture prefecture prefecture Cannes Cannes"
+                            " Cannes 閲閲Cannes Cannes Cannes 攵 please,"
+                        )
                     }
                 ],
             ],
diff --git a/tests/pipelines/test_pipelines_translation.py b/tests/pipelines/test_pipelines_translation.py
index 368f6bc9c5..3c5999f36e 100644
--- a/tests/pipelines/test_pipelines_translation.py
+++ b/tests/pipelines/test_pipelines_translation.py
@@ -61,7 +61,10 @@ class TranslationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta
             outputs,
             [
                 {
-                    "translation_text": "Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide"
+                    "translation_text": (
+                        "Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide"
+                        " Beide Beide"
+                    )
                 }
             ],
         )
@@ -74,7 +77,10 @@ class TranslationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta
             outputs,
             [
                 {
-                    "translation_text": "Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide"
+                    "translation_text": (
+                        "Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide"
+                        " Beide Beide"
+                    )
                 }
             ],
         )
@@ -87,7 +93,10 @@ class TranslationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta
             outputs,
             [
                 {
-                    "translation_text": "monoton monoton monoton monoton monoton monoton monoton monoton monoton monoton urine urine urine urine urine urine urine urine urine"
+                    "translation_text": (
+                        "monoton monoton monoton monoton monoton monoton monoton monoton monoton monoton urine urine"
+                        " urine urine urine urine urine urine urine"
+                    )
                 }
            ],
        )
@@ -100,7 +109,10 @@ class TranslationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta
             outputs,
             [
                 {
-                    "translation_text": "monoton monoton monoton monoton monoton monoton monoton monoton monoton monoton urine urine urine urine urine urine urine urine urine"
+                    "translation_text": (
+                        "monoton monoton monoton monoton monoton monoton monoton monoton monoton monoton urine urine"
+                        " urine urine urine urine urine urine urine"
+                    )
                 }
             ],
         )
diff --git a/tests/pipelines/test_pipelines_zero_shot.py b/tests/pipelines/test_pipelines_zero_shot.py
index ed564581e5..af98ac0201 100644
--- a/tests/pipelines/test_pipelines_zero_shot.py
+++ b/tests/pipelines/test_pipelines_zero_shot.py
@@ -202,14 +202,39 @@ class ZeroShotClassificationPipelineTests(unittest.TestCase, metaclass=PipelineT
             },
         )
         outputs = zero_shot_classifier(
-            "The dominant sequence transduction models are based on complex recurrent or convolutional neural networks in an encoder-decoder configuration. The best performing models also connect the encoder and decoder through an attention mechanism. We propose a new simple network architecture, the Transformer, based solely on attention mechanisms, dispensing with recurrence and convolutions entirely. Experiments on two machine translation tasks show these models to be superior in quality while being more parallelizable and requiring significantly less time to train. Our model achieves 28.4 BLEU on the WMT 2014 English-to-German translation task, improving over the existing best results, including ensembles by over 2 BLEU. On the WMT 2014 English-to-French translation task, our model establishes a new single-model state-of-the-art BLEU score of 41.8 after training for 3.5 days on eight GPUs, a small fraction of the training costs of the best models from the literature. We show that the Transformer generalizes well to other tasks by applying it successfully to English constituency parsing both with large and limited training data.",
+            "The dominant sequence transduction models are based on complex recurrent or convolutional neural networks"
+            " in an encoder-decoder configuration. The best performing models also connect the encoder and decoder"
+            " through an attention mechanism. We propose a new simple network architecture, the Transformer, based"
+            " solely on attention mechanisms, dispensing with recurrence and convolutions entirely. Experiments on two"
+            " machine translation tasks show these models to be superior in quality while being more parallelizable"
+            " and requiring significantly less time to train. Our model achieves 28.4 BLEU on the WMT 2014"
+            " English-to-German translation task, improving over the existing best results, including ensembles by"
+            " over 2 BLEU. On the WMT 2014 English-to-French translation task, our model establishes a new"
+            " single-model state-of-the-art BLEU score of 41.8 after training for 3.5 days on eight GPUs, a small"
+            " fraction of the training costs of the best models from the literature. We show that the Transformer"
+            " generalizes well to other tasks by applying it successfully to English constituency parsing both with"
+            " large and limited training data.",
             candidate_labels=["machine learning", "statistics", "translation", "vision"],
             multi_label=True,
         )
         self.assertEqual(
             nested_simplify(outputs),
             {
-                "sequence": "The dominant sequence transduction models are based on complex recurrent or convolutional neural networks in an encoder-decoder configuration. The best performing models also connect the encoder and decoder through an attention mechanism. We propose a new simple network architecture, the Transformer, based solely on attention mechanisms, dispensing with recurrence and convolutions entirely. Experiments on two machine translation tasks show these models to be superior in quality while being more parallelizable and requiring significantly less time to train. Our model achieves 28.4 BLEU on the WMT 2014 English-to-German translation task, improving over the existing best results, including ensembles by over 2 BLEU. On the WMT 2014 English-to-French translation task, our model establishes a new single-model state-of-the-art BLEU score of 41.8 after training for 3.5 days on eight GPUs, a small fraction of the training costs of the best models from the literature. We show that the Transformer generalizes well to other tasks by applying it successfully to English constituency parsing both with large and limited training data.",
+                "sequence": (
+                    "The dominant sequence transduction models are based on complex recurrent or convolutional neural"
+                    " networks in an encoder-decoder configuration. The best performing models also connect the"
+                    " encoder and decoder through an attention mechanism. We propose a new simple network"
+                    " architecture, the Transformer, based solely on attention mechanisms, dispensing with recurrence"
+                    " and convolutions entirely. Experiments on two machine translation tasks show these models to be"
+                    " superior in quality while being more parallelizable and requiring significantly less time to"
+                    " train. Our model achieves 28.4 BLEU on the WMT 2014 English-to-German translation task,"
+                    " improving over the existing best results, including ensembles by over 2 BLEU. On the WMT 2014"
+                    " English-to-French translation task, our model establishes a new single-model state-of-the-art"
+                    " BLEU score of 41.8 after training for 3.5 days on eight GPUs, a small fraction of the training"
+                    " costs of the best models from the literature. We show that the Transformer generalizes well to"
+                    " other tasks by applying it successfully to English constituency parsing both with large and"
+                    " limited training data."
+                ),
                 "labels": ["translation", "machine learning", "vision", "statistics"],
                 "scores": [0.817, 0.713, 0.018, 0.018],
             },
@@ -232,14 +257,39 @@ class ZeroShotClassificationPipelineTests(unittest.TestCase, metaclass=PipelineT
             },
         )
         outputs = zero_shot_classifier(
-            "The dominant sequence transduction models are based on complex recurrent or convolutional neural networks in an encoder-decoder configuration. The best performing models also connect the encoder and decoder through an attention mechanism. We propose a new simple network architecture, the Transformer, based solely on attention mechanisms, dispensing with recurrence and convolutions entirely. Experiments on two machine translation tasks show these models to be superior in quality while being more parallelizable and requiring significantly less time to train. Our model achieves 28.4 BLEU on the WMT 2014 English-to-German translation task, improving over the existing best results, including ensembles by over 2 BLEU. On the WMT 2014 English-to-French translation task, our model establishes a new single-model state-of-the-art BLEU score of 41.8 after training for 3.5 days on eight GPUs, a small fraction of the training costs of the best models from the literature. We show that the Transformer generalizes well to other tasks by applying it successfully to English constituency parsing both with large and limited training data.",
+            "The dominant sequence transduction models are based on complex recurrent or convolutional neural networks"
+            " in an encoder-decoder configuration. The best performing models also connect the encoder and decoder"
+            " through an attention mechanism. We propose a new simple network architecture, the Transformer, based"
+            " solely on attention mechanisms, dispensing with recurrence and convolutions entirely. Experiments on two"
+            " machine translation tasks show these models to be superior in quality while being more parallelizable"
+            " and requiring significantly less time to train. Our model achieves 28.4 BLEU on the WMT 2014"
+            " English-to-German translation task, improving over the existing best results, including ensembles by"
+            " over 2 BLEU. On the WMT 2014 English-to-French translation task, our model establishes a new"
+            " single-model state-of-the-art BLEU score of 41.8 after training for 3.5 days on eight GPUs, a small"
+            " fraction of the training costs of the best models from the literature. We show that the Transformer"
+            " generalizes well to other tasks by applying it successfully to English constituency parsing both with"
+            " large and limited training data.",
             candidate_labels=["machine learning", "statistics", "translation", "vision"],
             multi_label=True,
         )
         self.assertEqual(
             nested_simplify(outputs),
             {
-                "sequence": "The dominant sequence transduction models are based on complex recurrent or convolutional neural networks in an encoder-decoder configuration. The best performing models also connect the encoder and decoder through an attention mechanism. We propose a new simple network architecture, the Transformer, based solely on attention mechanisms, dispensing with recurrence and convolutions entirely. Experiments on two machine translation tasks show these models to be superior in quality while being more parallelizable and requiring significantly less time to train. Our model achieves 28.4 BLEU on the WMT 2014 English-to-German translation task, improving over the existing best results, including ensembles by over 2 BLEU. On the WMT 2014 English-to-French translation task, our model establishes a new single-model state-of-the-art BLEU score of 41.8 after training for 3.5 days on eight GPUs, a small fraction of the training costs of the best models from the literature. We show that the Transformer generalizes well to other tasks by applying it successfully to English constituency parsing both with large and limited training data.",
+                "sequence": (
+                    "The dominant sequence transduction models are based on complex recurrent or convolutional neural"
+                    " networks in an encoder-decoder configuration. The best performing models also connect the"
+                    " encoder and decoder through an attention mechanism. We propose a new simple network"
+                    " architecture, the Transformer, based solely on attention mechanisms, dispensing with recurrence"
+                    " and convolutions entirely. Experiments on two machine translation tasks show these models to be"
+                    " superior in quality while being more parallelizable and requiring significantly less time to"
+                    " train. Our model achieves 28.4 BLEU on the WMT 2014 English-to-German translation task,"
+                    " improving over the existing best results, including ensembles by over 2 BLEU. On the WMT 2014"
+                    " English-to-French translation task, our model establishes a new single-model state-of-the-art"
+                    " BLEU score of 41.8 after training for 3.5 days on eight GPUs, a small fraction of the training"
+                    " costs of the best models from the literature. We show that the Transformer generalizes well to"
+                    " other tasks by applying it successfully to English constituency parsing both with large and"
+                    " limited training data."
+                ),
                 "labels": ["translation", "machine learning", "vision", "statistics"],
                 "scores": [0.817, 0.713, 0.018, 0.018],
             },
diff --git a/tests/sagemaker/scripts/pytorch/run_glue_model_parallelism.py b/tests/sagemaker/scripts/pytorch/run_glue_model_parallelism.py
index 6bec48fda7..534b1656d1 100644
--- a/tests/sagemaker/scripts/pytorch/run_glue_model_parallelism.py
+++ b/tests/sagemaker/scripts/pytorch/run_glue_model_parallelism.py
@@ -81,8 +81,10 @@ class DataTrainingArguments:
     max_seq_length: int = field(
         default=128,
         metadata={
-            "help": "The maximum total input sequence length after tokenization. Sequences longer "
-            "than this will be truncated, sequences shorter will be padded."
+            "help": (
+                "The maximum total input sequence length after tokenization. Sequences longer "
+                "than this will be truncated, sequences shorter will be padded."
+            )
         },
     )
     overwrite_cache: bool = field(
@@ -91,29 +93,37 @@ class DataTrainingArguments:
     pad_to_max_length: bool = field(
         default=True,
         metadata={
-            "help": "Whether to pad all samples to `max_seq_length`. "
-            "If False, will pad the samples dynamically when batching to the maximum length in the batch."
+            "help": (
+                "Whether to pad all samples to `max_seq_length`. "
+                "If False, will pad the samples dynamically when batching to the maximum length in the batch."
+            )
         },
     )
     max_train_samples: Optional[int] = field(
         default=None,
         metadata={
-            "help": "For debugging purposes or quicker training, truncate the number of training examples to this "
-            "value if set."
+            "help": (
+                "For debugging purposes or quicker training, truncate the number of training examples to this "
+                "value if set."
+            )
         },
     )
     max_val_samples: Optional[int] = field(
         default=None,
         metadata={
-            "help": "For debugging purposes or quicker training, truncate the number of validation examples to this "
-            "value if set."
+            "help": (
+                "For debugging purposes or quicker training, truncate the number of validation examples to this "
+                "value if set."
+            )
         },
     )
     max_test_samples: Optional[int] = field(
         default=None,
         metadata={
-            "help": "For debugging purposes or quicker training, truncate the number of test examples to this "
-            "value if set."
+            "help": (
+                "For debugging purposes or quicker training, truncate the number of test examples to this "
+                "value if set."
+            )
         },
     )
     train_file: Optional[str] = field(
@@ -170,8 +180,10 @@ class ModelArguments:
     use_auth_token: bool = field(
         default=False,
         metadata={
-            "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
-            "with private models)."
+            "help": (
+                "Will use the token generated when running `transformers-cli login` (necessary to use this script "
+                "with private models)."
+            )
         },
     )
diff --git a/tests/test_configuration_common.py b/tests/test_configuration_common.py
index 853a19c3ec..93723d11ba 100644
--- a/tests/test_configuration_common.py
+++ b/tests/test_configuration_common.py
@@ -300,8 +300,9 @@ class ConfigTestUtils(unittest.TestCase):
         keys_with_defaults = [key for key, value in config_common_kwargs.items() if value == getattr(base_config, key)]
         if len(keys_with_defaults) > 0:
             raise ValueError(
-                "The following keys are set with the default values in `test_configuration_common.config_common_kwargs` "
-                f"pick another value for them: {', '.join(keys_with_defaults)}."
+ "The following keys are set with the default values in" + " `test_configuration_common.config_common_kwargs` pick another value for them:" + f" {', '.join(keys_with_defaults)}." ) def test_cached_files_are_used_when_internet_is_down(self): diff --git a/tests/test_modeling_common.py b/tests/test_modeling_common.py index 09fd338d3d..900b425b38 100755 --- a/tests/test_modeling_common.py +++ b/tests/test_modeling_common.py @@ -1440,7 +1440,12 @@ class ModelTesterMixin: torch.allclose( set_nan_tensor_to_zero(tuple_object), set_nan_tensor_to_zero(dict_object), atol=1e-5 ), - msg=f"Tuple and dict output are not equal. Difference: {torch.max(torch.abs(tuple_object - dict_object))}. Tuple has `nan`: {torch.isnan(tuple_object).any()} and `inf`: {torch.isinf(tuple_object)}. Dict has `nan`: {torch.isnan(dict_object).any()} and `inf`: {torch.isinf(dict_object)}.", + msg=( + "Tuple and dict output are not equal. Difference:" + f" {torch.max(torch.abs(tuple_object - dict_object))}. Tuple has `nan`:" + f" {torch.isnan(tuple_object).any()} and `inf`: {torch.isinf(tuple_object)}. Dict has" + f" `nan`: {torch.isnan(dict_object).any()} and `inf`: {torch.isinf(dict_object)}." + ), ) recursive_check(tuple_output, dict_output) @@ -1629,7 +1634,8 @@ class ModelTesterMixin: self.assertLessEqual(max_diff, tol, f"{name}: Difference between torch and tf is {max_diff} (>= {tol}).") else: raise ValueError( - f"`tf_outputs` should be an instance of `tf.Tensor`, a `tuple`, or an instance of `tf.Tensor`. Got {type(tf_outputs)} instead." + "`tf_outputs` should be an instance of `tf.Tensor`, a `tuple`, or an instance of `tf.Tensor`. Got" + f" {type(tf_outputs)} instead." ) def prepare_tf_inputs_from_pt_inputs(self, pt_inputs_dict): diff --git a/tests/test_modeling_tf_common.py b/tests/test_modeling_tf_common.py index 6edc6b20c2..50f83ba65d 100644 --- a/tests/test_modeling_tf_common.py +++ b/tests/test_modeling_tf_common.py @@ -505,7 +505,8 @@ class TFModelTesterMixin: self.assertLessEqual(max_diff, tol, f"{name}: Difference between torch and tf is {max_diff} (>= {tol}).") else: raise ValueError( - f"`tf_outputs` should be an instance of `tf.Tensor`, a `tuple`, or an instance of `tf.Tensor`. Got {type(tf_outputs)} instead." + "`tf_outputs` should be an instance of `tf.Tensor`, a `tuple`, or an instance of `tf.Tensor`. Got" + f" {type(tf_outputs)} instead." ) def prepare_pt_inputs_from_tf_inputs(self, tf_inputs_dict): @@ -956,7 +957,10 @@ class TFModelTesterMixin: else: self.assertTrue( all(tf.equal(tuple_object, dict_object)), - msg=f"Tuple and dict output are not equal. Difference: {tf.math.reduce_max(tf.abs(tuple_object - dict_object))}", + msg=( + "Tuple and dict output are not equal. 
Difference:" + f" {tf.math.reduce_max(tf.abs(tuple_object - dict_object))}" + ), ) recursive_check(tuple_output, dict_output) diff --git a/tests/test_tokenization_common.py b/tests/test_tokenization_common.py index fe16e5e1cd..2189d3c7f8 100644 --- a/tests/test_tokenization_common.py +++ b/tests/test_tokenization_common.py @@ -1005,7 +1005,8 @@ class TokenizerTesterMixin: self.assertEqual(len(cm.records), 1) self.assertTrue( cm.records[0].message.startswith( - "Token indices sequence length is longer than the specified maximum sequence length for this model" + "Token indices sequence length is longer than the specified maximum sequence length" + " for this model" ) ) @@ -1016,7 +1017,8 @@ class TokenizerTesterMixin: self.assertEqual(len(cm.records), 1) self.assertTrue( cm.records[0].message.startswith( - "Token indices sequence length is longer than the specified maximum sequence length for this model" + "Token indices sequence length is longer than the specified maximum sequence length" + " for this model" ) ) @@ -1131,7 +1133,8 @@ class TokenizerTesterMixin: self.assertEqual(len(cm.records), 1) self.assertTrue( cm.records[0].message.startswith( - "Token indices sequence length is longer than the specified maximum sequence length for this model" + "Token indices sequence length is longer than the specified maximum sequence length" + " for this model" ) ) @@ -1142,7 +1145,8 @@ class TokenizerTesterMixin: self.assertEqual(len(cm.records), 1) self.assertTrue( cm.records[0].message.startswith( - "Token indices sequence length is longer than the specified maximum sequence length for this model" + "Token indices sequence length is longer than the specified maximum sequence length" + " for this model" ) ) @@ -2401,13 +2405,15 @@ class TokenizerTesterMixin: # Longer text that will definitely require truncation. 
                 src_text = [
                     " UN Chief Says There Is No Military Solution in Syria",
-                    " Secretary-General Ban Ki-moon says his response to Russia's stepped up military support for Syria is that 'there is no military solution' to the nearly five-year conflict and more weapons will only worsen the violence and misery for millions of people.",
+                    " Secretary-General Ban Ki-moon says his response to Russia's stepped up military support for"
+                    " Syria is that 'there is no military solution' to the nearly five-year conflict and more weapons"
+                    " will only worsen the violence and misery for millions of people.",
                 ]
                 tgt_text = [
                     "Şeful ONU declară că nu există o soluţie militară în Siria",
-                    "Secretarul General Ban Ki-moon declară că răspunsul său la intensificarea sprijinului militar al Rusiei "
-                    'pentru Siria este că "nu există o soluţie militară" la conflictul de aproape cinci ani şi că noi arme nu '
-                    "vor face decât să înrăutăţească violenţele şi mizeria pentru milioane de oameni.",
+                    "Secretarul General Ban Ki-moon declară că răspunsul său la intensificarea sprijinului militar al"
+                    ' Rusiei pentru Siria este că "nu există o soluţie militară" la conflictul de aproape cinci ani şi'
+                    " că noi arme nu vor face decât să înrăutăţească violenţele şi mizeria pentru milioane de oameni.",
                 ]
                 try:
                     batch = tokenizer.prepare_seq2seq_batch(
@@ -3658,11 +3664,9 @@ class TokenizerTesterMixin:
                             break
                 self.assertTrue(
                     find,
-                    (
-                        f"'{new_special_token_str}' doesn't appear in the list "
-                        f"'{new_tokenizer.all_special_tokens_extended}' as an AddedToken with the same parameters as "
-                        f"'{special_token}' in the list {tokenizer.all_special_tokens_extended}"
-                    ),
+                    f"'{new_special_token_str}' doesn't appear in the list "
+                    f"'{new_tokenizer.all_special_tokens_extended}' as an AddedToken with the same parameters as "
+                    f"'{special_token}' in the list {tokenizer.all_special_tokens_extended}",
                 )
             elif special_token not in special_tokens_map:
                 # The special token must appear identically in the list of the new tokenizer.
@@ -3725,7 +3729,8 @@ class TokenizerTesterMixin:
             finally:
                 self.assertTrue(
                     cm.records[0].message.startswith(
-                        "The tokenizer class you load from this checkpoint is not the same type as the class this function is called from."
+                        "The tokenizer class you load from this checkpoint is not the same type as the class"
+                        " this function is called from."
                     )
                 )
diff --git a/tests/utils/test_convert_slow_tokenizer.py b/tests/utils/test_convert_slow_tokenizer.py
index f7bb60acfd..8655ea4602 100644
--- a/tests/utils/test_convert_slow_tokenizer.py
+++ b/tests/utils/test_convert_slow_tokenizer.py
@@ -28,9 +28,7 @@ class ConvertSlowTokenizerTest(unittest.TestCase):
             _ = SpmConverter(original_tokenizer_with_bytefallback)
             self.assertEqual(len(w), 1)
             self.assertIn(
-                (
-                    "The sentencepiece tokenizer that you are converting to a fast tokenizer uses the byte fallback option"
-                    " which is not implemented in the fast tokenizers."
-                ),
+                "The sentencepiece tokenizer that you are converting to a fast tokenizer uses the byte fallback option"
+                " which is not implemented in the fast tokenizers.",
                 str(w[0].message),
             )
diff --git a/tests/utils/test_model_card.py b/tests/utils/test_model_card.py
index 1004642a92..7d0e8795e0 100644
--- a/tests/utils/test_model_card.py
+++ b/tests/utils/test_model_card.py
@@ -38,7 +38,10 @@ class ModelCardTester(unittest.TestCase):
             },
             "training_data": {
                 "Dataset": "English Wikipedia dump dated 2018-12-01",
-                "Preprocessing": "Using SentencePiece vocabulary of size 52k tokens. See details on https://arxiv.org/pdf/1810.03993.pdf",
+                "Preprocessing": (
+                    "Using SentencePiece vocabulary of size 52k tokens. See details on"
+                    " https://arxiv.org/pdf/1810.03993.pdf"
+                ),
             },
             "quantitative_analyses": {"BLEU": 55.1, "ROUGE-1": 76},
         }
diff --git a/tests/utils/test_utils_check_copies.py b/tests/utils/test_utils_check_copies.py
index 7c81df714c..57cecf6653 100644
--- a/tests/utils/test_utils_check_copies.py
+++ b/tests/utils/test_utils_check_copies.py
@@ -125,9 +125,48 @@ class CopyCheckTester(unittest.TestCase):
     def test_convert_to_localized_md(self):
         localized_readme = check_copies.LOCALIZED_READMES["README_zh-hans.md"]
 
-        md_list = "1. **[ALBERT](https://huggingface.co/transformers/model_doc/albert.html)** (from Google Research and the Toyota Technological Institute at Chicago) released with the paper [ALBERT: A Lite BERT for Self-supervised Learning of Language Representations](https://arxiv.org/abs/1909.11942), by Zhenzhong Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, Radu Soricut.\n1. **[DistilBERT](https://huggingface.co/transformers/model_doc/distilbert.html)** (from HuggingFace), released together with the paper [DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter](https://arxiv.org/abs/1910.01108) by Victor Sanh, Lysandre Debut and Thomas Wolf. The same method has been applied to compress GPT2 into [DistilGPT2](https://github.com/huggingface/transformers/tree/main/examples/distillation), RoBERTa into [DistilRoBERTa](https://github.com/huggingface/transformers/tree/main/examples/distillation), Multilingual BERT into [DistilmBERT](https://github.com/huggingface/transformers/tree/main/examples/distillation) and a German version of DistilBERT.\n1. **[ELECTRA](https://huggingface.co/transformers/model_doc/electra.html)** (from Google Research/Stanford University) released with the paper [ELECTRA: Pre-training text encoders as discriminators rather than generators](https://arxiv.org/abs/2003.10555) by Kevin Clark, Minh-Thang Luong, Quoc V. Le, Christopher D. Manning."
-        localized_md_list = "1. **[ALBERT](https://huggingface.co/transformers/model_doc/albert.html)** (来自 Google Research and the Toyota Technological Institute at Chicago) 伴随论文 [ALBERT: A Lite BERT for Self-supervised Learning of Language Representations](https://arxiv.org/abs/1909.11942), 由 Zhenzhong Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, Radu Soricut 发布。\n"
-        converted_md_list_sample = "1. **[ALBERT](https://huggingface.co/transformers/model_doc/albert.html)** (来自 Google Research and the Toyota Technological Institute at Chicago) 伴随论文 [ALBERT: A Lite BERT for Self-supervised Learning of Language Representations](https://arxiv.org/abs/1909.11942), 由 Zhenzhong Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, Radu Soricut 发布。\n1. **[DistilBERT](https://huggingface.co/transformers/model_doc/distilbert.html)** (来自 HuggingFace) 伴随论文 [DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter](https://arxiv.org/abs/1910.01108) 由 Victor Sanh, Lysandre Debut and Thomas Wolf 发布。 The same method has been applied to compress GPT2 into [DistilGPT2](https://github.com/huggingface/transformers/tree/main/examples/distillation), RoBERTa into [DistilRoBERTa](https://github.com/huggingface/transformers/tree/main/examples/distillation), Multilingual BERT into [DistilmBERT](https://github.com/huggingface/transformers/tree/main/examples/distillation) and a German version of DistilBERT.\n1. **[ELECTRA](https://huggingface.co/transformers/model_doc/electra.html)** (来自 Google Research/Stanford University) 伴随论文 [ELECTRA: Pre-training text encoders as discriminators rather than generators](https://arxiv.org/abs/2003.10555) 由 Kevin Clark, Minh-Thang Luong, Quoc V. Le, Christopher D. Manning 发布。\n"
+        md_list = (
+            "1. **[ALBERT](https://huggingface.co/transformers/model_doc/albert.html)** (from Google Research and the"
+            " Toyota Technological Institute at Chicago) released with the paper [ALBERT: A Lite BERT for"
+            " Self-supervised Learning of Language Representations](https://arxiv.org/abs/1909.11942), by Zhenzhong"
+            " Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, Radu Soricut.\n1."
+            " **[DistilBERT](https://huggingface.co/transformers/model_doc/distilbert.html)** (from HuggingFace),"
+            " released together with the paper [DistilBERT, a distilled version of BERT: smaller, faster, cheaper and"
+            " lighter](https://arxiv.org/abs/1910.01108) by Victor Sanh, Lysandre Debut and Thomas Wolf. The same"
+            " method has been applied to compress GPT2 into"
+            " [DistilGPT2](https://github.com/huggingface/transformers/tree/main/examples/distillation), RoBERTa into"
+            " [DistilRoBERTa](https://github.com/huggingface/transformers/tree/main/examples/distillation),"
+            " Multilingual BERT into"
+            " [DistilmBERT](https://github.com/huggingface/transformers/tree/main/examples/distillation) and a German"
+            " version of DistilBERT.\n1. **[ELECTRA](https://huggingface.co/transformers/model_doc/electra.html)**"
+            " (from Google Research/Stanford University) released with the paper [ELECTRA: Pre-training text encoders"
+            " as discriminators rather than generators](https://arxiv.org/abs/2003.10555) by Kevin Clark, Minh-Thang"
+            " Luong, Quoc V. Le, Christopher D. Manning."
+        )
+        localized_md_list = (
+            "1. **[ALBERT](https://huggingface.co/transformers/model_doc/albert.html)** (来自 Google Research and the"
+            " Toyota Technological Institute at Chicago) 伴随论文 [ALBERT: A Lite BERT for Self-supervised Learning of"
+            " Language Representations](https://arxiv.org/abs/1909.11942), 由 Zhenzhong Lan, Mingda Chen, Sebastian"
+            " Goodman, Kevin Gimpel, Piyush Sharma, Radu Soricut 发布。\n"
+        )
+        converted_md_list_sample = (
+            "1. **[ALBERT](https://huggingface.co/transformers/model_doc/albert.html)** (来自 Google Research and the"
+            " Toyota Technological Institute at Chicago) 伴随论文 [ALBERT: A Lite BERT for Self-supervised Learning of"
+            " Language Representations](https://arxiv.org/abs/1909.11942), 由 Zhenzhong Lan, Mingda Chen, Sebastian"
+            " Goodman, Kevin Gimpel, Piyush Sharma, Radu Soricut 发布。\n1."
+            " **[DistilBERT](https://huggingface.co/transformers/model_doc/distilbert.html)** (来自 HuggingFace) 伴随论文"
+            " [DistilBERT, a distilled version of BERT: smaller, faster, cheaper and"
+            " lighter](https://arxiv.org/abs/1910.01108) 由 Victor Sanh, Lysandre Debut and Thomas Wolf 发布。 The same"
+            " method has been applied to compress GPT2 into"
+            " [DistilGPT2](https://github.com/huggingface/transformers/tree/main/examples/distillation), RoBERTa into"
+            " [DistilRoBERTa](https://github.com/huggingface/transformers/tree/main/examples/distillation),"
+            " Multilingual BERT into"
+            " [DistilmBERT](https://github.com/huggingface/transformers/tree/main/examples/distillation) and a German"
+            " version of DistilBERT.\n1. **[ELECTRA](https://huggingface.co/transformers/model_doc/electra.html)** (来自"
+            " Google Research/Stanford University) 伴随论文 [ELECTRA: Pre-training text encoders as discriminators rather"
+            " than generators](https://arxiv.org/abs/2003.10555) 由 Kevin Clark, Minh-Thang Luong, Quoc V. Le,"
+            " Christopher D. Manning 发布。\n"
+        )
 
         num_models_equal, converted_md_list = check_copies.convert_to_localized_md(
             md_list, localized_md_list, localized_readme["format_model_list"]
@@ -143,9 +182,24 @@ class CopyCheckTester(unittest.TestCase):
         # Check whether the number of models is equal to README.md after conversion.
         self.assertTrue(num_models_equal)
 
-        link_changed_md_list = "1. **[ALBERT](https://huggingface.co/transformers/model_doc/albert.html)** (from Google Research and the Toyota Technological Institute at Chicago) released with the paper [ALBERT: A Lite BERT for Self-supervised Learning of Language Representations](https://arxiv.org/abs/1909.11942), by Zhenzhong Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, Radu Soricut."
-        link_unchanged_md_list = "1. **[ALBERT](https://huggingface.co/transformers/main/model_doc/albert.html)** (来自 Google Research and the Toyota Technological Institute at Chicago) 伴随论文 [ALBERT: A Lite BERT for Self-supervised Learning of Language Representations](https://arxiv.org/abs/1909.11942), 由 Zhenzhong Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, Radu Soricut 发布。\n"
-        converted_md_list_sample = "1. **[ALBERT](https://huggingface.co/transformers/model_doc/albert.html)** (来自 Google Research and the Toyota Technological Institute at Chicago) 伴随论文 [ALBERT: A Lite BERT for Self-supervised Learning of Language Representations](https://arxiv.org/abs/1909.11942), 由 Zhenzhong Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, Radu Soricut 发布。\n"
+        link_changed_md_list = (
+            "1. **[ALBERT](https://huggingface.co/transformers/model_doc/albert.html)** (from Google Research and the"
+            " Toyota Technological Institute at Chicago) released with the paper [ALBERT: A Lite BERT for"
+            " Self-supervised Learning of Language Representations](https://arxiv.org/abs/1909.11942), by Zhenzhong"
+            " Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, Radu Soricut."
+        )
+        link_unchanged_md_list = (
+            "1. **[ALBERT](https://huggingface.co/transformers/main/model_doc/albert.html)** (来自 Google Research and"
+            " the Toyota Technological Institute at Chicago) 伴随论文 [ALBERT: A Lite BERT for Self-supervised Learning of"
+            " Language Representations](https://arxiv.org/abs/1909.11942), 由 Zhenzhong Lan, Mingda Chen, Sebastian"
+            " Goodman, Kevin Gimpel, Piyush Sharma, Radu Soricut 发布。\n"
+        )
+        converted_md_list_sample = (
+            "1. **[ALBERT](https://huggingface.co/transformers/model_doc/albert.html)** (来自 Google Research and the"
+            " Toyota Technological Institute at Chicago) 伴随论文 [ALBERT: A Lite BERT for Self-supervised Learning of"
+            " Language Representations](https://arxiv.org/abs/1909.11942), 由 Zhenzhong Lan, Mingda Chen, Sebastian"
+            " Goodman, Kevin Gimpel, Piyush Sharma, Radu Soricut 发布。\n"
+        )
 
         num_models_equal, converted_md_list = check_copies.convert_to_localized_md(
             link_changed_md_list, link_unchanged_md_list, localized_readme["format_model_list"]
diff --git a/utils/check_copies.py b/utils/check_copies.py
index 5363fd1ff3..512615da27 100644
--- a/utils/check_copies.py
+++ b/utils/check_copies.py
@@ -40,22 +40,34 @@ LOCALIZED_READMES = {
     "README.md": {
         "start_prompt": "🤗 Transformers currently provides the following architectures",
         "end_prompt": "1. Want to contribute a new model?",
-        "format_model_list": "**[{title}]({model_link})** (from {paper_affiliations}) released with the paper {paper_title_link} by {paper_authors}.{supplements}",
+        "format_model_list": (
+            "**[{title}]({model_link})** (from {paper_affiliations}) released with the paper {paper_title_link} by"
+            " {paper_authors}.{supplements}"
+        ),
     },
     "README_zh-hans.md": {
         "start_prompt": "🤗 Transformers 目前支持如下的架构",
         "end_prompt": "1. 想要贡献新的模型?",
-        "format_model_list": "**[{title}]({model_link})** (来自 {paper_affiliations}) 伴随论文 {paper_title_link} 由 {paper_authors} 发布。{supplements}",
+        "format_model_list": (
+            "**[{title}]({model_link})** (来自 {paper_affiliations}) 伴随论文 {paper_title_link} 由 {paper_authors}"
+            " 发布。{supplements}"
+        ),
     },
     "README_zh-hant.md": {
         "start_prompt": "🤗 Transformers 目前支援以下的架構",
         "end_prompt": "1. 想要貢獻新的模型?",
-        "format_model_list": "**[{title}]({model_link})** (from {paper_affiliations}) released with the paper {paper_title_link} by {paper_authors}.{supplements}",
+        "format_model_list": (
+            "**[{title}]({model_link})** (from {paper_affiliations}) released with the paper {paper_title_link} by"
+            " {paper_authors}.{supplements}"
+        ),
     },
     "README_ko.md": {
         "start_prompt": "🤗 Transformers는 다음 모델들을 제공합니다",
         "end_prompt": "1. 새로운 모델을 올리고 싶나요?",
-        "format_model_list": "**[{title}]({model_link})** (from {paper_affiliations}) released with the paper {paper_title_link} by {paper_authors}.{supplements}",
+        "format_model_list": (
+            "**[{title}]({model_link})** (from {paper_affiliations}) released with the paper {paper_title_link} by"
+            " {paper_authors}.{supplements}"
+        ),
     },
 }
@@ -130,7 +142,7 @@ def blackify(code):
     has_indent = len(get_indent(code)) > 0
     if has_indent:
         code = f"class Bla:\n{code}"
-    mode = black.Mode(target_versions={black.TargetVersion.PY35}, line_length=119)
+    mode = black.Mode(target_versions={black.TargetVersion.PY35}, line_length=119, preview=True)
     result = black.format_str(code, mode=mode)
     result, _ = style_docstrings_in_code(result)
     return result[len("class Bla:\n") :] if has_indent else result
diff --git a/utils/check_repo.py b/utils/check_repo.py
index c5087a8606..6eaa2d752a 100644
--- a/utils/check_repo.py
+++ b/utils/check_repo.py
@@ -524,7 +524,8 @@ def check_all_decorator_order():
     if len(errors) > 0:
         msg = "\n".join(errors)
         raise ValueError(
-            f"The parameterized decorator (and its variants) should always be first, but this is not the case in the following files:\n{msg}"
+            "The parameterized decorator (and its variants) should always be first, but this is not the case in the"
+            f" following files:\n{msg}"
         )
diff --git a/utils/notification_service.py b/utils/notification_service.py
index 47e85d867e..8a50c745ef 100644
--- a/utils/notification_service.py
+++ b/utils/notification_service.py
@@ -180,7 +180,10 @@ class Message:
             "type": "section",
             "text": {
                 "type": "plain_text",
-                "text": f"There were {self.n_failures} failures, out of {self.n_tests} tests.\nThe suite ran in {self.time}.",
+                "text": (
+                    f"There were {self.n_failures} failures, out of {self.n_tests} tests.\nThe suite ran in"
+                    f" {self.time}."
+                ),
                 "emoji": True,
             },
             "accessory": {
diff --git a/utils/notification_service_deprecated.py b/utils/notification_service_deprecated.py
index b14bff1751..cd147480b9 100644
--- a/utils/notification_service_deprecated.py
+++ b/utils/notification_service_deprecated.py
@@ -94,7 +94,10 @@ def format_for_slack(total_results, results, scheduled: bool, title: str):
         "type": "section",
         "text": {
             "type": "mrkdwn",
-            "text": f"",
+            "text": (
+                f""
+            ),
         },
     }
diff --git a/utils/notification_service_doc_tests.py b/utils/notification_service_doc_tests.py
index 58ceb567ad..72c6be2ce0 100644
--- a/utils/notification_service_doc_tests.py
+++ b/utils/notification_service_doc_tests.py
@@ -118,7 +118,10 @@ class Message:
             "type": "section",
             "text": {
                 "type": "plain_text",
-                "text": f"There were {self.n_failures} failures, out of {self.n_tests} tests.\nThe suite ran in {self.time}.",
+                "text": (
+                    f"There were {self.n_failures} failures, out of {self.n_tests} tests.\nThe suite ran in"
+                    f" {self.time}."
+                ),
                 "emoji": True,
             },
             "accessory": {